c - Problem with parallel matrix multiplication

Tags: c, parallel-processing, segmentation-fault, mpi, openmpi

I am having trouble with matrix multiplication using MPI.

My program reads two n x n matrices from two files and is supposed to multiply them using MPI. However, I get a segmentation fault in one of the processes. This is the output I get when I run the code:

read matrix A from matrixA
read matrix B from matrixB

mpirun noticed that process rank 1 with PID 15599 on node VirtualBox exited on signal 11 (Segmentation fault).

Here is my code:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <mpi.h>

#define MASTER_TO_SLAVE_TAG 1 /* tag base for master-to-worker messages (any bases at least 3 apart work) */
#define SLAVE_TO_MASTER_TAG 4 /* tag base for worker-to-master messages */

int main (int argc, char * argv[])
{
    /* Check the number of arguments */
    int n; /* Dimension of the matrix */
    float *sa, *sb, *sc; /* Storage for matrix A, B, and C */
    float **a, **b, **c; /* 2D arrays to access matrix A, B, and C */
    int i, j, k;
    int rank, size; /* MPI rank and number of processes */
    int low_bound, upper_bound, portion; /* Row range assigned to each worker */
    double start_time, end_time; /* Timing */
    MPI_Status status;
    MPI_Request request;

    MPI_Init(&argc, &argv); //Initialize MPI operations
    MPI_Comm_rank(MPI_COMM_WORLD, &rank); //Get the rank
    MPI_Comm_size(MPI_COMM_WORLD, &size); //Get number of processes

    if(argc != 4) {
        printf("Usage: %s fileA fileB fileC\n", argv[0]);
        return 1;
    }

    if(rank == 0)
    {
        /* Read matrix A */
        printf("read matrix A from %s\n", argv[1]);
        read_matrix(argv[1], &a, &sa, &i, &j);
        if(i != j) {
            printf("ERROR: matrix A not square\n"); return 2;
        }
        n = i;

        //printf("%d", n);

        /* Read matrix B */
        printf("Read matrix B from %s\n", argv[2]);
        read_matrix(argv[2], &b, &sb, &i, &j);
        if(i != j) {
            printf("ERROR: matrix B not square\n");
            return 2;
        }
        if(n != i) {
            printf("ERROR: matrix A and B incompatible\n");
            return 2;
         }
    }

    printf("test");

    if(rank == 0)
    {
        /* Initialize matrix C */
        sc = (float*)malloc(n*n*sizeof(float));
        memset(sc, 0, n*n*sizeof(float));
        c = (float**)malloc(n*sizeof(float*));
        for(i=0; i<n; i++) c[i] = &sc[i*n];
    }

    ////////////////////////////////////////////////////////////////////////////////////////////
    float matrA[n][n];
    float matrB[n][n];
    float matrC[n][n];

    for(i = 0; i < n; i++)
    {
        for(j = 0; j < n; j++)
        {
            matrA[i][j] = sa[(i*n) + j];
            matrB[i][j] = sb[(i*n) + j];
        }
    }
    /* Master initializes work*/
    if (rank == 0)
    {
        start_time = MPI_Wtime();
        for (i = 1; i < size; i++)
        {
            //For each slave other than the master
            portion = (n / (size - 1)); // Calculate portion without master
            low_bound = (i - 1) * portion;
            if (((i + 1) == size) && ((n % (size - 1)) != 0))
            {
                //If rows of [A] cannot be equally divided among slaves,
                upper_bound = n; //the last slave gets all the remaining rows.
            }
            else
            {
                upper_bound = low_bound + portion; //Rows of [A] are equally divisible among the slaves
            }
            //Send the low bound first without blocking, to the intended slave.
            MPI_Isend(&low_bound, 1, MPI_INT, i, MASTER_TO_SLAVE_TAG, MPI_COMM_WORLD, &request);

            //Next send the upper bound without blocking, to the intended slave
            MPI_Isend(&upper_bound, 1, MPI_INT, i, MASTER_TO_SLAVE_TAG + 1, MPI_COMM_WORLD, &request);

            //Finally send the allocated row portion of [A] without blocking, to the intended slave
            MPI_Isend(&matrA[low_bound][0], (upper_bound - low_bound) * n, MPI_FLOAT, i, MASTER_TO_SLAVE_TAG + 2, MPI_COMM_WORLD, &request);
        }
    }


    //broadcast [B] to all the slaves
    MPI_Bcast(&matrB, n*n, MPI_FLOAT, 0, MPI_COMM_WORLD);
    /* work done by slaves*/
    if (rank > 0)
    {
        //receive low bound from the master
        MPI_Recv(&low_bound, 1, MPI_INT, 0, MASTER_TO_SLAVE_TAG, MPI_COMM_WORLD, &status);
        //next receive upper bound from the master
        MPI_Recv(&upper_bound, 1, MPI_INT, 0, MASTER_TO_SLAVE_TAG + 1, MPI_COMM_WORLD, &status);
        //finally receive row portion of [A] to be processed from the master
        MPI_Recv(&matrA[low_bound][0], (upper_bound - low_bound) * n, MPI_FLOAT, 0, MASTER_TO_SLAVE_TAG + 2, MPI_COMM_WORLD, &status);
        for (i = low_bound; i < upper_bound; i++)
        {
            //iterate through a given set of rows of [A]
            for (j = 0; j < n; j++)
            {
                //iterate through columns of [B]
                for (k = 0; k < n; k++)
                {
                    //iterate through rows of [B]
                    matrC[i][j] += (matrA[i][k] * matrB[k][j]);
                }
            }
        }


        //send back the low bound first without blocking, to the master
        MPI_Isend(&low_bound, 1, MPI_INT, 0, SLAVE_TO_MASTER_TAG, MPI_COMM_WORLD, &request);
        //send the upper bound next without blocking, to the master
        MPI_Isend(&upper_bound, 1, MPI_INT, 0, SLAVE_TO_MASTER_TAG + 1, MPI_COMM_WORLD, &request);
        //finally send the processed portion of data without blocking, to the master
        MPI_Isend(&matrC[low_bound][0],
                  (upper_bound - low_bound) * n,
                  MPI_FLOAT,
                  0,
                  SLAVE_TO_MASTER_TAG + 2,
                  MPI_COMM_WORLD,
                  &request);
    }

    /* Master gathers processed work*/
    if (rank == 0)
    {
        for (i = 1; i < size; i++)
        {
            // Until all slaves have handed back the processed data,
            // receive low bound from a slave.
            MPI_Recv(&low_bound, 1, MPI_INT, i, SLAVE_TO_MASTER_TAG, MPI_COMM_WORLD, &status);

            //Receive upper bound from a slave
            MPI_Recv(&upper_bound, 1, MPI_INT, i, SLAVE_TO_MASTER_TAG + 1, MPI_COMM_WORLD, &status);

            //Receive processed data from a slave
            MPI_Recv(&matrC[low_bound][0],
                     (upper_bound - low_bound) * n,
                     MPI_FLOAT,
                     i,
                     SLAVE_TO_MASTER_TAG + 2,
                     MPI_COMM_WORLD,
                     &status);
        }
        end_time = MPI_Wtime();
        printf("\nRunning Time = %f\n\n", end_time - start_time);
    }
    MPI_Finalize(); //Finalize MPI operations

    /* Do the multiplication */
    ////////////////////////////////////////////////////  matmul(a, b, c, n);
    for(i = 0; i < n; i++)
    {
        for (j = 0; j < n; j++)
        {
            sc[(i*n) + j] = matrC[i][j];
        }
    }
}

Best Answer

Every process declares the pointers to the matrices, i.e.:

float *sa, *sb, *sc; /* storage for matrix A, B, and C */

but only process 0 (allocates and) fills in the arrays sa and sb:

if(rank == 0)
  {
      ...
      read_matrix(argv[1], &a, &sa, &i, &j);
      ...
      read_matrix(argv[2], &b, &sb, &i, &j);
      ...
  }

Afterwards, however, every process tries to access positions of the sa and sb arrays:

for(i = 0; i < n; i++)
{
   for(j = 0; j < n; j++)
   {
       matrA[i][j] = sa[(i*n) + j];
       matrB[i][j] = sb[(i*n) + j];
   }
}

Since only process 0 has (allocated and) filled in the arrays sa and sb, the remaining processes are trying to access memory (sa[(i*n) + j] and sb[(i*n) + j]) that was never allocated. That is why you get the segmentation fault.
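One possible fix, sketched below with the question's variable names, is to broadcast n from process 0 (only that rank reads it from the files) and then restrict the sa/sb copy to process 0; the workers still receive matrB through the existing MPI_Bcast and their rows of matrA through the sends. This is only a minimal sketch, not a complete rework of the program:

    /* Make n known on every rank before the local arrays are declared */
    MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);

    float matrA[n][n];
    float matrB[n][n];
    float matrC[n][n];

    /* Only rank 0 has sa and sb allocated and filled in */
    if (rank == 0) {
        for (i = 0; i < n; i++) {
            for (j = 0; j < n; j++) {
                matrA[i][j] = sa[(i*n) + j];
                matrB[i][j] = sb[(i*n) + j];
            }
        }
    }
    /* matrB reaches the workers via the later MPI_Bcast; the assigned
       rows of matrA reach them via the MPI_Isend calls */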

By the way, there is another problem in your program: you start non-blocking sends with MPI_Isend but never wait on the returned request handles for completion. The MPI implementation is not even required to start the send operation until it is properly progressed to completion by a call to one of the wait or test operations (MPI_Wait, MPI_Waitsome, MPI_Waitall, and so on). Even worse, you reuse the same handle variable request and thereby lose the handles of all previously initiated requests, which makes them impossible to wait on or test. Use an array of requests instead, and wait for all of them to complete with MPI_Waitall after the send loop.
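A minimal sketch of what that could look like for the master's send loop, assuming per-worker bound arrays lows and ups (introduced here so that each pending send keeps its own valid buffer) and otherwise reusing the question's variables:

    /* One request per MPI_Isend, all completed with a single MPI_Waitall */
    MPI_Request reqs[3 * (size - 1)];
    int lows[size], ups[size];   /* per-worker copies of the bounds */
    int r = 0;

    for (i = 1; i < size; i++) {
        portion = n / (size - 1);
        lows[i] = (i - 1) * portion;
        ups[i]  = ((i + 1) == size && (n % (size - 1)) != 0) ? n : lows[i] + portion;

        MPI_Isend(&lows[i], 1, MPI_INT, i, MASTER_TO_SLAVE_TAG, MPI_COMM_WORLD, &reqs[r++]);
        MPI_Isend(&ups[i], 1, MPI_INT, i, MASTER_TO_SLAVE_TAG + 1, MPI_COMM_WORLD, &reqs[r++]);
        MPI_Isend(&matrA[lows[i]][0], (ups[i] - lows[i]) * n, MPI_FLOAT, i,
                  MASTER_TO_SLAVE_TAG + 2, MPI_COMM_WORLD, &reqs[r++]);
    }
    MPI_Waitall(r, reqs, MPI_STATUSES_IGNORE); /* every outstanding send now completes */

This keeps the sends non-blocking but guarantees that the buffers stay valid and that every request is eventually completed.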

Also think about this: do you really need non-blocking operations to send the data back from the workers?
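If not, the worker side could simply use blocking MPI_Send calls and drop the request handling altogether; a sketch with the question's variable names:

    /* Plain blocking sends back to the master; no request handles needed */
    MPI_Send(&low_bound, 1, MPI_INT, 0, SLAVE_TO_MASTER_TAG, MPI_COMM_WORLD);
    MPI_Send(&upper_bound, 1, MPI_INT, 0, SLAVE_TO_MASTER_TAG + 1, MPI_COMM_WORLD);
    MPI_Send(&matrC[low_bound][0], (upper_bound - low_bound) * n, MPI_FLOAT, 0,
             SLAVE_TO_MASTER_TAG + 2, MPI_COMM_WORLD);

Since the master posts matching blocking MPI_Recv calls immediately afterwards, nothing is gained from making these sends non-blocking.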

Regarding this problem with parallel matrix multiplication in C, a similar question can be found on Stack Overflow: https://stackoverflow.com/questions/13655461/
