C MPI 矩阵乘法错误

我正在用 C 语言和 MPI 实现矩阵乘法。矩阵不超过 15x15 时程序运行正常，但一旦超过这个大小就会出错，我不明白为什么……

据我观察，错误似乎主要发生在输出 "Process # sending..." 之后，也就是从属进程把计算结果发送回主进程的时候。

错误消息:

[LEC-B125N4J:12183] *** Process received signal ***
[LEC-B125N4J:12183] Signal: Segmentation fault (11)
[LEC-B125N4J:12183] Signal code: Address not mapped (1)

代码:

#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include <time.h>
#include <mpi.h>

//#define DIM 1000
/* Side length of the square matrices; every matrix below is DIM x DIM. */
#define DIM 15

/*
 * Statically allocate the matrices to make the rows
 * sequentially placed in memory. (This eases the task
 * of distributing the problem among the slaves.)
 * Make the matrices global to allow for larger
 * dimensions.
 */
int A[DIM][DIM]; /* left operand: filled by the master, row slices are sent to the slaves */
int B[DIM][DIM]; /* right operand: filled by the master, broadcast to every process */
int C[DIM][DIM]; /* product accumulated in parallel and gathered on the master */
int D[DIM][DIM]; /* reference product computed serially on the master, compared against C */

/*
 * Element-wise comparison of two DIM x DIM integer matrices.
 *
 * Returns 1 when every pair of corresponding elements is equal,
 * and 0 as soon as the first mismatch is found.
 */
int correct_result(int A[DIM][DIM], int B[DIM][DIM])
{
    int row = 0;

    while (row < DIM) {
        int col;

        for (col = 0; col < DIM; col++) {
            if (A[row][col] == B[row][col])
                continue;
            return 0; /* first differing element decides the result */
        }
        row++;
    }

    return 1;
}


int main (argc, argv)
int argc;
char *argv[];
{
    int rank=0, size;
    int i, j, k;
    int time1;
    volatile int tmp;
    int iOffset = 0;
    int iProblemSize = 0;

    MPI_Init(&argc, &argv);         /* starts MPI */
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);   /* get current process id */
    MPI_Comm_size(MPI_COMM_WORLD, &size);   /* get number of processes */

    iProblemSize = (DIM / (size - 1));

    if(rank == 0) { //Master
        printf("Number of processes: %d (1 Master and %d slaves) - DIM: %d\n", size, (size - 1), DIM);

        //Fill matrices A and B with random numbers
        srand(timer(NULL));
        for(i=0; i<DIM; ++i)
        {
            for (j=0; j<DIM; ++j)
            {
                A[i][j] = random() % 100 - 50;
                B[i][j] = random() % 100 - 50;
                C[i][j] = 0;
            }
        }
    }

    MPI_Bcast(B, (DIM * DIM), MPI_INT, 0, MPI_COMM_WORLD);

    if(rank == 0) { //Master
        /* Calculate the true answer */
        for (i=0; i<DIM; ++i)
            for (k=0; k<DIM; ++k)
                for (j=0; j<DIM; ++j)
                    D[i][j] += A[i][k] * B[k][j];


        time1 = timer();


        //Send pieces of A to the slaves
        iOffset = 0;
        for(i = 1; i < size; i++) {
            MPI_Send(A[iOffset], (iProblemSize * DIM), MPI_INT, i, 0, MPI_COMM_WORLD);
            iOffset += iProblemSize;

            /*for(j = 0; j < iProblemSize; j++) {
                MPI_Send(A[iOffset + j], DIM, MPI_INT, i, 0, MPI_COMM_WORLD);
            }
            iOffset += iProblemSize;*/
        }

        //Take care of leftovers if needed (if uneven number of slaves)
        if((size - 1) % DIM != 0) {
            for(i = iOffset; i < DIM; i++) {
                for(k = 0; k < DIM; k++) {
                    for(j = 0; j < DIM; j++) {
                        C[i][j] += A[i][k] * B[k][j];
                    }
                }
            }
        }

        //Gather the results from the slaves
        iOffset = 0;
        for(i = 1; i < size; i++) {
            MPI_Recv(C[iOffset], (iProblemSize * DIM), MPI_INT, i, 0, MPI_COMM_WORLD, NULL);
            iOffset += iProblemSize;
            printf("Received from %d!\n", i);
        }
        printf("All received!\n");

        /* Error checking */
        time1 = timer() - time1;
        printf ("Your calculation is %scorrect.\n", correct_result(C,D) ? "" : "not ");
        printf ("Total runtime: %f seconds\n", time1/1000000.0);
    }
    else { //Slaves
        MPI_Recv(A, (iProblemSize * DIM), MPI_INT, 0, 0, MPI_COMM_WORLD, NULL);
        /*for(j = 0; j < iProblemSize; j++) {
            MPI_Recv(A[j], DIM, MPI_INT, 0, 0, MPI_COMM_WORLD, NULL);
        }*/

        //Do the calculations for C
        //printf("Process %d doing calculations...\n", rank);
        for (i = 0; i < (iProblemSize * DIM); ++i) {
            for (k = 0; k < DIM; ++k) {
                for (j = 0; j < DIM; ++j) {
                    C[i][j] += A[i][k] * B[k][j];
                }
                //printf("\n");
            }
        }
        //printf("Process %d finished doing the calculations!\n", rank);

        //Send the result to the master
        printf("Process %d sending...\n", rank);
        MPI_Send(C, (iProblemSize * DIM), MPI_INT, 0, 0, MPI_COMM_WORLD);
        printf("Process %d finished sending!\n", rank);
    }


    MPI_Finalize();

    return 0;
}

最佳答案

好的，我终于修复了这个错误。问题出在从属进程执行计算的循环上：循环上界 iProblemSize * DIM 是接收到的元素个数，而不是行数，因此 i 可能超出矩阵的行数 DIM，从而越界访问 A 和 C……

for (i = 0; i < (iProblemSize * DIM); ++i) {

应该是

for (i = 0; i < iProblemSize; ++i) {

关于C MPI 矩阵乘法错误，我们在Stack Overflow上找到一个类似的问题： https://stackoverflow.com/questions/20329468/

C MPI 矩阵乘法错误

上一篇：c - 删除链表的唯一节点

下一篇：c - Valgrind 未显示有关内存区域重叠的错误