c - 为什么我的二维结果矩阵数组没有分配到我的大学集群上的所有进程?

标签 c parallel-processing mpi

我正在集群上运行乘法矩阵代码,我确信它工作得很好。我什至使用了 Scatterv,它完美地分散...但是我在每个进程上动态分配了我的结果矩阵,大小为 sendcount[world_rank] 行和 z 列,我检查了这些数字是正确的...只分配了一个进程数组和其余的只是打印一个地址或其他东西。

我的代码在这里进行矩阵乘法:

count = 0;
while(count < z){

    for (i = 0; i < sendcount[world_rank]; i++){

        for(j = 0; j < y; j++){
            //printf("matrix1[%d][%d] * matrix2[%d][%d] = ", i, j, j, count);
            resultmatrix[i][count] += matrix1[i][j] * matrix2[j][count];
            //printf("%d ", resultmatrix[i][count]);

        }
        //printf("rank: %d, i: %d count: %d resultmatrix: %d\n", world_rank, i, count, resultmatrix[i][count]);

    }
    count++;
    //printf("\n");
}

当我在每个进程打印结果矩阵进行测试时,只有我的根进程打印实际值...... 这可能是我的集群的问题还是我遗漏了什么?

这是我的完整代码:

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char* argv[]){

int world_size, world_rank, i, j, tag = 777;
int root = 0;
int count = 0;

int x = atoi(argv[1]);
int y = atoi(argv[2]);
int z = atoi(argv[3]);


MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
MPI_Comm_size(MPI_COMM_WORLD, &world_size);
MPI_Status status;
int temp1_size = x*y;
int temp2_size = y*z;
//scatter v info
int sum = 0;
int rem1 = x%world_size;
int** matrix1;
int** matrix2;
int** resultmatrix;
int* tempM1;
int* tempM2;
int* recM1;
int* sendcount = (int*)malloc(sizeof(int) * world_size);
int* displs = (int*)malloc(sizeof(int) * world_size);

for(i = 0; i < world_size; i++){

    sendcount[i] = (x/world_size)*y;
    if(rem1 > 0){
        sendcount[i] += y;
        rem1--;
    }
    displs[i] = sum;
    sum += sendcount[i];
}
//to send my arrays over... first by bcast other by scatter...
tempM1 = (int*)malloc(sizeof(int) * temp1_size);
tempM2 = (int*)malloc(sizeof(int) * temp2_size);
// first is for 1d other is for matrix1;
//the array that will hold out scattered matrix1... 1d array...
recM1 = (int*)malloc(sizeof(int) * 500);
// matrix one at every process after scatter...




if (world_rank == root){

    FILE* Matrix1;
    FILE* Matrix2;

    Matrix1 = fopen("Matrix1.txt", "r");
    Matrix2 = fopen("Matrix2.txt", "r");
    int count = 0;
    while (count < temp1_size){
        fscanf(Matrix1, "%d", &tempM1[count]);
        count++;
    }
    //printf("count: %d, temp1_size: %d\n", count, temp1_size);
    count = 0;
    while(count < temp2_size){
        fscanf(Matrix2, "%d", &tempM2[count]);
        count++;
    }
    fclose(Matrix1);
    fclose(Matrix2);

    /*for(i = 0; i < world_size; i++)
        printf("sendcount: %d, displs: %d\n", sendcount[i], displs[i]);*/

}
MPI_Bcast(tempM2, temp2_size, MPI_INT, root, MPI_COMM_WORLD);
MPI_Scatterv(tempM1, sendcount, displs, MPI_INT, recM1, 500, MPI_INT, root, MPI_COMM_WORLD);

for(i = 0; i < world_size; i++){
    sendcount[i] /= y;
    //printf("%d at i: %d\n", sendcount[i], i);
}

matrix1 = (int**)malloc(sizeof(int*) * sendcount[world_rank]);
for(i = 0; i < sendcount[world_rank]; i++){
    matrix1[i] = (int*)malloc(sizeof(int) * y);
}
printf("%d\n", matrix1[0][0]);
// allocating 2d array which is my matrix 2...
matrix2 = (int**)malloc(sizeof(int*) * y);
for(i = 0; i < y; i++){
    matrix2[i] = (int*)malloc(sizeof(int) * z);
}
printf("%d\n", matrix2[0][0]);
// my result matrix which consists of my sendcount[world_rank] and z :)
resultmatrix = (int**)malloc(sizeof(int*) * x);
for(i = 0; i < x; i++){
    resultmatrix[i] = (int*)malloc(sizeof(int) * z);
}
printf("%d\n", resultmatrix[0][0]);

count = 0;
for(i = 0; i < sendcount[world_rank]; i++){
    for(j = 0; j < y; j++){
        matrix1[i][j] = recM1[count];
        count++;
        //printf("%d ", matrix1[i][j]);
    }
    //printf("\n");
    //printf("\n");
}
//printf("\n");
free(recM1);
count = 0;
for(i = 0; i < y; i++){
    for(j = 0; j < z; j++){
        matrix2[i][j] = tempM2[count];
        count++;
        //printf("%d ", matrix2[i][j]);
    }
    //printf("\n");
}
//printf("\n");
free(tempM2);
free(tempM1);
//printf("%d\n", resultmatrix[0][0]);
count = 0;
while(count < z){

    for (i = 0; i < sendcount[world_rank]; i++){

        for(j = 0; j < y; j++){
            //printf("matrix1[%d][%d] * matrix2[%d][%d] = ", i, j, j, count);
            resultmatrix[i][count] += matrix1[i][j] * matrix2[j][count];
            //printf("%d ", resultmatrix[i][count]);

        }
        //printf("rank: %d, i: %d count: %d resultmatrix: %d\n", world_rank, i, count, resultmatrix[i][count]);

    }
    count++;
    //printf("\n");
}
/*for(i = 0; i < sendcount[world_rank]; i++){
    for(j = 0; j < z; j++){
        printf("%d ", resultmatrix[i][j]);
    }
    printf("\n");
}   */
    //int khara = world_rank * scattered_rows;
    //while(i < scattered_rows){
    /*if(world_rank!=0){
        for (i = 0; i < sendcount[world_rank]; i++){
            MPI_Send(resultmatrix[i], z, MPI_INT, root, tag, MPI_COMM_WORLD);
        }
    }*/
        //MPI_Gather(resultmatrix, z*scattered_rows, MPI_INT, global_result, x * z, MPI_INT, root, MPI_COMM_WORLD);
        //i++;
        //khara++;
    //}

/*if(world_rank == root){
    int rank = 1;
    for(j = 0; j < sendcount[world_rank]; j++){
            global_result[j] = resultmatrix[j];
        }
    count = sendcount[world_rank];
    while(rank < world_size){
        for(i = 0; i < sendcount[rank]; i++){
            MPI_Recv(global_result[count++], z, MPI_INT, rank, tag, MPI_COMM_WORLD, &status);

        }
        rank++;
    }*/

    /*while (rank < world_size){
    for(i = scattered_rows i = 0; i < sendcount; i += scattered_rows){
        rank++;
        for(j = 0; j < scattered_rows; j++){
            printf("hello!!!\n");
            MPI_Recv(global_result[count++], z, MPI_INT, rank, tag, MPI_COMM_WORLD, &status);
        }
    }*/


    /*for(i = 0; i < x; i++){
        printf("\n");
        for(j = 0; j < z; j++){
            printf("%d ", global_result[i][j]);
        }
    }*/
    //printf("\nx = %d, y = %d, z= %d", x,y,z);

//}
free(matrix1);
free(matrix2);
MPI_Finalize();
return 0;
}

最佳答案

您为resultmatrix[i]分配了内存,但没有初始化它。这意味着它的内容将是不确定并且当您使用语句中的数据

resultmatrix[i][count] += ...;

您将读取并修改该不确定的数据,并且您将拥有 undefined behavior .

在分配循环中,您可以使用例如memset初始化分配的内存:

for(i = 0; i < x; i++){
    resultmatrix[i] = malloc(sizeof(int) * z);
    memset(resultmatrix[i], 0, sizeof(int) * z);
}

[请注意,我删除了 malloc 结果的转换,in C you should not do that ]

关于c - 为什么我的二维结果矩阵数组没有分配到我的大学集群上的所有进程?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/29729966/

相关文章:

c - C中生产者-消费者的线程安全环形缓冲区

C 指向另一个结构体的指针

mysql - 如何并行更新 MySQL(MyISAM) 表?

fork - 使用 fork 和 MPI 进行多进程编程的区别

c - c 中的 mpi 程序可以编译但无法运行

c - 通过 LinkedList 进行线性搜索

c - block 范围内的 extern 与文件范围内的 typedef 不冲突?

c# - 在多线程应用程序中使用 Thread.Sleep 的原因是什么?

c++ - 并行骑士之旅算法

c - MPI_SEND 和 MPI_RECIEVE 没有编译引用