我尝试将矩阵逐行分散到所有处理器,但导致段错误。我不知道我做错了什么。这是我的代码
if(rank == 0) {
A_row = 10;
A_col = 10;
/* calculate the strip size */
strip_size = A_row / size;
/* genarate Matrix A */
A = (double **)malloc(sizeof(double*) * 10);
int k = 0;
for(i = 0; i < 10; i++) {
A[i] = (double*)malloc(sizeof(double) * 10);
for(j = 0; j < 10; j++) {
A[i][j] = k;
k++;
printf("%lf ", A[i][j]);
}
printf("\n");
}
}
/* Broadcasting the row, column size of Matrix A as well as strip size and Matrix B*/
MPI_Bcast(&A_row, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&A_col, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&strip_size, 1, MPI_INT, 0, MPI_COMM_WORLD);
/* defining a datatype for sub-matrix */
MPI_Type_vector(strip_size, A_col, A_col, MPI_DOUBLE, &strip);
MPI_Type_commit(&strip);
strip_A = (double **)malloc(sizeof(double*)*strip_size);
for(i= 0; i< strip_size; i++) {
strip_A[i] = (double*)malloc(sizeof(double)*A_col);
}
MPI_Scatter(&A[0][0], 1, strip, &strip_A[0][0], 1, strip, 0, MPI_COMM_WORLD);
for(i = 0; i < strip_size; i++) {
if(i == 0) {
printf("rank = %d\n", rank);
}
for(j = 0; j < A_col; j++) {
printf("%lf ", strip_A[i][j]);
}
printf("\n");
}
谁能告诉我怎么了...
这是我运行时的错误
mpirun -np 2 ./a.out
0.000000 1.000000 2.000000 3.000000 4.000000 5.000000 6.000000 7.000000 8.000000 9.000000
10.000000 11.000000 12.000000 13.000000 14.000000 15.000000 16.000000 17.000000 18.000000 19.000000
20.000000 21.000000 22.000000 23.000000 24.000000 25.000000 26.000000 27.000000 28.000000 29.000000
30.000000 31.000000 32.000000 33.000000 34.000000 35.000000 36.000000 37.000000 38.000000 39.000000
40.000000 41.000000 42.000000 43.000000 44.000000 45.000000 46.000000 47.000000 48.000000 49.000000
50.000000 51.000000 52.000000 53.000000 54.000000 55.000000 56.000000 57.000000 58.000000 59.000000
60.000000 61.000000 62.000000 63.000000 64.000000 65.000000 66.000000 67.000000 68.000000 69.000000
70.000000 71.000000 72.000000 73.000000 74.000000 75.000000 76.000000 77.000000 78.000000 79.000000
80.000000 81.000000 82.000000 83.000000 84.000000 85.000000 86.000000 87.000000 88.000000 89.000000
90.000000 91.000000 92.000000 93.000000 94.000000 95.000000 96.000000 97.000000 98.000000 99.000000
rank = 1
42.000000 43.000000 44.000000 45.000000 46.000000 47.000000 48.000000 49.000000 0.000000 0.000000
52.000000 53.000000 54.000000 55.000000 56.000000 57.000000 58.000000 59.000000 0.000000 0.000000
0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
[seismicmstm:07338] *** Process received signal *** [seismicmstm:07338] Signal:
Segmentation fault (11)
[seismicmstm:07338] Signal code:
(128) [seismicmstm:07338] Failing at
address: (nil)
-------------------------------------------------------------------------- mpirun noticed that process rank 1 with PID 7338 on node seismicmstm.cluster exited on signal 11 (Segmentation fault).
--------------------------------------------------------------------------
最佳答案
这里发生了几件事。好消息是最困难的事情——创建 mpi 数据类型,以及 MPI_Scatter 调用的基本结构——是正确的。
第一个问题是 MPI_Scatter 行使用 &(A[0][0]) ——但除了排名为零之外,您还没有将 A 设置为指向任何东西!所以你取消引用一个随机指针两次,这就是你的段错误。
如 suszterpatt 所建议的,一个更微妙的问题是,无法保证您分配的内存行是连续的,因此即使您修复了上述问题,您的分散操作也可能无法正常工作。您正在尝试将 strip_size * A_col double 从 A 中的某个位置发送到 strip_A,但 strip_A 可能不包含那么多连续的 double - 它可能是 A_col double ,然后是一些填充,然后是 A_col double - 或者实际上,各个行可以散落在内存中。三种修复方法是,为了方便(恕我直言):(a)通过创建整个数组,然后创建二维 C 数组以指向正确的位置,使数据在内存中连续; (b) 一次只发送一行;或 (c) 创建一个 MPI 数据类型,该类型实际上反射(reflect)了您的数据在内存中的映射方式(可能是随机映射)。
使用 (a) 的方法似乎有效(无论如何,对于 A_row 按大小均分)如下所示:
#include <stdio.h>
#include <mpi.h>
#include <stdlib.h>
int main(int argc, char** argv) {
int rank, size;
int strip_size, A_row, A_col;
double **A, **strip_A, *Adata, *stripdata;
MPI_Datatype strip;
int i,j;
MPI_Init(&argc,&argv) ;
MPI_Comm_rank(MPI_COMM_WORLD,&rank) ;
MPI_Comm_size(MPI_COMM_WORLD,&size) ;
if(rank == 0) {
A_row = 10;
A_col = 10;
/* calculate the strip size */
strip_size = A_row / size;
/* genarate Matrix A */
Adata = (double *)malloc(sizeof(double)*A_row*A_col);
A = (double **)malloc(sizeof(double*) * A_row);
for(i = 0; i < A_row; i++) {
A[i] = &(Adata[i*A_col]);
}
int k = 0;
for(i = 0; i < A_row; i++) {
for(j = 0; j < A_col; j++) {
A[i][j] = k;
k++;
}
}
}
/* Broadcasting the row, column size of Matrix A as well as strip size and Matrix B*/
MPI_Bcast(&A_row, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&A_col, 1, MPI_INT, 0, MPI_COMM_WORLD);
MPI_Bcast(&strip_size, 1, MPI_INT, 0, MPI_COMM_WORLD);
/* defining a datatype for sub-matrix */
MPI_Type_vector(strip_size, A_col, A_col, MPI_DOUBLE, &strip);
MPI_Type_commit(&strip);
stripdata = (double *)malloc(sizeof(double)*strip_size*A_col);
strip_A = (double **)malloc(sizeof(double*)*strip_size);
for(i= 0; i< strip_size; i++) {
strip_A[i] = &(stripdata[i*A_col]);
}
MPI_Scatter(Adata, 1, strip, &(strip_A[0][0]), 1, strip, 0, MPI_COMM_WORLD);
//MPI_Scatter(Adata, A_col*strip_size, MPI_DOUBLE, &(strip_A[0][0]), A_col*strip_size, MPI_DOUBLE, 0, MPI_COMM_WORLD);
for(i = 0; i < strip_size; i++) {
if(i == 0) {
printf("rank = %d\n", rank);
}
for(j = 0; j < A_col; j++) {
printf("%lf ", strip_A[i][j]);
}
printf("\n");
}
MPI_Type_free(&strip);
free(strip_A);
free(stripdata);
free(Adata);
free(A);
return 0;
}
关于matrix - 分散矩阵 - MPI,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/4702368/