这是关于我在这里编写的代码的基本信息。我最近开始学习和编写 MPI。此代码的目的是计算给定总进程数的计算时间。我将发布完整的代码。它有 97 行长,但我会指出我认为问题所在..
基本思路
(1) 用户输入一个常数(K)和一个整数(M),然后BCAST两个值。
(2) 三个一维数组(A,B,C)将被分配M个 block 。
(3) 一个子程序(init_random_seed) 将用 M 个随机数填充数组 A 和 B 并对其进行 BCAST。
(4) 数组 C 用零填充并发送到 process==1,在 process==1 时,将在数组 A 和 B 之间进行简单的数学计算。
(5) 每次迭代的结果将存储在数组 C 中,并通过 MPI_SEND 发送到 process==2。
(6) 最后在process==2处,将C的结果写入文本文件。
这里是代码,
MODULE MPI !!! I usually initialize all the variables here
INCLUDE 'mpif.h'
REAL :: U,V,K
REAL :: START,FINISH
INTEGER :: O,M,FILE
INTEGER :: MYID,TOTPS, IERR
REAL,ALLOCATABLE,DIMENSION(:) :: A,B,C
END MODULE MPI
PROGRAM CRAFT !!! main program
USE MPI
CALL MPIINIT
CALL CPU_TIME(START)
CALL TEST
CALL CPU_TIME(FINISH)
PRINT*, " TOTAL PROCESSING TIME = " , FINISH - START , "SECONDS AT PROCESS", MYID
CALL MPI_FINALIZE(IERR)
STOP
END PROGRAM CRAFT
SUBROUTINE MPIINIT
USE MPI
CALL MPI_INIT( IERR )
CALL MPI_COMM_RANK(MPI_COMM_WORLD,MYID,IERR)
CALL MPI_COMM_SIZE(MPI_COMM_WORLD,TOTPS,IERR)
RETURN
END SUBROUTINE MPIINIT
SUBROUTINE TEST
USE MPI
CALL INITIAL
CALL WORK
CALL COLLECT
END SUBROUTINE TEST
SUBROUTINE INITIAL !!! random number input and BCAST
USE MPI
CALL MPI_BARRIER(MPI_COMM_WORLD,IERR)
!! I'm not sure if a barrier is necessary or not here.
IF (MYID .EQ. 0) THEN
PRINT*, "ENTER A CONSTANT"
READ*, K
PRINT*, "HOW MANY TERMS?"
READ*, M
END IF
ALLOCATE(A(M),B(M),C(M))
CALL INIT_RANDOM_SEED() !!! see the very last subroutine
DO O =1,M
CALL RANDOM_NUMBER(U)
CALL RANDOM_NUMBER(V)
A(O) = U*10
B(O) = V*10
C(O) = 0.0
END DO
CALL MPI_BCAST(K,1,MPI_REAL,0,MPI_COMM_WORLD,IERR)
CALL MPI_BCAST(A,M,MPI_REAL,0,MPI_COMM_WORLD,IERR)
CALL MPI_BCAST(B,M,MPI_REAL,0,MPI_COMM_WORLD,IERR)
CALL MPI_BCAST(M,1,MPI_INTEGER,0,MPI_COMM_WORLD,IERR)
CALL MPI_SEND(C,M,MPI_REAL,1,0,MPI_COMM_WORLD,IERR) !! tag value is 0
END SUBROUTINE INITIAL
SUBROUTINE WORK !!! simple math calculations
USE MPI
IF(MYID .EQ. 1) THEN
CALL MPI_RECV(C,M,MPI_REAL,0,0,MPI_COMM_WORLD,MPISTTS,IERR)
DO O = 1,M
C(O) = (1/K)*( A(O)**K - K*B(0))
END DO
CALL MPI_SEND(C,M,MPI_REAL,2,1,MPI_COMM_WORLD,IERR) !! tag value is 1
END IF
END SUBROUTINE WORK
SUBROUTINE COLLECT !! writing txt files
USE MPI
IF (MYID .EQ. 2) THEN
CALL MPI_RECV(C,M,MPI_REAL,1,1,MPI_COMM_WORLD,MPISTTS,IERR)
OPEN(UNIT=11,FILE="ARRAY.TXT",ACTION="WRITE")
DO O =1,M
WRITE(11,'(I2,2X,F4.1,2X,F4.1,2X,F4.1)') O, A(0),B(O),C(O)
END DO
CLOSE(11)
END IF
END SUBROUTINE COLLECT
SUBROUTINE INIT_RANDOM_SEED() !! I found this subroutine on online
IMPLICIT NONE
INTEGER :: I,N,CLOCK
INTEGER, DIMENSION(:), ALLOCATABLE :: SEED
CALL RANDOM_SEED(SIZE=N)
ALLOCATE(SEED(N))
CALL SYSTEM_CLOCK(COUNT=CLOCK)
SEED = CLOCK + 37 * (/ (I - 1, I = 1, N) /)
CALL RANDOM_SEED(PUT = SEED)
DEALLOCATE(SEED)
END SUBROUTINE INIT_RANDOM_SEED
我来这里的原因
* 程序符合要求,但我认为我遇到了运行时错误。这是错误,
ENTER A CONSTANT
2
HOW MANY TERMS?
3
[sflogin0:11103] *** An error occurred in MPI_Bcast
[sflogin0:11103] *** on communicator MPI_COMM_WORLD
[sflogin0:11103] *** MPI_ERR_TRUNCATE: message truncated
[sflogin0:11103] *** MPI_ERRORS_ARE_FATAL (your MPI job will now abort)
TOTAL PROCESSING TIME = 2.9265954 SECONDS AT PROCESS 0
--------------------------------------------------------------------------
mpirun has exited due to process rank 1 with PID 11103 on
node sflogin0 exiting without calling "finalize". This may
have caused other processes in the application to be
terminated by signals sent by mpirun (as reported here).
--------------------------------------------------------------------------
[sflogin0:11099] 2 more process has sent help message help-mpi-errors.txt / mpi_errors_are_fatal
[sflogin0:11099] Set MCA parameter "orte_base_help_aggregate" to 0 to see all help / error messages
注意:我在运行时只给出了五个进程 mpirun -np 5 ./a.out
请看一下,帮帮我。谢谢
最佳答案
在 SUBROUTINE INITIAL
中,您必须:
在做分配前广播
M
;除了进程 0,没有其他进程真正分配。默认整数为零,他们分配的向量大小为零,这将是广播中的一个问题。CALL MPI_BCAST(M,1,MPI_INTEGER,0,MPI_COMM_WORLD,IERR) ALLOCATE(A(M),B(M),C(M))
下一个要点实际上并不是当前问题的一部分,而是您将面临的下一个问题。
如果只有一个进程在获取您描述和实现的随机数,则应在 if 中完成该部分,以确保只有该进程在执行该部分。
IF (MYID == 0) THEN !generate the numbers END IF
我强烈建议您将
MPI_SEND
和MPI_RECV
放在一起。在这方面,您应该将MPI_SEND
从SUBROUTINE INITIAL
移动到SUBROUTINE WORK
并将另一个从SUBROUTINE WORK
到SUBROUTINE COLLECT
。这将节省您调试的时间。不要忘记将第一个MPI_SEND
放在IF
语句中IF (MYID .EQ. 0) THEN CALL MPI_SEND(C,M,MPI_REAL,1,0,MPI_COMM_WORLD,IERR) !! tag value is 0 END IF
我还建议在进行并行时对所有输入使用文件而不是标准输入。
最后,只是为了给你一个关于代码组织的提示,完整的代码可能看起来像(见下文)。
O
用于变量名是一个非常糟糕的主意,我将其更改为 I 并将其设置为局部变量。现在,您要确保索引 0
确实是零,而不是您想要放置 O
的拼写错误。另外,我添加了 IMPLICIT NONE
来强制声明所有变量。我将所有 MPI 的子程序放入模块中,并将所有其他子程序放入 PROGRAM
的 CONTAINS
部分。
MODULE MPI !!! I usually initialize all the variables here
IMPLICIT NONE
INCLUDE 'mpif.h'
REAL :: U,V,K
REAL :: START,FINISH
INTEGER :: O,M,FILE
INTEGER :: MYID,TOTPS, IERR, MPISTTS
REAL,ALLOCATABLE,DIMENSION(:) :: A,B,C
CONTAINS
SUBROUTINE MPIINIT
IMPLICIT NONE
CALL MPI_INIT( IERR )
CALL MPI_COMM_RANK(MPI_COMM_WORLD,MYID,IERR)
CALL MPI_COMM_SIZE(MPI_COMM_WORLD,TOTPS,IERR)
RETURN
END SUBROUTINE MPIINIT
END MODULE MPI
PROGRAM CRAFT !!! main program
USE MPI
IMPLICIT NONE
CALL MPIINIT
CALL CPU_TIME(START)
CALL TEST
CALL CPU_TIME(FINISH)
PRINT*, " TOTAL PROCESSING TIME = " , FINISH - START , "SECONDS AT PROCESS", MYID
CALL MPI_FINALIZE(IERR)
STOP
CONTAINS
SUBROUTINE TEST
IMPLICIT NONE
CALL INITIAL
CALL WORK
CALL COLLECT
END SUBROUTINE TEST
SUBROUTINE INITIAL !!! random number input and BCAST
IMPLICIT NONE
CALL MPI_BARRIER(MPI_COMM_WORLD,IERR)
!! I'm not sure if a barrier is necessary or not here.
IF (MYID .EQ. 0) THEN
PRINT*, "ENTER A CONSTANT"
READ*, K
PRINT*, "HOW MANY TERMS?"
READ*, M
END IF
CALL MPI_BCAST(M,1,MPI_INTEGER,0,MPI_COMM_WORLD,IERR)
ALLOCATE(A(M),B(M),C(M))
IF (MYID .EQ. 0) THEN
CALL INIT_RANDOM_SEED() !!! see the very last subroutine
DO O =1,M
CALL RANDOM_NUMBER(U)
CALL RANDOM_NUMBER(V)
A(O) = U*10
B(O) = V*10
C(O) = 0.0
END DO
END IF
CALL MPI_BCAST(K,1,MPI_REAL,0,MPI_COMM_WORLD,IERR)
CALL MPI_BCAST(A,M,MPI_REAL,0,MPI_COMM_WORLD,IERR)
CALL MPI_BCAST(B,M,MPI_REAL,0,MPI_COMM_WORLD,IERR)
IF (MYID .EQ. 0) THEN
CALL MPI_SEND(C,M,MPI_REAL,1,0,MPI_COMM_WORLD,IERR) !! tag value is 0
END IF
END SUBROUTINE INITIAL
SUBROUTINE WORK !!! simple math calculations
IMPLICIT NONE
INTEGER I
IF(MYID .EQ. 1) THEN
CALL MPI_RECV(C,M,MPI_REAL,0,0,MPI_COMM_WORLD,MPISTTS,IERR)
DO I = 1,M
C(I) = (1/K)*( A(I)**K - K*B(0))
END DO
END IF
END SUBROUTINE WORK
SUBROUTINE COLLECT !! writing txt files
IMPLICIT NONE
INTEGER I
IF(MYID .EQ. 1) THEN
CALL MPI_SEND(C,M,MPI_REAL,2,1,MPI_COMM_WORLD,IERR) !! tag value is 1
ELSE IF (MYID .EQ. 2) THEN
CALL MPI_RECV(C,M,MPI_REAL,1,1,MPI_COMM_WORLD,MPISTTS,IERR)
OPEN(UNIT=11,FILE="ARRAY.TXT",ACTION="WRITE")
DO I =1,M
WRITE(11,'(I2,2X,F4.1,2X,F4.1,2X,F4.1)') I, A(0),B(I),C(I)
END DO
CLOSE(11)
END IF
END SUBROUTINE COLLECT
SUBROUTINE INIT_RANDOM_SEED() !! I found this subroutine on online
IMPLICIT NONE
INTEGER :: I,N,CLOCK
INTEGER, DIMENSION(:), ALLOCATABLE :: SEED
CALL RANDOM_SEED(SIZE=N)
ALLOCATE(SEED(N))
CALL SYSTEM_CLOCK(COUNT=CLOCK)
SEED = CLOCK + 37 * (/ (I - 1, I = 1, N) /)
CALL RANDOM_SEED(PUT = SEED)
DEALLOCATE(SEED)
END SUBROUTINE INIT_RANDOM_SEED
END PROGRAM CRAFT
关于compiler-errors - MPI FORTRAN 错误(在数组中生成随机数和简单计算),我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/32126705/