c - MPI 超额认购

标签 c parallel-processing mpi cpu hpc

在 MPI 中超额订阅(oversubscribe)CPU 内核时,可运行的进程数量是否有上限?

这是我之前的帖子: Does the number of processes in MPI have a limit?

今天我运行了《Using MPI》一书中的另一个程序。它在 52 个进程时可以正常运行,但在 53 个进程时会挂起。

我的笔记本电脑有 4 个内核(支持超线程)和 8 GB 内存。MPI 版本为 Open MPI 1.4.3。

谢谢。

代码如下:

#include <stdio.h>
#include <mpi.h>

/* Tag passed to MPI_Intercomm_create when the worker/server intercommunicator is built. */
#define ICTAG 0
/* Rank the workers address on the counter communicator when talking to the server. */
#define SERVER_RANK 0
/* Message kinds; the enumerators are used as MPI message tags between workers and the counter server. */
typedef enum { REQUEST, VALUE, GOAWAY } nxtval_msgtype;

/*
int MPI_Recv(void *buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Status *status)
{
    int ret;
//  printf("Before MPI_Recv. tag=%d\n", tag);
    ret=PMPI_Recv(buf, count, datatype, source, tag, comm, status);
    printf("After  MPI_Recv. count=%d tag=%d source=%d\n", count, status->MPI_TAG, status->MPI_SOURCE);
    fflush(stdout);
    return ret;
}
*/
/*
 * Tracing override of MPI_Intercomm_create.
 *
 * Writes a "Before" line to stdout, forwards all arguments unchanged to
 * PMPI_Intercomm_create, writes an "After" line, and returns whatever the
 * PMPI call returned.  stdout is flushed after each line so that the trace
 * is visible even if the process blocks inside the call.
 */
int MPI_Intercomm_create(MPI_Comm local_comm, int local_leader, MPI_Comm peer_comm, int remote_leader, int tag, MPI_Comm *newintercomm)
{
    int rc;

    fputs("Before MPI_Intercomm_create\n", stdout);
    fflush(stdout);

    rc = PMPI_Intercomm_create(local_comm, local_leader, peer_comm,
                               remote_leader, tag, newintercomm);

    fputs("After  MPI_Intercomm_create\n", stdout);
    fflush(stdout);

    return rc;
}

/*
 * Collectively split `oldcomm` into a group of workers and one counter
 * server, and connect the two groups with an intercommunicator.
 *
 * The highest rank of `oldcomm` acts as the server: it stays inside this
 * function, answering every REQUEST message with the current counter value
 * (starting at 0, incremented after each reply) until a GOAWAY message
 * arrives.  All other ranks return as soon as the intercommunicator exists.
 *
 * oldcomm       communicator to split; every rank of it must make this call.
 * smaller_comm  out: on non-server ranks, the communicator produced by the
 *               split (it contains the non-server ranks); on the server,
 *               MPI_COMM_NULL.
 * counter_comm  out: on non-server ranks, the intercommunicator to the
 *               server; on the server it is freed before returning, so it
 *               is MPI_COMM_NULL afterwards.
 *
 * Returns 0 on success, or 1 if `oldcomm` has fewer than two processes (in
 * which case both output communicators are MPI_COMM_NULL).
 */
int MPE_Counter_create_ic(MPI_Comm oldcomm, MPI_Comm *smaller_comm, MPI_Comm *counter_comm)
{
    int counter=0, done=0, myid, numprocs, server;
    int color, remote_leader_rank;
    MPI_Status status;
    MPI_Comm oldcommdup, splitcomm;

    /* Give both outputs a defined value on every rank and every path. */
    *smaller_comm=MPI_COMM_NULL;
    *counter_comm=MPI_COMM_NULL;

    MPI_Comm_dup(oldcomm, &oldcommdup);
    MPI_Comm_size(oldcommdup, &numprocs);
    MPI_Comm_rank(oldcommdup, &myid);

    if(numprocs<2)
    {
        /* With a single process there is no worker group, so the two
           disjoint groups an intercommunicator requires cannot be formed. */
        fprintf(stderr, "MPE counter needs at least 2 processes, got %d\n", numprocs);
        MPI_Comm_free(&oldcommdup);
        return 1;
    }

    server=numprocs-1;          /* last rank is the server */
    color=(myid==server);       /* 1 for the server, 0 for everyone else */
    MPI_Comm_split(oldcomm, color, myid, &splitcomm);
    if(!color)
    {
        /* Non-server side: the remote leader is the server's rank in oldcommdup. */
        remote_leader_rank=server;
        *smaller_comm=splitcomm;
    }
    else
    {
        /* Server side: the remote leader is rank 0 of oldcommdup. */
        remote_leader_rank=0;
    }
    /* oldcommdup is only used as the peer communicator for this call. */
    MPI_Intercomm_create(splitcomm, 0, oldcommdup, remote_leader_rank, ICTAG, counter_comm);
    MPI_Comm_free(&oldcommdup);

    if(myid==server)
    {
        while(!done)
        {
            /* Requests carry no payload; the tag alone identifies the message kind. */
            MPI_Recv(NULL, 0, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG, *counter_comm, &status);
            if(status.MPI_TAG==REQUEST)
            {
                MPI_Send(&counter, 1, MPI_INT, status.MPI_SOURCE, VALUE, *counter_comm);
                ++counter;
            }
            else if(status.MPI_TAG==GOAWAY)
                done=1;
            else
            {
                fprintf(stderr, "bad tag %d sent to MPE counter\n", status.MPI_TAG);
                MPI_Abort(*counter_comm, 1);
            }
        }

        /* MPI_Comm_free is collective: the other side frees counter_comm in
           MPE_Counter_free_ic, so the server must release it as well.  The
           server's own half of the split is never handed to the caller and
           would otherwise leak. */
        MPI_Comm_free(counter_comm);
        MPI_Comm_free(&splitcomm);
    }
    return 0;
}

/*
 * Fetch one counter value over `counter_comm`.
 *
 * Sends an empty REQUEST message to SERVER_RANK, then blocks until a single
 * int tagged VALUE arrives from SERVER_RANK and stores it in *value.
 * Always returns 0.
 */
int MPE_Counter_nxtval_ic(MPI_Comm counter_comm, int *value)
{
    MPI_Status reply_status;

    /* Zero-length request: only the tag matters. */
    MPI_Send(NULL, 0, MPI_INT, SERVER_RANK, REQUEST, counter_comm);

    /* The reply holds exactly one int. */
    MPI_Recv(value, 1, MPI_INT, SERVER_RANK, VALUE, counter_comm, &reply_status);

    return 0;
}

/*
 * Shut the counter down and release both communicators.
 *
 * All callers synchronise on *smaller_comm; afterwards rank 0 of
 * *smaller_comm sends an empty GOAWAY message to SERVER_RANK on
 * *counter_comm.  Both communicators are then freed.  Always returns 0.
 */
int MPE_Counter_free_ic(MPI_Comm *smaller_comm, MPI_Comm *counter_comm)
{
    int local_rank;

    MPI_Comm_rank(*smaller_comm, &local_rank);

    /* The barrier completes before GOAWAY is sent. */
    MPI_Barrier(*smaller_comm);

    if(0==local_rank)
    {
        MPI_Send(NULL, 0, MPI_INT, SERVER_RANK, GOAWAY, *counter_comm);
    }

    MPI_Comm_free(counter_comm);
    MPI_Comm_free(smaller_comm);

    return 0;
}

/*
 * Demo driver: every rank of MPI_COMM_WORLD except the last one requests a
 * single value from the counter, prints it together with its rank in
 * counter_comm, and then takes part in tearing the counter down.
 */
int main(int argc, char **argv)
{
    MPI_Comm counter_comm, worker_comm;
    int world_size, world_rank;
    int is_worker;

    MPI_Init(&argc, &argv);
    MPE_Counter_create_ic(MPI_COMM_WORLD, &worker_comm, &counter_comm);

    MPI_Comm_size(MPI_COMM_WORLD, &world_size);
    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);

    /* The last world rank is the one that served the counter inside
       MPE_Counter_create_ic; it has nothing left to do here. */
    is_worker = (world_rank != world_size - 1);

    if (is_worker) {
        int fetched, ic_rank;

        MPI_Comm_rank(counter_comm, &ic_rank);
        MPE_Counter_nxtval_ic(counter_comm, &fetched);
        printf("[%d] received value %d\n", ic_rank, fetched);
        MPE_Counter_free_ic(&worker_comm, &counter_comm);
    }

    MPI_Finalize();
    return 0;
}

这是它运行的结果:

shuang@phoebe:~/usingMPI$ mpiexec -n 8 ./nxtval_ic
Before MPI_Intercomm_create
After  MPI_Intercomm_create
[0] received value 3
Before MPI_Intercomm_create
After  MPI_Intercomm_create
[1] received value 1
Before MPI_Intercomm_create
After  MPI_Intercomm_create
[2] received value 2
Before MPI_Intercomm_create
After  MPI_Intercomm_create
[3] received value 5
Before MPI_Intercomm_create
After  MPI_Intercomm_create
[4] received value 6
Before MPI_Intercomm_create
After  MPI_Intercomm_create
[5] received value 0
Before MPI_Intercomm_create
After  MPI_Intercomm_create
[6] received value 4
Before MPI_Intercomm_create
After  MPI_Intercomm_create

挂起时是这样的

shuang@phoebe:~/usingMPI$ mpiexec -n 100 ./nxtval_ic
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
Before MPI_Intercomm_create
^Cmpiexec: killing job...

--------------------------------------------------------------------------
mpiexec was unable to cleanly terminate the daemons on the nodes shown
below. Additional manual cleanup may be required - please refer to
the "orte-clean" tool for assistance.
--------------------------------------------------------------------------

最佳答案

从 MPI 标准的角度来看,可以存在的 MPI 进程数量没有限制

您的 MPI 实现可能有限制;如果您的实现选择将 MPI 进程映射为操作系统进程(这很常见,MPICH 和 Open MPI 等都是这样做的),那么您还可能遇到操作系统所能支持的(OS)进程数量的上限。

参见 Maximum number of processes in linux了解如何确定操作系统进程是否是一个问题。

在实践中,我发现在我的双核、四硬件线程的笔记本电脑上运行超过 50 个 MPI 进程是不可行的,但我从未费心去弄清楚究竟是什么造成了这一限制。

关于c - MPI 超额认购,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/25281609/

相关文章:

c - 如何让 MPI 中的所有等级向等级 0 发送一个值,然后阻塞接收所有等级?

c - 将 Expokit 翻译成 C

android - 在 Android NDK 中将字符串传递给 C 代码

python - 与 Numba 并行循环——与 prange 不并行

matlab - 为什么 Matlab 2014a/b 中的 TreeBagger 只使用并行池中的几个 worker ?

c - 什么是 "Signal 15 received"

c - 如果我不包含头文件会发生什么

c - 使用套接字的文件传输服务器/客户端

scala - 创建括号平衡器的并行处理

c++ - MPI 中的动态内存分配