c - MPI_ERR_RANK : invalid rank with cluster

标签 c mpi openmpi bucket-sort

我正在做一个课程项目:我从网上找了一份串行桶排序的代码,并尝试用 OpenMPI 把它改写成并行版本。该代码将在集群系统上运行。测试时,它报出以下错误:

"[cluster:5379] * An error occurred in MPI_Send [cluster:5379] on communicator MPI_COMM_WORLD [cluster:5379] MPI_ERR_RANK: invalid rank [cluster:5379] * MPI_ERRORS_ARE_FATAL: your MPI job will now abort "

有人可以建议我如何修复它吗?

P.S. 我的编程水平不高,所以有些问题我可能回答不上来。

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include "mpi.h"

/* One bucket of integers used by bucketSort. */
struct bucket
{
    int count;   /* number of elements currently stored in value */
    int* value;  /* buffer allocated by bucketSort that holds the elements */
};

/*
 * qsort-compatible comparator for two int values.
 *
 * first, second: pointers to the two ints being compared.
 * Returns -1 if *first < *second, 0 if they are equal, 1 otherwise.
 */
int compareIntegers(const void* first, const void* second)
{
    const int lhs = *(const int *)first;
    const int rhs = *(const int *)second;

    /* (lhs > rhs) and (lhs < rhs) are each 0 or 1, so the difference
     * is exactly -1, 0 or 1 and cannot overflow. */
    return (lhs > rhs) - (lhs < rhs);
}

/*
 * Sorts array[0..n-1] in ascending order, in place.
 *
 * The values are split into three buckets (negative, 0..10, greater than 10),
 * each bucket is sorted with qsort, and the buckets are copied back in order.
 *
 * array: the integers to sort; may be NULL only if nothing is to be sorted.
 * n:     number of elements; values below 2 leave the array untouched.
 *
 * If the scratch buffers cannot be allocated the function falls back to a
 * plain in-place qsort, which yields the same final ordering.
 */
void bucketSort(int array[], int n)
{
    struct bucket buckets[3];
    int i, j, k;
    int allocated = 1;

    /* Nothing to do for empty/single-element input or a missing array. */
    if (array == NULL || n < 2)
    {
        return;
    }

    for (i = 0; i < 3; i++)
    {
        buckets[i].count = 0;
        /* Every bucket can hold all n elements in the worst case. */
        buckets[i].value = malloc(sizeof(int) * (size_t)n);
        if (buckets[i].value == NULL)
        {
            allocated = 0;
        }
    }

    if (!allocated)
    {
        /* Out of memory: release what we got and sort without scratch space. */
        for (i = 0; i < 3; i++)
        {
            free(buckets[i].value);
        }
        qsort(array, (size_t)n, sizeof(int), &compareIntegers);
        return;
    }

    for (i = 0; i < n; i++)
    {
        if (array[i] < 0)
        {
            buckets[0].value[buckets[0].count++] = array[i];
        }
        else if (array[i] > 10)
        {
            buckets[2].value[buckets[2].count++] = array[i];
        }
        else
        {
            buckets[1].value[buckets[1].count++] = array[i];
        }
    }

    /* k is the write position in array for the start of the current bucket. */
    for (k = 0, i = 0; i < 3; i++)
    {
        qsort(buckets[i].value, (size_t)buckets[i].count, sizeof(int), &compareIntegers);
        for (j = 0; j < buckets[i].count; j++)
        {
            array[k + j] = buckets[i].value[j];
        }
        k += buckets[i].count;
        free(buckets[i].value);
    }
}

/*
 * Program entry point as posted in the question: rank 0 generates random
 * numbers and sends them to rank 1, rank 1 sorts and forwards to rank 2,
 * rank 2 prints. The question reports that this version aborts with
 * MPI_ERR_RANK in MPI_Send; the NOTE(review) comments below mark the
 * problems that are visible in this block.
 */
// NOTE(review): the parameters are declared as (argv, argc); the standard
// form is main(int argc, char *argv[]).
int main(char *argv[], int argc)
{
    // NOTE(review): local array of 1,000,000 ints.
    int array[1000000];
    int i = 0, j, k, n;   // only i is initialized here; j, k, n start indeterminate
    int num;
    //for MPI
    int numProc, rank;
    char procName[MPI_MAX_PROCESSOR_NAME];   // not used in this function
    int nameLen;                             // not used in this function
    int chunksize;                           // not used in this function
    double start, end;
    int msgtag;

    //MPI
    MPI_Status stat;
    // NOTE(review): MPI_Wtime is called here before MPI_Init.
    start = MPI_Wtime();    //timer start
    MPI_Init(&argc, &argv);

    MPI_Comm_rank(MPI_COMM_WORLD, &rank); //process rank ,comm_world = communication of the process
    MPI_Comm_size(MPI_COMM_WORLD, &numProc); //number of process
    msgtag = 1234;

    // Rank 0: read the element count, generate the data, print it, send it on.
    if (rank == 0)
    {
        printf("Enter number of element to be sort: ");
        // NOTE(review): the scanf result is not checked and num is not
        // compared against the size of array.
        scanf("%d", &num);

        for (i = 0; i < num; i++) //random num elements
        {
            array[i] = rand();
        }

        n = i;
        printf("\nBefore Sorting\n");
        for (j = 0; j < i; j++)
        {
            printf("%d ", array[j]);
        }
        // NOTE(review): after the loop above j == i, so &array[j] is the
        // address just past the last generated element, not the start of
        // the data. The destination rank 1 is hard-coded; presumably the
        // reported MPI_ERR_RANK means that rank did not exist in the job
        // -- confirm how the program was launched.
        MPI_Send(&array[j], j, MPI_INT, 1, msgtag, MPI_COMM_WORLD);
    }

    // Rank 1: receive, sort, forward to rank 2.
    if (rank == 1)
    {
        // NOTE(review): j and n are only assigned inside the rank 0 branch,
        // so on this rank they are read without having been set.
        MPI_Recv(&array[j], j, MPI_INT, 0, msgtag, MPI_COMM_WORLD, &stat);
        bucketSort(array, n);
        MPI_Send(&array, n, MPI_INT, 2, msgtag, MPI_COMM_WORLD);
    }

    // Rank 2: receive the sorted data and print it.
    if (rank == 2)
    {
        // NOTE(review): n is not assigned on this rank either.
        MPI_Recv(&array, n, MPI_INT, 1, msgtag, MPI_COMM_WORLD, &stat);
        printf("\nAfter Sorting\n");
        // NOTE(review): i is still 0 on this rank (it is only changed in the
        // rank 0 branch), so this loop body does not execute.
        for (k = 0; k < i; k++)
        {
            printf("%d ", array[k]);
        }
    }
    //MPI END
    MPI_Finalize();
    // NOTE(review): MPI_Wtime is called here after MPI_Finalize.
    end = MPI_Wtime();  // timer end
    double time_spent = end - start;
    printf("\ntime used for this program was %f Sec.", time_spent);

    return 0;
}

最佳答案

你的代码中有不少错误。希望截止日期是星期一...

第一:

int main(int argc, char *argv[])

int main(int argc, char *argv[]) 才是正确的写法(先 argc,后 argv)。

第二:

进程0是指定读取要生成的元素数量的进程。
然后它必须将其广播到所有其他进程,否则其他进程将在变量 num 中拥有未定义的数字,对吗?

因此

if (rank == 0)
{
        printf("Enter number of element to be sort: ");
        fflush(stdout);
        scanf("%d", &num);
        for (i = 0; i < num; i++) //random num elements
    {
        array[i] = rand();
    }
        n = num;
        printf("\nBefore Sorting (%i)\n", n);
    for (j = 0; j < n; j++)
    {
        printf("%d ", array[j]);
    }
        fflush(stdout);
}
MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);

第三:

避免在循环结束后继续复用循环变量。我知道在下面这段代码之后

for (j = 0; j < n; j++)
{
    printf("%d ", array[j]);
}

你有

j=n

但这样写并不直观……

第四:

MPI_Send 或 MPI_Recv 的第一个参数是缓冲区中第一个元素的地址。因此,如果像下面这样传参

MPI_Send(&array[j], j, MPI_INT, 1, msgtag, MPI_COMM_WORLD);

由于此时 j=n(见上面第三点的说明),我猜你得不到想要的结果。

你需要的是

MPI_Send(&array[0], n, MPI_INT, 1, msgtag, MPI_COMM_WORLD);

第五:

MPI_Barrier 是你的好帮手。输出是一项关键操作,因此在执行输出之前,你可以(可选地)先确保所有进程都已到达这一点。

if (rank == 2)
{
    MPI_Recv(&array, n, MPI_INT, 1, msgtag, MPI_COMM_WORLD, &stat);
    printf("\nAfter Sorting\n");
    for (k = 0; k < i; k++)
    {
        printf("%d ", array[k]);
    }
}

变成了

if (rank == 2)
{
    MPI_Recv(&(array[0]), n, MPI_INT, 1, msgtag, MPI_COMM_WORLD, &stat);
}
MPI_Barrier(MPI_COMM_WORLD);
if (rank == 2)
{
    printf("\nAfter Sorting\n");
    for (k = 0; k < n; k++)
    {
        printf("%d ", array[k]);
    }
}
    MPI_Barrier(MPI_COMM_WORLD);

结论:

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include "mpi.h"

/* One bucket of integers used by bucketSort. */
struct bucket
{
    int count;   /* number of elements currently stored in value */
    int* value;  /* scratch buffer allocated and freed by bucketSort */
};

/*
 * qsort-compatible comparator for two int values.
 *
 * first, second: pointers to the two ints being compared.
 * Returns -1 if *first < *second, 0 if they are equal, 1 otherwise.
 */
int compareIntegers(const void* first, const void* second)
{
    /* Keep the const qualifier: the comparator must not modify the elements. */
    const int x = *(const int *)first;
    const int y = *(const int *)second;

    if (x == y)
    {
        return 0;
    }
    return (x < y) ? -1 : 1;
}

/*
 * Sorts array[0..n-1] in ascending order, in place.
 *
 * The values are split into three buckets (negative, 0..10, greater than 10),
 * each bucket is sorted with qsort, and the buckets are copied back in order.
 *
 * array: the integers to sort; may be NULL only if nothing is to be sorted.
 * n:     number of elements; values below 2 leave the array untouched.
 *
 * If the scratch buffers cannot be allocated the function falls back to a
 * plain in-place qsort, which yields the same final ordering.
 */
void bucketSort(int array[], int n)
{
    struct bucket buckets[3];
    int i, j, k;
    int allocated = 1;

    /* Nothing to do for empty/single-element input or a missing array. */
    if (array == NULL || n < 2)
    {
        return;
    }

    for (i = 0; i < 3; i++)
    {
        buckets[i].count = 0;
        /* Every bucket can hold all n elements in the worst case. */
        buckets[i].value = malloc(sizeof(int) * (size_t)n);
        if (buckets[i].value == NULL)
        {
            allocated = 0;
        }
    }

    if (!allocated)
    {
        /* Out of memory: release what we got and sort without scratch space. */
        for (i = 0; i < 3; i++)
        {
            free(buckets[i].value);
        }
        qsort(array, (size_t)n, sizeof(int), &compareIntegers);
        return;
    }

    for (i = 0; i < n; i++)
    {
        if (array[i] < 0)
        {
            buckets[0].value[buckets[0].count++] = array[i];
        }
        else if (array[i] > 10)
        {
            buckets[2].value[buckets[2].count++] = array[i];
        }
        else
        {
            buckets[1].value[buckets[1].count++] = array[i];
        }
    }

    /* k is the write position in array for the start of the current bucket. */
    for (k = 0, i = 0; i < 3; i++)
    {
        qsort(buckets[i].value, (size_t)buckets[i].count, sizeof(int), &compareIntegers);
        for (j = 0; j < buckets[i].count; j++)
        {
            array[k + j] = buckets[i].value[j];
        }
        k += buckets[i].count;
        free(buckets[i].value);
    }
}

/*
 * Three-stage MPI pipeline around bucketSort:
 *   rank 0 reads the element count, generates random data, prints it and
 *          sends it to rank 1;
 *   rank 1 sorts the data and sends it to rank 2;
 *   rank 2 prints the sorted data.
 * Every rank prints its own elapsed wall-clock time at the end.
 *
 * The job must be started with at least 3 processes (e.g. mpirun -np 3);
 * otherwise the sends to ranks 1 and 2 would address ranks that do not
 * exist. Returns 0 on success and EXIT_FAILURE on too few processes or
 * invalid input.
 */
int main(int argc, char *argv[])
{
    /* static: 1,000,000 ints (about 4 MB) is too large to rely on the stack. */
    static int array[1000000];
    const int capacity = (int)(sizeof array / sizeof array[0]);
    int j, k;
    int n = 0;
    int numProc, rank;
    double start, end;
    const int msgtag = 1234;
    MPI_Status stat;

    /* MPI_Wtime is only used between MPI_Init and MPI_Finalize. */
    MPI_Init(&argc, &argv);
    start = MPI_Wtime();

    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &numProc);

    /* numProc is identical on every rank, so all ranks leave together. */
    if (numProc < 3)
    {
        if (rank == 0)
        {
            fprintf(stderr, "This program needs at least 3 MPI processes (got %d).\n", numProc);
        }
        MPI_Finalize();
        return EXIT_FAILURE;
    }

    if (rank == 0)
    {
        printf("Enter number of element to be sort: ");
        fflush(stdout);
        if (scanf("%d", &n) != 1 || n < 0 || n > capacity)
        {
            fprintf(stderr, "\nInvalid element count; expected 0..%d\n", capacity);
            n = -1;   /* sentinel broadcast below so every rank can stop */
        }
        else
        {
            for (j = 0; j < n; j++)
            {
                array[j] = rand();
            }
            printf("\nBefore Sorting\n");
            for (j = 0; j < n; j++)
            {
                printf("%d ", array[j]);
            }
            fflush(stdout);
        }
    }

    /* Only rank 0 knows n; share it before anyone sizes a message with it. */
    MPI_Bcast(&n, 1, MPI_INT, 0, MPI_COMM_WORLD);
    if (n < 0)
    {
        MPI_Finalize();
        return EXIT_FAILURE;
    }

    if (rank == 0)
    {
        MPI_Send(&(array[0]), n, MPI_INT, 1, msgtag, MPI_COMM_WORLD);
    }
    if (rank == 1)
    {
        MPI_Recv(&(array[0]), n, MPI_INT, 0, msgtag, MPI_COMM_WORLD, &stat);
        bucketSort(array, n);
        MPI_Send(&(array[0]), n, MPI_INT, 2, msgtag, MPI_COMM_WORLD);
    }
    if (rank == 2)
    {
        MPI_Recv(&(array[0]), n, MPI_INT, 1, msgtag, MPI_COMM_WORLD, &stat);
    }

    /* Optional synchronisation point before rank 2 writes its output. */
    MPI_Barrier(MPI_COMM_WORLD);
    if (rank == 2)
    {
        printf("\nAfter Sorting\n");
        for (k = 0; k < n; k++)
        {
            printf("%d ", array[k]);
        }
    }

    MPI_Barrier(MPI_COMM_WORLD);
    end = MPI_Wtime();
    MPI_Finalize();

    printf("\ntime used for this program was %f Sec.", end - start);

    return 0;
}

正在运行

mpirun -np 3 test_mpi.exe

输出

Enter number of element to be sort: 10

Before Sorting
1804289383 846930886 1681692777 1714636915 1957747793 424238335 719885386 1649760492 596516649 1189641421
After Sorting

424238335 596516649 719885386 846930886 1189641421 1649760492 1681692777 1714636915 1804289383 1957747793
time used for this program was 2.271976 Sec.time used for this program was 2.281183 Sec.
time used for this program was 2.277746 Sec.

关于c - MPI_ERR_RANK : invalid rank with cluster,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/52749059/

相关文章:

java - Open MPI 的 Java 绑定

c++ - 如何在 MPI 中使用共享的全局数据集?

无法让 execvp 执行文件

c - 在c中的函数中返回一个不同类型的值

C - 从 UDP 套接字缓冲区读取字节 (Linux)

C-MPI 发送创建的带有字符数组的 typedef 结构

c - MPI_Scatter() 错误

c++ - xterm 窗口无法在 Linux 上保持,它出现然后消失得非常快

python - 无法发送超过特定长度的 MPI 消息

php - 词典搜索