c++ - tbb矩阵乘法栈溢出错误c++

标签 c++ multithreading stack-overflow intel tbb

我正在尝试使用英特尔 tbb 中的任务进行矩阵乘法,我使用的算法是 Strassen 算法...

这是我的 main() 代码:

#include "Matrix.h"
#include "tbb/tick_count.h"

using namespace tbb;
using namespace std;

//Here is how I call MatTask class
Matrica callParallel(Matrx& A, Matrix& B, Matrix& C, int n){

    MatTask& t = *new (task::allocate_root ()) MatTask (A, B, &C, n);

    task::spawn_root_and_wait (t);

    return C;
}


int main(){
    int rows, columns;
    Matrix serialC;

    cout << "*******************\n" << "If rows and columns are < 6 you will enter the matric manualy\n" << "********************\n" <<endl;


    cout << "Enter rows for matrix A: ";
    cin >> rows;
    cout << "Enter columns for matrix A: ";
    cin >> columns;

    Matrix A(rows, columns);

    if(rows > 5 && columns > 5){
        A.createMatrixAutomatic();
    }else {
        A.createMatricManualy();
    }

    cout << "Enter rows for matrix B: ";
    cin >> rows;
    cout << "Enter columns for matrix B: ";
    cin >> columns;

    Matrix B(rows, columns);
    if(rows > 5 && columns > 5){
        B.createMatrixAutomatic();
    }else {
        B.createMatricManualy();
    }

    cout << "Matrix A: " << endl;
    A.printMatrix();
    cout << "Matrix B: " << endl;
    B.printMatrix();

    cout << "Matrix C: " << endl;

    tick_count start_time = tick_count::now();
    serialC.MultSerial(A, B);
    tick_count end_time = tick_count::now();

    cout << "\nTime for serial: " << (end_time - start_time).seconds() * 1000 << " ms" << endl; 

    serialC.printMatrix();

    //Creating matrix for result and calling the parallel algorithm
    Matrix parallelC(rows, columns);
    parallelC = callParallel(A, B, parallelC, rows);

    //This here prints the result matrix
    parallelC.printMatrix();

    system("PAUSE");
}

这是我的 Matrix.cpp 代码:

#include "Matrix.h"


Matrix::Matrix(){}

Matrix::Matrix(int rows, int columns){
    vr = rows;
    kol = columns;
}

void Matrix::createMatrixAutomatic(){
    for(int i = 0; i < vr; i++){
        for (int j = 0; j < kol; j++){
            int number = rand() % 5 + 1;
            matr[i][j] = number;
        }
    }
}

void Matrix::createMatricManualy(){
    cout << "Enter the elements: " << endl;
    for(int i = 0; i < vr; i++){
        for (int j = 0; j < kol; j++){
            cout << "Enter [" << i << "]" << "[" << j << "] element: ";
            cin >> matr[i][j];
        }
    }
}

void Matrix::printMatrix(){
    for (int i = 0; i < vr; i++){
        for (int j = 0; j < kol; j++){
            cout << matr[i][j] << "  ";
        }
        cout << endl << endl;
    }
}

void Matrix::MultSerial(Matrix& A, Matrix& B){
    for(int i = 0; i < A.vr; i++){
        for(int j = 0; j < B.kol; j++){
            matr[i][j] = 0;
            for(int k = 0; k < B.vr; k++){
                matr[i][j] += (A.matr[i][k] * B.matr[k][j]);
                vr = A.vr;
                kol = B.kol;
            }
        }
    }
}

void Matrix::substract(Matrix& A, Matrix& B, int dim){
    for (int i = 0; i < dim; i++) {
        for (int j = 0; j < dim; j++) {
            matr[i][j] = A.matr[i][j] - B.matr[i][j];
        }
    }  
}

void Matrix::Add(Matrix& A, Matrix& B, int dim){
    for (int i = 0; i < dim; i++) {
        for (int j = 0; j < dim; j++) {
            matr[i][j] = A.matr[i][j] + B.matr[i][j];
        }
    }
}

这是我的 MatTask 类和 Matrica.h 类

    #pragma once

    #include <iostream>
    #include <tbb/task.h>

    using namespace tbb;
    using namespace std;

    class Matrix{

public:


    int vr, kol;

    int matr[100][100];
    Matrix();
    Matrix(int rows, int columns);

    void createMatrixAutomatic();
    void createMatricManualy();
    void printMatrix();

    void MultSerial(Matrix&, Matrix&);

    void Add(Matrix& A, Matrix& B, int dim);

    void substract(Matrix& A, Matrix& B, int dim);
};

class MatTask: public task{
public:
    Matrix A, B;
    Matrix* C;
    int dimension;

    MatTask(Matrix& _A, Matrix& _B, Matrix* _C, int dim):
    A(_A), B(_B), C(_C), dimension(dim){}

    task* execute(){
        if(dimension == 1){
            C->MultSerial(A, B);
        } else {

            int newDimension = dimension/2;

            task_list list;

            int count = 1;
            Matrica a11(newDimension, newDimension), a12(newDimension, newDimension), a21(newDimension, newDimension), a22(newDimension, newDimension),
                    b11(newDimension, newDimension), b12(newDimension, newDimension), b21(newDimension, newDimension), b22(newDimension, newDimension),
                    *c11, *c12, *c21, *c22,
                    p1(newDimension, newDimension), *p2, *p3, *p4, *p5, *p6, *p7,
                    aResult(newDimension, newDimension), bResult(newDimension, newDimension);

            //Delimo matrice u 4 podmatrice
            for(int i = 0; i < newDimension; i++){
                for(int j = 0; j < newDimension; j++){
                    (a11).matr[i][j] = A.matr[i][j];
                    (a12).matr[i][j] = A.matr[i][j + newDimension];
                    (a21).matr[i][j] = A.matr[i + newDimension][j];
                    (a22).matr[i][j] = A.matr[i + newDimension][j + newDimension];

                    (b11).matr[i][j] = B.matr[i][j];
                    (b12).matr[i][j] = B.matr[i][j + newDimension];
                    (b21).matr[i][j] = B.matr[i + newDimension][j];
                    (b22).matr[i][j] = B.matr[i + newDimension][j + newDimension];
                }
            }

            //RACUNAMO p1...p7

            //p1 = (a11 + a22) * (b11 + b22)
            aResult.Add(a11, a22, newDimension); //a11 + a22
            bResult.Add(b11, b22, newDimension); //b11 + b22
            count++;
            //MatTask& a = *new( allocate_child() ) MatTask(aResult, bResult, &p1, newDimension); 
            //lista.push_back(a);
            lista.push_back(*new (allocate_child()) MatTask(aResult, bResult, &p1, newDimension));

            //p2 = (a21 + a22) * b11
            //aResult.Add(a21, a22, newDimension); //a21 + a22
            //count++;
            ////lista.push_back(*new (allocate_child()) MatTask(aResult, b11, p2, newDimension));

            ////p3 = a11 * (b12 - b22)
            //bResult.substract(b12, b22, newDimension); // b12 - b22
            //count++;
            ////lista.push_back(*new (allocate_child()) MatTask(a11, bResult, p3, newDimension));

            ////p4 = a22 * (b21 - b11)
            //bResult.substract(b21, b11, newDimension); // b21 - b11
            //count++;
            ////lista.push_back(*new (allocate_child()) MatTask(a22, bResult, p4, newDimension));

            ////p5 = (a11 + a12) * b22
            //aResult.Add(a11, a12, newDimension); // a11 + a12
            //count++;
            ////lista.push_back(*new (allocate_child()) MatTask(aResult, b22, p5, newDimension));

            ////p6 = (a21 - a11) * (b11 + b12)
            //bResult.Add(b11, b12, newDimension); //b11 + b12
            //aResult.substract(a21, a11, newDimension); //a21 - a11
            //count++;
            ////lista.push_back(*new (allocate_child()) MatTask(aResult, bResult, p6, newDimension));

            ////p7 = (a12 - a22) * (b21 + b22)
            //bResult.Add(b21, b22, newDimension); //b21 + b22
            //aResult.substract(a12, a22, newDimension); //a12 - a22
            //count++;
            ////lista.push_back(*new (allocate_child()) MatTask(aResult, bResult, p7, newDimension)); 

            set_ref_count(count);
            //spawn(a);
            spawn_and_wait_for_all(list);
            //spawn_and_wait_for_all(a);

            //RACUNAMO d11, d12, d21, d22

             //c11 = p1 + p4 - p5 + p7
            //aResult.Add(p1, p4, newDimension); // p1 + p4
            //bResult.Add(aResult, p7, newDimension); // p1 + p4 + p7

            //c11.oduzmi(bResult, p5, newDimension); // c11 = p1 + p4 + p7 - p5

            //// c12 = p3 + p5
            //c12.Add(p3, p5, newDimension);
            //
            //// c21 = p2 + p4
            //c21.Add(p2, p4, newDimension);

            //// c22 = p1 + p3 - p2 + p6
            //aResult.Add(p1, p3, newDimension); //p1 + p3
            //bResult.Add(aResult, p6, newDimension); //p1 + p3 + p6
            //c22.substract(bResult, p2, newDimension); // c22 = p1 + p3 + p6 - p2

            //Grouping the results obtained in a single matrix:
            //for (int i = 0; i < novaDimenzija ; i++) {
            //  for (int j = 0 ; j < novaDimenzija ; j++) {
            //      C.matr[i][j] = c11.matr[i][j];
            //      C.matr[i][j + newDimension] = c12.matr[i][j];
            //      C.matr[i + newDimension][j] = c21.matr[i][j];
            //      C.matr[i + newDimension][j + newDimension] = c22.matr[i][j];
            //  }
            //}
        }
        return NULL;
    }
};

如您所见,函数和类的名称不是英文的,但我认为这不会成为问题,因为代码非常简单。

我得到错误:

Unhandled exception at 0x01193787 in MnozenjeMatrica.exe: 0xC00000FD: Stack overflow.

我认为错误发生在 spawn_and_wait_for_all(lista) 行中,但我不确定。

能否请您看一下我的代码并帮助我解决问题。也许我没有正确调用函数,我真的不知道,请帮忙。谢谢

最佳答案

这是阻塞式并行加上大量使用矩阵堆栈导致堆栈溢出。因此,您的每个任务都为其数据保留一些堆栈,然后调用 spawn_root_and_wait_for_all,后者依次执行同一任务的另一个实例,递归地保持堆栈增长。

使用continuation-style programming并避免在堆栈上分配大量数据(如果可能,在任务内部 - 它会降低任务分配器的效率)。

关于c++ - tbb矩阵乘法栈溢出错误c++,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/30641874/

相关文章:

c++ - 在 QDialog 中创建线程并向 Qt 中的 QDialog 发出信号

c++ - 聚合初始化的 C++17 扩展是否使大括号初始化变得危险?

c# - 可观察对象的同步机制

go - 如何在golang中捕获堆栈溢出错误

c - Valgrind 报告排序算法分区函数的堆栈溢出 : can't figure out why

c++ - Jenkins 中的文件差异与 ClearCase

java - 如何让多个线程和类写入 Java 中的同一个日志文件?

C++ - 无法弄清楚如何使用互斥锁计算线程数

c - 返回 libc - 问题

c++ - boost 的 shared_ptr(shared_ptr<Y> const & r, T * p) 是做什么用的?