c++ - 如何提高多线程效率?

标签 c++ multithreading performance

简要回顾:我有一个C++多线程数独求解器,我想提高效率,我需要你的帮助。

我在 C++ 的多线程中实现了一个强力数独求解器。主要结构是一棵树,所有逻辑是这样的:我开始读取输入中的初始矩阵,这将是我的根节点,然后我找到第一个空单元格,我找到所有可能适合该位置的数字,然后对于每种可能性,我都创建了父节点的子节点,这样每个节点都会为每个可能的数字创建一个子节点。树继续以这种方式增长,直到遇到一个解决方案,即一个节点没有更多的空闲单元格(因此它是满的),并且节点网格满足数独的所有规则。

我尝试在多线程中像这样实现这个算法:我按顺序遵循与上面完全相同的逻辑,但每次都迈出一步,存储直到那一刻我遇到的所有 child (每个 child 都是一条路径,等等一棵树)在一个 vector 中。如果 child 少于用户选择的线程,那么我按顺序解决它们,然后再进行一步( child 会长大)。当我的 child 多于线程时,我会为每个线程拆分 child ,然后用他的部分(即树)启动每个线程。

现在,考虑到“蛮力”方法和“仅标准库”要求是强制性的,所以我不能用不同的方式来做,但我当然可以改变如何实现这个的逻辑.

问题是:如何提高这个程序的效率?欢迎所有仅使用标准库的建议。

#define UNASSIGNED 0
#define N 9
#define ERROR_PAIR std::make_pair(-1, -1)

using namespace std;

atomic<bool> solutionFound{false};

//Each node has a sudokuMatrix and some sub-trees
struct Node {
    vector<vector<int>> grid;
    vector<Node *> child;
};


Node *newNode(const vector<vector<int>> &newGrid) {
    Node *temp = new Node;
    temp->grid = newGrid;
    return temp;
}

//Check if a number can be inserted in a given position
bool canInsert(const int &val, const int &row_, const int &col_,
               const vector<vector<int>> &grid) {
    // Check column
    for (int row = 0; row < N; row++) {
        if (grid[row][col_] == val) return false;
    }
    // Check row
    for (int col = 0; col < N; col++) {
        if (grid[row_][col] == val) return false;
    }
    // check box
    for (int row = 0; row < N; row++) {
        for (int col = 0; col < N; col++) {
            if (row / 3 == row_ / 3 &&
                col / 3 == col_ / 3) {  // they are in the same square 3x3
                if ((grid[row][col] == val)) return false;
            }
        }
    }
    return true;
}

//Check if the sudoku is solved
bool isSafe(const vector<vector<int>> &grid) 
{
    // Hashmap for row column and boxes
    unordered_map<int, int> row_[9], column_[9], box[3][3];
    for (int row = 0; row < N; row++) {
        for (int col = 0; col < N; col++) {
            // mark the element in row column and box
            row_[row][grid[row][col]] += 1;
            column_[col][grid[row][col]] += 1;
            box[row / 3][col / 3][grid[row][col]] += 1;

            // if an element is already
            // present in the hashmap
            if (box[row / 3][col / 3][grid[row][col]] > 1 ||
                column_[col][grid[row][col]] > 1 ||
                row_[row][grid[row][col]] > 1)
                return false;
        }
    }
    return true;
}
//Find the first empty cell
pair<int, int> findCell(const vector<vector<int>> &grid) {
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            if (grid[i][j] == UNASSIGNED) {
                return make_pair(i, j);
            }
        }
    }
    return ERROR_PAIR;
}

//Find all the numbers i can insert in a given position, and update the matrix with that number. Return
//the set of all the matrixes(one for each possibility).
list<vector<vector<int>>> getChoices(const int &row, const int &col,
                                     const vector<vector<int>> &grid) {
    list<vector<vector<int>>> choices;
    for (int i = 1; i < 10; i++) {
        if (canInsert(i, row, col, grid)) {
            // cout << "posso inserire =" << i << endl;
            vector<vector<int>> tmpGrid = grid;
            tmpGrid[row][col] = i;
            choices.push_back(tmpGrid);
        }
    }
    return choices;
}

//Update the childreen of a node.
void addChoices(list<vector<vector<int>>> &choices, Node &node) {
    while (!choices.empty()) {
        node.child.push_back(newNode(choices.front()));
        choices.pop_front();
    }
    return;
}

//Compute one step of computation for each node in input, and put all the childreen in the task vector.
void solveOneStep(vector<Node *> &nodes, const int &nw, vector<Node *> &tasks) {
    if (solutionFound) return;
    for (Node *&n : nodes) {
        if (findCell(n->grid) != ERROR_PAIR) {
            pair<int, int> freeCell = findCell(n->grid);
            list<vector<vector<int>>> choices =
                getChoices(freeCell.first, freeCell.second, n->grid);
            if (choices.empty()) {
                continue;
            }
            addChoices(choices, *n);
            for (Node *&n : n->child) {
                tasks.push_back(n);
            }
            continue;
        } else if (isSafe(n->grid)) {
            if (!solutionFound.load()) {
                solutionFound.store(true);
                printGrid(n->grid);
                cout << "That's the first solution found !" << endl;
            }
            return;
        }
    }
}

//Compute step by step the computation until you reach a level of the entire tree of nodes where
//the nodes of that level are more than the number of worker(NW) choose by the user. 
vector<Node *> splitChunks(Node *root, const int &nw) {
    vector<Node *> tasks;
    vector<Node *> nodes;
    nodes.push_back(root);

    while ((int)tasks.size() < nw && !solutionFound) {
        tasks.clear();
        solveOneStep(nodes, nw, tasks);
        nodes = tasks;
    }
    return tasks;
}

//Solve recursively all the sub-trees of all the nodes given in input, until a solution is found or no
//solution exist.
void solveSubTree(vector<Node *> &nodes, const int &nw,) {
    if (solutionFound) return;
    for (Node *&n : nodes) {
        if (findCell(n->grid) != ERROR_PAIR) {
            pair<int, int> freeCell = findCell(n->grid);
            list<vector<vector<int>>> choices =
                getChoices(freeCell.first, freeCell.second, n->grid);
            if (choices.empty()) {
                continue;
            }
            addChoices(choices, *n);
            solveSubTree(n->child, nw);
        } else if (isSafe(n->grid)) {
            if (!solutionFound.load()) {
                solutionFound.store(true);
                printGrid(n->grid);
                std::cout << "That's the first solution found !" << endl;
            }
            return;
        }
    }
}


int main(int argc, char *argv[]) {
    if (argc != 2) {
        cout << "Usage is: nw " << endl;
        return (-1);
    }
//A test matrix.
    vector<vector<int>> grid = 
                            { { 0, 1, 0, 0, 0, 0, 0, 0, 0 }, 
                              { 0, 0, 0, 0, 0, 0, 0, 0, 0 }, 
                              { 0, 8, 0, 0, 0, 0, 0, 0, 0 }, 
                              { 0, 0, 0, 0, 0, 0, 0, 0, 0 }, 
                              { 0, 0, 0, 0, 0, 0, 0, 0, 0 }, 
                              { 0, 0, 0, 0, 0, 0, 0, 0, 0 }, 
                              { 0, 0, 0, 0, 0, 0, 0, 0, 0 }, 
                              { 0, 0, 0, 0, 0, 0, 0, 0, 0 }, 
                              { 0, 0, 0, 0, 0, 0, 0, 0, 0 } };
    
    Node *root = newNode(grid);
    vector<thread> tids;
    const int nw = atoi(argv[1]); //Number of worker
    vector<vector<Node *>> works(nw, vector<Node *>()); 
    vector<Node *> tasks = splitChunks(root, nw);

//Split the tasks for each thread, the main thread takes the last part of the work.
    for (int i = 0; i < nw; i++) {
        int limit = 0;
        i == nw - 1 ? limit = tasks.size() : limit = tasks.size() / nw;
        for (int j = 0; j < limit; j++) {
            works[i].push_back(tasks.back());
            tasks.pop_back();
        }
    }

//Start each thread, and then the main thread start his computation.
    for (int i = 0; i < nw - 1; i++) {
        tids.push_back(thread(solveSubTree, ref(works[i]), ref(nw)));
    }
    solveSubTree(works[nw - 1], nw, t1);  // Main thread do last part of the work

    for (thread &t : tids) {
        t.join();
    }

    std::cout << "end" << endl;
    return (0);
}

最佳答案

这里有几点可以提高引用实现的效率:

  • 使用 vector<vector<int>> for 2D array 远非高效:它在内存中不连续并导致许多缓慢的分配。应该首选大扁平阵列。
  • unordered_map<int, int>因为集合中的整数在一个小的(连续的)范围内,所以不需要类似集合的操作。使用简单数组要快得多。
  • 有些拷贝是不需要的,可以用std::move删除。 .
  • 由于网格中的整数很小,char可用于 int (减少内存占用,将数据保存在 CPU 缓存中,并尽可能加快分配速度)。
  • 我看到一个new但没有delete在代码中...
  • 线程之间的工作在许多情况下似乎明显不平衡,导致较慢的并行执行,应该执行负载平衡以更好地扩展。一种方法是使用任务安排
  • 可以使用启发式方法来大大加快探索速度。首先,我建议您查看 constraint satisfaction problem (CSP),因为众所周知,CSP 求解器非常擅长求解它。更多一般和理论信息可以在书中找到 Artificial Intelligence: a modern approach .

这是应用第一条注释的代码,在我的机器上执行速度提高了 5 倍(请注意网格已在 main 中修改):

#define UNASSIGNED 0
#define N 9
#define ERROR_PAIR std::make_pair(-1, -1)

using namespace std;

void printGrid(const array<char, N*N>& grid)
{
    for (int row = 0; row < N; row++)
    {
        for (int col = 0; col < N; col++)
        {
            cout << (int)grid[row*N+col] << " ";
        }
        cout << endl;
    }
}

atomic<bool> solutionFound{false};

//Each node has a sudokuMatrix and some sub-trees
struct Node {
    array<char, N*N> grid;
    vector<Node *> child;
};


Node *newNode(const array<char, N*N> &newGrid) {
    Node *temp = new Node;
    temp->grid = newGrid;
    return temp;
}

//Check if a number can be inserted in a given position
bool canInsert(const int &val, const int &row_, const int &col_,
               const array<char, N*N> &grid) {
    // Check column
    for (int row = 0; row < N; row++) {
        if (grid[row*N+col_] == val) return false;
    }
    // Check row
    for (int col = 0; col < N; col++) {
        if (grid[row_*N+col] == val) return false;
    }
    // check box
    for (int row = 0; row < N; row++) {
        for (int col = 0; col < N; col++) {
            if (row / 3 == row_ / 3 &&
                col / 3 == col_ / 3) {  // they are in the same square 3x3
                if ((grid[row*N+col] == val)) return false;
            }
        }
    }
    return true;
}

//Check if the sudoku is solved
bool isSafe(const array<char, N*N> &grid) 
{
    // No need for a hashmap for row column and boxes, 
    // just an array since associated values are small integer
    char row_[9][N+1] = {0};
    char column_[9][N+1] = {0};
    char box[3][3][N+1] = {0};

    for (int row = 0; row < N; row++) {
        for (int col = 0; col < N; col++) {
            // mark the element in row column and box
            row_[row][grid[row*N+col]] += 1;
            column_[col][grid[row*N+col]] += 1;
            box[row / 3][col / 3][grid[row*N+col]] += 1;

            // if an element is already
            // present in the hashmap
            if (box[row / 3][col / 3][grid[row*N+col]] > 1 ||
                column_[col][grid[row*N+col]] > 1 ||
                row_[row][grid[row*N+col]] > 1)
                return false;
        }
    }
    return true;
}
//Find the first empty cell
pair<int, int> findCell(const array<char, N*N> &grid) {
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            if (grid[i*N+j] == UNASSIGNED) {
                return make_pair(i, j);
            }
        }
    }
    return ERROR_PAIR;
}

//Find all the numbers i can insert in a given position, and update the matrix with that number. Return
//the set of all the matrixes(one for each possibility).
list<array<char, N*N>> getChoices(const int &row, const int &col,
                                     const array<char, N*N> &grid) {
    list<array<char, N*N>> choices;
    for (int i = 1; i < 10; i++) {
        if (canInsert(i, row, col, grid)) {
            // cout << "posso inserire =" << i << endl;
            array<char, N*N> tmpGrid = grid;
            tmpGrid[row*N+col] = i;
            choices.push_back(std::move(tmpGrid));
        }
    }
    return choices;
}

//Update the childreen of a node.
void addChoices(list<array<char, N*N>> &choices, Node &node) {
    while (!choices.empty()) {
        node.child.push_back(newNode(choices.front()));
        choices.pop_front();
    }
    return;
}

//Compute one step of computation for each node in input, and put all the childreen in the task vector.
void solveOneStep(vector<Node *> &nodes, const int &nw, vector<Node *> &tasks) {
    if (solutionFound) return;
    for (Node *&n : nodes) {
        if (findCell(n->grid) != ERROR_PAIR) {
            pair<int, int> freeCell = findCell(n->grid);
            list<array<char, N*N>> choices =
                getChoices(freeCell.first, freeCell.second, n->grid);
            if (choices.empty()) {
                continue;
            }
            addChoices(choices, *n);
            for (Node *&n : n->child) {
                tasks.push_back(n);
            }
            continue;
        } else if (isSafe(n->grid)) {
            if (!solutionFound.load()) {
                solutionFound.store(true);
                printGrid(n->grid);
                cout << "That's the first solution found !" << endl;
            }
            return;
        }
    }
}

//Compute step by step the computation until you reach a level of the entire tree of nodes where
//the nodes of that level are more than the number of worker(NW) choose by the user. 
vector<Node *> splitChunks(Node *root, const int &nw) {
    vector<Node *> tasks;
    vector<Node *> nodes;
    nodes.push_back(root);

    while ((int)tasks.size() < nw && !solutionFound) {
        tasks.clear();
        solveOneStep(nodes, nw, tasks);
        nodes = tasks;
    }
    return tasks;
}

//Solve recursively all the sub-trees of all the nodes given in input, until a solution is found or no
//solution exist.
void solveSubTree(vector<Node *> &nodes, const int &nw) {
    if (solutionFound) return;
    for (Node *&n : nodes) {
        if (findCell(n->grid) != ERROR_PAIR) {
            pair<int, int> freeCell = findCell(n->grid);
            list<array<char, N*N>> choices =
                getChoices(freeCell.first, freeCell.second, n->grid);
            if (choices.empty()) {
                continue;
            }
            addChoices(choices, *n);
            solveSubTree(n->child, nw);
        } else if (isSafe(n->grid)) {
            if (!solutionFound.load()) {
                solutionFound.store(true);
                printGrid(n->grid);
                std::cout << "That's the first solution found !" << endl;
            }
            return;
        }
    }
}


int main(int argc, char *argv[]) {
    if (argc != 2) {
        cout << "Usage is: nw " << endl;
        return (-1);
    }
//A test matrix.
    array<char, N*N> grid = 
                            { 0, 0, 0, 0, 0, 0, 2, 0, 0,
                              0, 8, 0, 0, 0, 7, 0, 9, 0,
                              6, 0, 2, 0, 0, 0, 5, 0, 0,
                              0, 7, 0, 0, 6, 0, 0, 0, 0,
                              0, 0, 0, 9, 0, 1, 0, 0, 0,
                              0, 0, 0, 0, 2, 0, 0, 4, 0,
                              0, 0, 5, 0, 0, 0, 6, 0, 3,
                              0, 9, 0, 4, 0, 0, 0, 7, 0,
                              0, 0, 6, 0, 0, 0, 0, 0, 0 };
    
    Node *root = newNode(grid);
    vector<thread> tids;
    const int nw = atoi(argv[1]); //Number of worker
    vector<vector<Node *>> works(nw, vector<Node *>()); 
    vector<Node *> tasks = splitChunks(root, nw);

//Split the tasks for each thread, the main thread takes the last part of the work.
    for (int i = 0; i < nw; i++) {
        int limit = 0;
        i == nw - 1 ? limit = tasks.size() : limit = tasks.size() / nw;
        for (int j = 0; j < limit; j++) {
            works[i].push_back(tasks.back());
            tasks.pop_back();
        }
    }

//Start each thread, and then the main thread start his computation.
    for (int i = 0; i < nw - 1; i++) {
        tids.push_back(thread(solveSubTree, ref(works[i]), ref(nw)));
    }
    solveSubTree(works[nw - 1], nw);  // Main thread do last part of the work

    for (thread &t : tids) {
        t.join();
    }

    std::cout << "end" << endl;
    return (0);
}

关于c++ - 如何提高多线程效率?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/62967304/

相关文章:

iphone - 为什么 UIKit 操作必须在主线程上执行?

javascript - 如何让处理器密集型功能更新节拍器的每个滴答声并使用网络 worker 绘制到 Canvas 上?

c++ - 用于空间和性能提升的位对齐

sql - 结果集比较实用程序

c++ - 在编译时验证 std::initializer_list 的内容

c++ - QWebSocket Hello World示例

c++ - Gaffer游戏时间步长:std::chrono实现

c++ - 使用条件运算符递归计算模板值或函数时出现错误 C1202(堆栈溢出)

java - 在eclipse中调试多个线程

mysql - 使用大量连接优化慢速 MySQL 查询