c++ - 如何提高多线程效率？

简要回顾:我有一个C++多线程数独求解器，我想提高效率，我需要你的帮助。

我在 C++ 的多线程中实现了一个强力数独求解器。主要结构是一棵树，所有逻辑是这样的:我开始读取输入中的初始矩阵，这将是我的根节点，然后我找到第一个空单元格，我找到所有可能适合该位置的数字，然后对于每种可能性，我都创建了父节点的子节点，这样每个节点都会为每个可能的数字创建一个子节点。树继续以这种方式增长，直到遇到一个解决方案，即一个节点没有更多的空闲单元格(因此它是满的)，并且节点网格满足数独的所有规则。

我尝试在多线程中像这样实现这个算法:我按顺序遵循与上面完全相同的逻辑，但每次都迈出一步，存储直到那一刻我遇到的所有 child (每个 child 都是一条路径，等等一棵树)在一个 vector 中。如果 child 少于用户选择的线程，那么我按顺序解决它们，然后再进行一步( child 会长大)。当我的 child 多于线程时，我会为每个线程拆分 child ，然后用他的部分(即树)启动每个线程。

现在，考虑到“蛮力”方法和“仅标准库”要求是强制性的，所以我不能用不同的方式来做，但我当然可以改变如何实现这个的逻辑.

问题是:如何提高这个程序的效率？欢迎所有仅使用标准库的建议。

#define UNASSIGNED 0
#define N 9
#define ERROR_PAIR std::make_pair(-1, -1)

using namespace std;

atomic<bool> solutionFound{false};

//Each node has a sudokuMatrix and some sub-trees
struct Node {
    vector<vector<int>> grid;
    vector<Node *> child;
};


Node *newNode(const vector<vector<int>> &newGrid) {
    Node *temp = new Node;
    temp->grid = newGrid;
    return temp;
}

//Check if a number can be inserted in a given position
bool canInsert(const int &val, const int &row_, const int &col_,
               const vector<vector<int>> &grid) {
    // Check column
    for (int row = 0; row < N; row++) {
        if (grid[row][col_] == val) return false;
    }
    // Check row
    for (int col = 0; col < N; col++) {
        if (grid[row_][col] == val) return false;
    }
    // check box
    for (int row = 0; row < N; row++) {
        for (int col = 0; col < N; col++) {
            if (row / 3 == row_ / 3 &&
                col / 3 == col_ / 3) {  // they are in the same square 3x3
                if ((grid[row][col] == val)) return false;
            }
        }
    }
    return true;
}

//Check if the sudoku is solved
bool isSafe(const vector<vector<int>> &grid) 
{
    // Hashmap for row column and boxes
    unordered_map<int, int> row_[9], column_[9], box[3][3];
    for (int row = 0; row < N; row++) {
        for (int col = 0; col < N; col++) {
            // mark the element in row column and box
            row_[row][grid[row][col]] += 1;
            column_[col][grid[row][col]] += 1;
            box[row / 3][col / 3][grid[row][col]] += 1;

            // if an element is already
            // present in the hashmap
            if (box[row / 3][col / 3][grid[row][col]] > 1 ||
                column_[col][grid[row][col]] > 1 ||
                row_[row][grid[row][col]] > 1)
                return false;
        }
    }
    return true;
}
//Find the first empty cell
pair<int, int> findCell(const vector<vector<int>> &grid) {
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            if (grid[i][j] == UNASSIGNED) {
                return make_pair(i, j);
            }
        }
    }
    return ERROR_PAIR;
}

//Find all the numbers i can insert in a given position, and update the matrix with that number. Return
//the set of all the matrixes(one for each possibility).
list<vector<vector<int>>> getChoices(const int &row, const int &col,
                                     const vector<vector<int>> &grid) {
    list<vector<vector<int>>> choices;
    for (int i = 1; i < 10; i++) {
        if (canInsert(i, row, col, grid)) {
            // cout << "posso inserire =" << i << endl;
            vector<vector<int>> tmpGrid = grid;
            tmpGrid[row][col] = i;
            choices.push_back(tmpGrid);
        }
    }
    return choices;
}

//Update the childreen of a node.
void addChoices(list<vector<vector<int>>> &choices, Node &node) {
    while (!choices.empty()) {
        node.child.push_back(newNode(choices.front()));
        choices.pop_front();
    }
    return;
}

//Compute one step of computation for each node in input, and put all the childreen in the task vector.
void solveOneStep(vector<Node *> &nodes, const int &nw, vector<Node *> &tasks) {
    if (solutionFound) return;
    for (Node *&n : nodes) {
        if (findCell(n->grid) != ERROR_PAIR) {
            pair<int, int> freeCell = findCell(n->grid);
            list<vector<vector<int>>> choices =
                getChoices(freeCell.first, freeCell.second, n->grid);
            if (choices.empty()) {
                continue;
            }
            addChoices(choices, *n);
            for (Node *&n : n->child) {
                tasks.push_back(n);
            }
            continue;
        } else if (isSafe(n->grid)) {
            if (!solutionFound.load()) {
                solutionFound.store(true);
                printGrid(n->grid);
                cout << "That's the first solution found !" << endl;
            }
            return;
        }
    }
}

//Compute step by step the computation until you reach a level of the entire tree of nodes where
//the nodes of that level are more than the number of worker(NW) choose by the user. 
vector<Node *> splitChunks(Node *root, const int &nw) {
    vector<Node *> tasks;
    vector<Node *> nodes;
    nodes.push_back(root);

    while ((int)tasks.size() < nw && !solutionFound) {
        tasks.clear();
        solveOneStep(nodes, nw, tasks);
        nodes = tasks;
    }
    return tasks;
}

//Solve recursively all the sub-trees of all the nodes given in input, until a solution is found or no
//solution exist.
void solveSubTree(vector<Node *> &nodes, const int &nw,) {
    if (solutionFound) return;
    for (Node *&n : nodes) {
        if (findCell(n->grid) != ERROR_PAIR) {
            pair<int, int> freeCell = findCell(n->grid);
            list<vector<vector<int>>> choices =
                getChoices(freeCell.first, freeCell.second, n->grid);
            if (choices.empty()) {
                continue;
            }
            addChoices(choices, *n);
            solveSubTree(n->child, nw);
        } else if (isSafe(n->grid)) {
            if (!solutionFound.load()) {
                solutionFound.store(true);
                printGrid(n->grid);
                std::cout << "That's the first solution found !" << endl;
            }
            return;
        }
    }
}


int main(int argc, char *argv[]) {
    if (argc != 2) {
        cout << "Usage is: nw " << endl;
        return (-1);
    }
//A test matrix.
    vector<vector<int>> grid = 
                            { { 0, 1, 0, 0, 0, 0, 0, 0, 0 }, 
                              { 0, 0, 0, 0, 0, 0, 0, 0, 0 }, 
                              { 0, 8, 0, 0, 0, 0, 0, 0, 0 }, 
                              { 0, 0, 0, 0, 0, 0, 0, 0, 0 }, 
                              { 0, 0, 0, 0, 0, 0, 0, 0, 0 }, 
                              { 0, 0, 0, 0, 0, 0, 0, 0, 0 }, 
                              { 0, 0, 0, 0, 0, 0, 0, 0, 0 }, 
                              { 0, 0, 0, 0, 0, 0, 0, 0, 0 }, 
                              { 0, 0, 0, 0, 0, 0, 0, 0, 0 } };
    
    Node *root = newNode(grid);
    vector<thread> tids;
    const int nw = atoi(argv[1]); //Number of worker
    vector<vector<Node *>> works(nw, vector<Node *>()); 
    vector<Node *> tasks = splitChunks(root, nw);

//Split the tasks for each thread, the main thread takes the last part of the work.
    for (int i = 0; i < nw; i++) {
        int limit = 0;
        i == nw - 1 ? limit = tasks.size() : limit = tasks.size() / nw;
        for (int j = 0; j < limit; j++) {
            works[i].push_back(tasks.back());
            tasks.pop_back();
        }
    }

//Start each thread, and then the main thread start his computation.
    for (int i = 0; i < nw - 1; i++) {
        tids.push_back(thread(solveSubTree, ref(works[i]), ref(nw)));
    }
    solveSubTree(works[nw - 1], nw, t1);  // Main thread do last part of the work

    for (thread &t : tids) {
        t.join();
    }

    std::cout << "end" << endl;
    return (0);
}

最佳答案

这里有几点可以提高引用实现的效率:

使用 vector<vector<int>> for 2D array 远非高效:它在内存中不连续并导致许多缓慢的分配。应该首选大扁平阵列。
unordered_map<int, int>因为集合中的整数在一个小的(连续的)范围内，所以不需要类似集合的操作。使用简单数组要快得多。
有些拷贝是不需要的，可以用std::move删除。 .
由于网格中的整数很小，char可用于 int (减少内存占用，将数据保存在 CPU 缓存中，并尽可能加快分配速度)。
我看到一个new但没有delete在代码中...
线程之间的工作在许多情况下似乎明显不平衡，导致较慢的并行执行，应该执行负载平衡以更好地扩展。一种方法是使用任务安排。
可以使用启发式方法来大大加快探索速度。首先，我建议您查看 constraint satisfaction problem (CSP)，因为众所周知，CSP 求解器非常擅长求解它。更多一般和理论信息可以在书中找到 Artificial Intelligence: a modern approach .

这是应用第一条注释的代码，在我的机器上执行速度提高了 5 倍(请注意网格已在 main 中修改):

#define UNASSIGNED 0
#define N 9
#define ERROR_PAIR std::make_pair(-1, -1)

using namespace std;

void printGrid(const array<char, N*N>& grid)
{
    for (int row = 0; row < N; row++)
    {
        for (int col = 0; col < N; col++)
        {
            cout << (int)grid[row*N+col] << " ";
        }
        cout << endl;
    }
}

atomic<bool> solutionFound{false};

//Each node has a sudokuMatrix and some sub-trees
struct Node {
    array<char, N*N> grid;
    vector<Node *> child;
};


Node *newNode(const array<char, N*N> &newGrid) {
    Node *temp = new Node;
    temp->grid = newGrid;
    return temp;
}

//Check if a number can be inserted in a given position
bool canInsert(const int &val, const int &row_, const int &col_,
               const array<char, N*N> &grid) {
    // Check column
    for (int row = 0; row < N; row++) {
        if (grid[row*N+col_] == val) return false;
    }
    // Check row
    for (int col = 0; col < N; col++) {
        if (grid[row_*N+col] == val) return false;
    }
    // check box
    for (int row = 0; row < N; row++) {
        for (int col = 0; col < N; col++) {
            if (row / 3 == row_ / 3 &&
                col / 3 == col_ / 3) {  // they are in the same square 3x3
                if ((grid[row*N+col] == val)) return false;
            }
        }
    }
    return true;
}

//Check if the sudoku is solved
bool isSafe(const array<char, N*N> &grid) 
{
    // No need for a hashmap for row column and boxes, 
    // just an array since associated values are small integer
    char row_[9][N+1] = {0};
    char column_[9][N+1] = {0};
    char box[3][3][N+1] = {0};

    for (int row = 0; row < N; row++) {
        for (int col = 0; col < N; col++) {
            // mark the element in row column and box
            row_[row][grid[row*N+col]] += 1;
            column_[col][grid[row*N+col]] += 1;
            box[row / 3][col / 3][grid[row*N+col]] += 1;

            // if an element is already
            // present in the hashmap
            if (box[row / 3][col / 3][grid[row*N+col]] > 1 ||
                column_[col][grid[row*N+col]] > 1 ||
                row_[row][grid[row*N+col]] > 1)
                return false;
        }
    }
    return true;
}
//Find the first empty cell
pair<int, int> findCell(const array<char, N*N> &grid) {
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            if (grid[i*N+j] == UNASSIGNED) {
                return make_pair(i, j);
            }
        }
    }
    return ERROR_PAIR;
}

//Find all the numbers i can insert in a given position, and update the matrix with that number. Return
//the set of all the matrixes(one for each possibility).
list<array<char, N*N>> getChoices(const int &row, const int &col,
                                     const array<char, N*N> &grid) {
    list<array<char, N*N>> choices;
    for (int i = 1; i < 10; i++) {
        if (canInsert(i, row, col, grid)) {
            // cout << "posso inserire =" << i << endl;
            array<char, N*N> tmpGrid = grid;
            tmpGrid[row*N+col] = i;
            choices.push_back(std::move(tmpGrid));
        }
    }
    return choices;
}

//Update the childreen of a node.
void addChoices(list<array<char, N*N>> &choices, Node &node) {
    while (!choices.empty()) {
        node.child.push_back(newNode(choices.front()));
        choices.pop_front();
    }
    return;
}

//Compute one step of computation for each node in input, and put all the childreen in the task vector.
void solveOneStep(vector<Node *> &nodes, const int &nw, vector<Node *> &tasks) {
    if (solutionFound) return;
    for (Node *&n : nodes) {
        if (findCell(n->grid) != ERROR_PAIR) {
            pair<int, int> freeCell = findCell(n->grid);
            list<array<char, N*N>> choices =
                getChoices(freeCell.first, freeCell.second, n->grid);
            if (choices.empty()) {
                continue;
            }
            addChoices(choices, *n);
            for (Node *&n : n->child) {
                tasks.push_back(n);
            }
            continue;
        } else if (isSafe(n->grid)) {
            if (!solutionFound.load()) {
                solutionFound.store(true);
                printGrid(n->grid);
                cout << "That's the first solution found !" << endl;
            }
            return;
        }
    }
}

//Compute step by step the computation until you reach a level of the entire tree of nodes where
//the nodes of that level are more than the number of worker(NW) choose by the user. 
vector<Node *> splitChunks(Node *root, const int &nw) {
    vector<Node *> tasks;
    vector<Node *> nodes;
    nodes.push_back(root);

    while ((int)tasks.size() < nw && !solutionFound) {
        tasks.clear();
        solveOneStep(nodes, nw, tasks);
        nodes = tasks;
    }
    return tasks;
}

//Solve recursively all the sub-trees of all the nodes given in input, until a solution is found or no
//solution exist.
void solveSubTree(vector<Node *> &nodes, const int &nw) {
    if (solutionFound) return;
    for (Node *&n : nodes) {
        if (findCell(n->grid) != ERROR_PAIR) {
            pair<int, int> freeCell = findCell(n->grid);
            list<array<char, N*N>> choices =
                getChoices(freeCell.first, freeCell.second, n->grid);
            if (choices.empty()) {
                continue;
            }
            addChoices(choices, *n);
            solveSubTree(n->child, nw);
        } else if (isSafe(n->grid)) {
            if (!solutionFound.load()) {
                solutionFound.store(true);
                printGrid(n->grid);
                std::cout << "That's the first solution found !" << endl;
            }
            return;
        }
    }
}


int main(int argc, char *argv[]) {
    if (argc != 2) {
        cout << "Usage is: nw " << endl;
        return (-1);
    }
//A test matrix.
    array<char, N*N> grid = 
                            { 0, 0, 0, 0, 0, 0, 2, 0, 0,
                              0, 8, 0, 0, 0, 7, 0, 9, 0,
                              6, 0, 2, 0, 0, 0, 5, 0, 0,
                              0, 7, 0, 0, 6, 0, 0, 0, 0,
                              0, 0, 0, 9, 0, 1, 0, 0, 0,
                              0, 0, 0, 0, 2, 0, 0, 4, 0,
                              0, 0, 5, 0, 0, 0, 6, 0, 3,
                              0, 9, 0, 4, 0, 0, 0, 7, 0,
                              0, 0, 6, 0, 0, 0, 0, 0, 0 };
    
    Node *root = newNode(grid);
    vector<thread> tids;
    const int nw = atoi(argv[1]); //Number of worker
    vector<vector<Node *>> works(nw, vector<Node *>()); 
    vector<Node *> tasks = splitChunks(root, nw);

//Split the tasks for each thread, the main thread takes the last part of the work.
    for (int i = 0; i < nw; i++) {
        int limit = 0;
        i == nw - 1 ? limit = tasks.size() : limit = tasks.size() / nw;
        for (int j = 0; j < limit; j++) {
            works[i].push_back(tasks.back());
            tasks.pop_back();
        }
    }

//Start each thread, and then the main thread start his computation.
    for (int i = 0; i < nw - 1; i++) {
        tids.push_back(thread(solveSubTree, ref(works[i]), ref(nw)));
    }
    solveSubTree(works[nw - 1], nw);  // Main thread do last part of the work

    for (thread &t : tids) {
        t.join();
    }

    std::cout << "end" << endl;
    return (0);
}

关于c++ - 如何提高多线程效率？，我们在Stack Overflow上找到一个类似的问题： https://stackoverflow.com/questions/62967304/

c++ - 如何提高多线程效率？

上一篇：c++ - 如何将非静态成员函数用于模板参数？

下一篇：c++ - 访问未初始化的对象。这怎么可能？