简要回顾:我有一个C++多线程数独求解器,我想提高效率,我需要你的帮助。
我在 C++ 的多线程中实现了一个强力数独求解器。主要结构是一棵树,所有逻辑是这样的:我开始读取输入中的初始矩阵,这将是我的根节点,然后我找到第一个空单元格,我找到所有可能适合该位置的数字,然后对于每种可能性,我都创建了父节点的子节点,这样每个节点都会为每个可能的数字创建一个子节点。树继续以这种方式增长,直到遇到一个解决方案,即一个节点没有更多的空闲单元格(因此它是满的),并且节点网格满足数独的所有规则。
我尝试在多线程中像这样实现这个算法:我按顺序遵循与上面完全相同的逻辑,但每次都迈出一步,存储直到那一刻我遇到的所有 child (每个 child 都是一条路径,等等一棵树)在一个 vector 中。如果 child 少于用户选择的线程,那么我按顺序解决它们,然后再进行一步( child 会长大)。当我的 child 多于线程时,我会为每个线程拆分 child ,然后用他的部分(即树)启动每个线程。
现在,考虑到“蛮力”方法和“仅标准库”要求是强制性的,所以我不能用不同的方式来做,但我当然可以改变如何实现这个的逻辑.
问题是:如何提高这个程序的效率?欢迎所有仅使用标准库的建议。
#define UNASSIGNED 0
#define N 9
#define ERROR_PAIR std::make_pair(-1, -1)
using namespace std;
atomic<bool> solutionFound{false};
//Each node has a sudokuMatrix and some sub-trees
struct Node {
vector<vector<int>> grid;
vector<Node *> child;
};
Node *newNode(const vector<vector<int>> &newGrid) {
Node *temp = new Node;
temp->grid = newGrid;
return temp;
}
//Check if a number can be inserted in a given position
bool canInsert(const int &val, const int &row_, const int &col_,
const vector<vector<int>> &grid) {
// Check column
for (int row = 0; row < N; row++) {
if (grid[row][col_] == val) return false;
}
// Check row
for (int col = 0; col < N; col++) {
if (grid[row_][col] == val) return false;
}
// check box
for (int row = 0; row < N; row++) {
for (int col = 0; col < N; col++) {
if (row / 3 == row_ / 3 &&
col / 3 == col_ / 3) { // they are in the same square 3x3
if ((grid[row][col] == val)) return false;
}
}
}
return true;
}
//Check if the sudoku is solved
bool isSafe(const vector<vector<int>> &grid)
{
// Hashmap for row column and boxes
unordered_map<int, int> row_[9], column_[9], box[3][3];
for (int row = 0; row < N; row++) {
for (int col = 0; col < N; col++) {
// mark the element in row column and box
row_[row][grid[row][col]] += 1;
column_[col][grid[row][col]] += 1;
box[row / 3][col / 3][grid[row][col]] += 1;
// if an element is already
// present in the hashmap
if (box[row / 3][col / 3][grid[row][col]] > 1 ||
column_[col][grid[row][col]] > 1 ||
row_[row][grid[row][col]] > 1)
return false;
}
}
return true;
}
//Find the first empty cell
pair<int, int> findCell(const vector<vector<int>> &grid) {
for (int i = 0; i < N; i++) {
for (int j = 0; j < N; j++) {
if (grid[i][j] == UNASSIGNED) {
return make_pair(i, j);
}
}
}
return ERROR_PAIR;
}
//Find all the numbers i can insert in a given position, and update the matrix with that number. Return
//the set of all the matrixes(one for each possibility).
list<vector<vector<int>>> getChoices(const int &row, const int &col,
const vector<vector<int>> &grid) {
list<vector<vector<int>>> choices;
for (int i = 1; i < 10; i++) {
if (canInsert(i, row, col, grid)) {
// cout << "posso inserire =" << i << endl;
vector<vector<int>> tmpGrid = grid;
tmpGrid[row][col] = i;
choices.push_back(tmpGrid);
}
}
return choices;
}
//Update the childreen of a node.
void addChoices(list<vector<vector<int>>> &choices, Node &node) {
while (!choices.empty()) {
node.child.push_back(newNode(choices.front()));
choices.pop_front();
}
return;
}
//Compute one step of computation for each node in input, and put all the childreen in the task vector.
void solveOneStep(vector<Node *> &nodes, const int &nw, vector<Node *> &tasks) {
if (solutionFound) return;
for (Node *&n : nodes) {
if (findCell(n->grid) != ERROR_PAIR) {
pair<int, int> freeCell = findCell(n->grid);
list<vector<vector<int>>> choices =
getChoices(freeCell.first, freeCell.second, n->grid);
if (choices.empty()) {
continue;
}
addChoices(choices, *n);
for (Node *&n : n->child) {
tasks.push_back(n);
}
continue;
} else if (isSafe(n->grid)) {
if (!solutionFound.load()) {
solutionFound.store(true);
printGrid(n->grid);
cout << "That's the first solution found !" << endl;
}
return;
}
}
}
//Compute step by step the computation until you reach a level of the entire tree of nodes where
//the nodes of that level are more than the number of worker(NW) choose by the user.
vector<Node *> splitChunks(Node *root, const int &nw) {
vector<Node *> tasks;
vector<Node *> nodes;
nodes.push_back(root);
while ((int)tasks.size() < nw && !solutionFound) {
tasks.clear();
solveOneStep(nodes, nw, tasks);
nodes = tasks;
}
return tasks;
}
//Solve recursively all the sub-trees of all the nodes given in input, until a solution is found or no
//solution exist.
void solveSubTree(vector<Node *> &nodes, const int &nw,) {
if (solutionFound) return;
for (Node *&n : nodes) {
if (findCell(n->grid) != ERROR_PAIR) {
pair<int, int> freeCell = findCell(n->grid);
list<vector<vector<int>>> choices =
getChoices(freeCell.first, freeCell.second, n->grid);
if (choices.empty()) {
continue;
}
addChoices(choices, *n);
solveSubTree(n->child, nw);
} else if (isSafe(n->grid)) {
if (!solutionFound.load()) {
solutionFound.store(true);
printGrid(n->grid);
std::cout << "That's the first solution found !" << endl;
}
return;
}
}
}
int main(int argc, char *argv[]) {
if (argc != 2) {
cout << "Usage is: nw " << endl;
return (-1);
}
//A test matrix.
vector<vector<int>> grid =
{ { 0, 1, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 8, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0, 0, 0, 0, 0, 0, 0, 0 } };
Node *root = newNode(grid);
vector<thread> tids;
const int nw = atoi(argv[1]); //Number of worker
vector<vector<Node *>> works(nw, vector<Node *>());
vector<Node *> tasks = splitChunks(root, nw);
//Split the tasks for each thread, the main thread takes the last part of the work.
for (int i = 0; i < nw; i++) {
int limit = 0;
i == nw - 1 ? limit = tasks.size() : limit = tasks.size() / nw;
for (int j = 0; j < limit; j++) {
works[i].push_back(tasks.back());
tasks.pop_back();
}
}
//Start each thread, and then the main thread start his computation.
for (int i = 0; i < nw - 1; i++) {
tids.push_back(thread(solveSubTree, ref(works[i]), ref(nw)));
}
solveSubTree(works[nw - 1], nw, t1); // Main thread do last part of the work
for (thread &t : tids) {
t.join();
}
std::cout << "end" << endl;
return (0);
}
最佳答案
这里有几点可以提高引用实现的效率:
- 使用
vector<vector<int>>
for 2D array 远非高效:它在内存中不连续并导致许多缓慢的分配。应该首选大扁平阵列。 -
unordered_map<int, int>
因为集合中的整数在一个小的(连续的)范围内,所以不需要类似集合的操作。使用简单数组要快得多。 - 有些拷贝是不需要的,可以用
std::move
删除。 . - 由于网格中的整数很小,
char
可用于int
(减少内存占用,将数据保存在 CPU 缓存中,并尽可能加快分配速度)。 - 我看到一个
new
但没有delete
在代码中... - 线程之间的工作在许多情况下似乎明显不平衡,导致较慢的并行执行,应该执行负载平衡以更好地扩展。一种方法是使用任务安排。
- 可以使用启发式方法来大大加快探索速度。首先,我建议您查看 constraint satisfaction problem (CSP),因为众所周知,CSP 求解器非常擅长求解它。更多一般和理论信息可以在书中找到 Artificial Intelligence: a modern approach .
这是应用第一条注释的代码,在我的机器上执行速度提高了 5 倍(请注意网格已在 main
中修改):
#define UNASSIGNED 0
#define N 9
#define ERROR_PAIR std::make_pair(-1, -1)
using namespace std;
void printGrid(const array<char, N*N>& grid)
{
for (int row = 0; row < N; row++)
{
for (int col = 0; col < N; col++)
{
cout << (int)grid[row*N+col] << " ";
}
cout << endl;
}
}
atomic<bool> solutionFound{false};
//Each node has a sudokuMatrix and some sub-trees
struct Node {
array<char, N*N> grid;
vector<Node *> child;
};
Node *newNode(const array<char, N*N> &newGrid) {
Node *temp = new Node;
temp->grid = newGrid;
return temp;
}
//Check if a number can be inserted in a given position
bool canInsert(const int &val, const int &row_, const int &col_,
const array<char, N*N> &grid) {
// Check column
for (int row = 0; row < N; row++) {
if (grid[row*N+col_] == val) return false;
}
// Check row
for (int col = 0; col < N; col++) {
if (grid[row_*N+col] == val) return false;
}
// check box
for (int row = 0; row < N; row++) {
for (int col = 0; col < N; col++) {
if (row / 3 == row_ / 3 &&
col / 3 == col_ / 3) { // they are in the same square 3x3
if ((grid[row*N+col] == val)) return false;
}
}
}
return true;
}
//Check if the sudoku is solved
bool isSafe(const array<char, N*N> &grid)
{
// No need for a hashmap for row column and boxes,
// just an array since associated values are small integer
char row_[9][N+1] = {0};
char column_[9][N+1] = {0};
char box[3][3][N+1] = {0};
for (int row = 0; row < N; row++) {
for (int col = 0; col < N; col++) {
// mark the element in row column and box
row_[row][grid[row*N+col]] += 1;
column_[col][grid[row*N+col]] += 1;
box[row / 3][col / 3][grid[row*N+col]] += 1;
// if an element is already
// present in the hashmap
if (box[row / 3][col / 3][grid[row*N+col]] > 1 ||
column_[col][grid[row*N+col]] > 1 ||
row_[row][grid[row*N+col]] > 1)
return false;
}
}
return true;
}
//Find the first empty cell
pair<int, int> findCell(const array<char, N*N> &grid) {
for (int i = 0; i < N; i++) {
for (int j = 0; j < N; j++) {
if (grid[i*N+j] == UNASSIGNED) {
return make_pair(i, j);
}
}
}
return ERROR_PAIR;
}
//Find all the numbers i can insert in a given position, and update the matrix with that number. Return
//the set of all the matrixes(one for each possibility).
list<array<char, N*N>> getChoices(const int &row, const int &col,
const array<char, N*N> &grid) {
list<array<char, N*N>> choices;
for (int i = 1; i < 10; i++) {
if (canInsert(i, row, col, grid)) {
// cout << "posso inserire =" << i << endl;
array<char, N*N> tmpGrid = grid;
tmpGrid[row*N+col] = i;
choices.push_back(std::move(tmpGrid));
}
}
return choices;
}
//Update the childreen of a node.
void addChoices(list<array<char, N*N>> &choices, Node &node) {
while (!choices.empty()) {
node.child.push_back(newNode(choices.front()));
choices.pop_front();
}
return;
}
//Compute one step of computation for each node in input, and put all the childreen in the task vector.
void solveOneStep(vector<Node *> &nodes, const int &nw, vector<Node *> &tasks) {
if (solutionFound) return;
for (Node *&n : nodes) {
if (findCell(n->grid) != ERROR_PAIR) {
pair<int, int> freeCell = findCell(n->grid);
list<array<char, N*N>> choices =
getChoices(freeCell.first, freeCell.second, n->grid);
if (choices.empty()) {
continue;
}
addChoices(choices, *n);
for (Node *&n : n->child) {
tasks.push_back(n);
}
continue;
} else if (isSafe(n->grid)) {
if (!solutionFound.load()) {
solutionFound.store(true);
printGrid(n->grid);
cout << "That's the first solution found !" << endl;
}
return;
}
}
}
//Compute step by step the computation until you reach a level of the entire tree of nodes where
//the nodes of that level are more than the number of worker(NW) choose by the user.
vector<Node *> splitChunks(Node *root, const int &nw) {
vector<Node *> tasks;
vector<Node *> nodes;
nodes.push_back(root);
while ((int)tasks.size() < nw && !solutionFound) {
tasks.clear();
solveOneStep(nodes, nw, tasks);
nodes = tasks;
}
return tasks;
}
//Solve recursively all the sub-trees of all the nodes given in input, until a solution is found or no
//solution exist.
void solveSubTree(vector<Node *> &nodes, const int &nw) {
if (solutionFound) return;
for (Node *&n : nodes) {
if (findCell(n->grid) != ERROR_PAIR) {
pair<int, int> freeCell = findCell(n->grid);
list<array<char, N*N>> choices =
getChoices(freeCell.first, freeCell.second, n->grid);
if (choices.empty()) {
continue;
}
addChoices(choices, *n);
solveSubTree(n->child, nw);
} else if (isSafe(n->grid)) {
if (!solutionFound.load()) {
solutionFound.store(true);
printGrid(n->grid);
std::cout << "That's the first solution found !" << endl;
}
return;
}
}
}
int main(int argc, char *argv[]) {
if (argc != 2) {
cout << "Usage is: nw " << endl;
return (-1);
}
//A test matrix.
array<char, N*N> grid =
{ 0, 0, 0, 0, 0, 0, 2, 0, 0,
0, 8, 0, 0, 0, 7, 0, 9, 0,
6, 0, 2, 0, 0, 0, 5, 0, 0,
0, 7, 0, 0, 6, 0, 0, 0, 0,
0, 0, 0, 9, 0, 1, 0, 0, 0,
0, 0, 0, 0, 2, 0, 0, 4, 0,
0, 0, 5, 0, 0, 0, 6, 0, 3,
0, 9, 0, 4, 0, 0, 0, 7, 0,
0, 0, 6, 0, 0, 0, 0, 0, 0 };
Node *root = newNode(grid);
vector<thread> tids;
const int nw = atoi(argv[1]); //Number of worker
vector<vector<Node *>> works(nw, vector<Node *>());
vector<Node *> tasks = splitChunks(root, nw);
//Split the tasks for each thread, the main thread takes the last part of the work.
for (int i = 0; i < nw; i++) {
int limit = 0;
i == nw - 1 ? limit = tasks.size() : limit = tasks.size() / nw;
for (int j = 0; j < limit; j++) {
works[i].push_back(tasks.back());
tasks.pop_back();
}
}
//Start each thread, and then the main thread start his computation.
for (int i = 0; i < nw - 1; i++) {
tids.push_back(thread(solveSubTree, ref(works[i]), ref(nw)));
}
solveSubTree(works[nw - 1], nw); // Main thread do last part of the work
for (thread &t : tids) {
t.join();
}
std::cout << "end" << endl;
return (0);
}
关于c++ - 如何提高多线程效率?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/62967304/