我不确定这是一个 bug,还是我自己犯了一个简单的错误,但将 OpenCL 缓冲区读入二维 vector 似乎会导致一些奇怪的行为。
我的意思是,在调用 queue.enqueueReadBuffer 之后,程序要么发生段错误,要么崩溃并输出诸如“corrupted double-linked list”(损坏的双链表)或“free(): invalid size”(free():无效大小)之类的消息。如果有帮助,我可以提供回溯(backtrace)和内存映射。
读入一维 vector 按预期工作。
我正在使用 AMD Catalyst 13.25.5 运行 Linux 3.8.0-35 (x86_64),并使用以下命令编译:g++ -I/opt/AMDAPP/include main.cpp OpenCl.cpp -lOpenCL
最小工作示例:
main.cpp
// System headers
#include <iostream>
#include <vector>
// Third-party headers
#include <CL/cl.hpp>
// Project headers
#include "OpenCl.h"
int main(int argc, char* argv[])
{
OpenCl opencl;
const unsigned int num_rows = 241;
const unsigned int num_cols = 886;
const unsigned int num_elements = num_rows * num_cols;
const size_t array_sz = num_elements * sizeof(cl_float);
const std::vector<cl_float> A_1d(num_elements, 1.2345f);
std::vector<cl_float> B_1d(num_elements, 0);
const std::vector<std::vector<cl_float> > A_2d(num_rows, std::vector<cl_float>(num_cols, 1.2345f));
std::vector<std::vector<cl_float> > B_2d(num_rows, std::vector<cl_float>(num_cols, 0));
// Works as expected
std::cout << "START 1D TEST\n";
opencl.test1D(A_1d, B_1d, array_sz);
std::cout << "1D TEST COMPLETE\n";
// Crashes
std::cout << "START 2D TEST\n";
opencl.test2D(A_2d, B_2d, array_sz);
std::cout << "2D TEST COMPLETE\n";
return 0;
}
OpenCl.h
#pragma once
#define __CL_ENABLE_EXCEPTIONS
// Third-party headers
#include <CL/cl.hpp>
// Small wrapper that owns an OpenCL context and command queue and exposes
// two host <-> device round-trip tests.
class OpenCl {
public:
// Fills in `context` and `queue`; see the definition in OpenCl.cpp.
OpenCl();
// Round trip for flat storage.
//   A        - source elements uploaded to the device buffer
//   B        - destination that receives the downloaded elements
//   array_sz - size of the transfer in bytes
void test1D(const std::vector<cl_float> &A,
std::vector<cl_float> &B,
const size_t array_sz);
// Round trip for vector-of-vectors storage; same parameters as test1D,
// except that A and B hold one inner vector per row.
void test2D(const std::vector<std::vector<cl_float> > &A,
std::vector<std::vector<cl_float> > &B,
const size_t array_sz);
private:
// Context used when creating device buffers in the test routines.
cl::Context context;
// Queue on which the buffer write/read commands are enqueued.
cl::CommandQueue queue;
};
OpenCl.cpp
// Class header
#include "OpenCl.h"
// System headers
#include <iostream>
#include <vector>
// Third-party headers
#include <CL/cl.hpp>
/**
 * Creates a GPU context on the first available platform and a command queue
 * on the first device of that context.
 *
 * @throws cl::Error if the platform or device list comes back empty (raised
 *         here explicitly instead of indexing an empty vector), or if any
 *         of the underlying OpenCL wrapper calls reports a failure.
 */
OpenCl::OpenCl()
{
    // Get available platforms
    std::vector<cl::Platform> platforms;
    cl::Platform::get(&platforms);
    if (platforms.empty()) {
        // platforms[0] below would be undefined behaviour on an empty list.
        throw cl::Error(CL_INVALID_PLATFORM, "OpenCl: no OpenCL platform available");
    }

    // Select the first platform and create a context using the GPU
    cl_context_properties cps[] = {
        CL_CONTEXT_PLATFORM,
        reinterpret_cast<cl_context_properties>(platforms[0]()),
        0  // property list terminator
    };
    context = cl::Context(CL_DEVICE_TYPE_GPU, cps);

    // Get a list of devices in this context
    const std::vector<cl::Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
    if (devices.empty()) {
        // Same reasoning as above for devices[0].
        throw cl::Error(CL_DEVICE_NOT_FOUND, "OpenCl: context contains no devices");
    }

    // Create a command queue and use the first device
    queue = cl::CommandQueue(context, devices[0]);
}
void OpenCl::test1D(const std::vector<cl_float> &A,
std::vector<cl_float> &B,
const size_t array_sz)
{
try {
// Initialize device buffer
cl::Buffer A_d = cl::Buffer(context, CL_MEM_READ_ONLY, array_sz);
// Transfer data to device
queue.enqueueWriteBuffer(A_d, CL_TRUE, 0, array_sz, &A[0]);
// Transfer data from device
std::cout << "B[0]: " << B[0] << "\n";
queue.enqueueReadBuffer(A_d, CL_TRUE, 0, array_sz, &B[0]);
std::cout << "B[0]: " << B[0] << "\n";
} catch(cl::Error &error) {
std::cout << error.what() << "(" << error.err() << ")" << std::endl;
std::cout << "Program failed!\n";
}
}
void OpenCl::test2D(const std::vector<std::vector<cl_float> > &A,
std::vector<std::vector<cl_float> > &B,
const size_t array_sz)
{
try {
// Initialize device buffer
cl::Buffer A_d = cl::Buffer(context, CL_MEM_READ_ONLY, array_sz);
// Transfer data to device
queue.enqueueWriteBuffer(A_d, CL_TRUE, 0, array_sz, &A[0][0]);
// Transfer data from device
std::cout << "B[0][0]: " << B[0][0] << "\n";
queue.enqueueReadBuffer(A_d, CL_TRUE, 0, array_sz, &B[0][0]);
std::cout << "B[0][0]: " << B[0][0] << "\n";
} catch(cl::Error &error) {
std::cout << error.what() << "(" << error.err() << ")" << std::endl;
std::cout << "Program failed!\n";
}
}
最佳答案
std::vector< std::vector<float> >
持有的内存不是连续的(每一行都是单独分配的),所以你不能在一次操作中复制它。你必须逐行复制:
size_t row_size = A[0].size() * sizeof(A[0][0]);
for(size_t row = 0; row < A.size(); ++row)
queue.enqueueWriteBuffer(A_d, CL_TRUE, /*offset=*/row * row_size, /*size=*/row_size, &A[row][0]);
但是,如果您的数据以连续数组的形式存放(如您的 test1D),只需一次传输即可完成,性能会更好。
关于c++ - 为什么 OpenCL 无法将 enqueueReadBuffer 放入二维 vector 中?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/23608080/