c++ - CUDA 图像不显示输出

标签 c++ image opencv cuda


#include <cstdio>
#include <cstdlib>
#include <iostream>

#include <opencv2/core.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgcodecs.hpp>

#include <cuda_runtime.h>

using std::cout;
using std::endl;

// Kernel as posted in the question: each thread copies one byte of `input` to a
// horizontally mirrored position in `output`.
// NOTE(review): the function-body braces are absent from this listing (apparently
// lost when the page was extracted); the code is left exactly as found.
__global__ void mirror( unsigned char* input, unsigned char* output, int numRows, int numCols)
    //2D Index of current thread
    const int col = blockIdx.x * blockDim.x + threadIdx.x;
    const int row = blockIdx.y * blockDim.y + threadIdx.y;
    // Threads that fall outside the numRows x numCols range do nothing.
    if ( col >= numCols || row >= numRows ) return;

    // Same values as col/row above, recomputed under different names.
    int thread_x = blockDim.x * blockIdx.x + threadIdx.x;
    int thread_y = blockDim.y * blockIdx.y + threadIdx.y;
    // Destination column. NOTE(review): for thread_x == 0 this equals numCols, which is
    // one past the last valid column (numCols - 1); the corrected kernel later in this
    // document uses numCols - col - 1 instead.
    int thread_x_new = numCols-thread_x;
    int thread_y_new = thread_y;
    // Flat offsets use a row stride of numCols elements, i.e. one byte per element.
    // NOTE(review): main() in this listing creates a CV_8UC3 image (3 bytes per pixel),
    // which this indexing does not account for.
    int mId = thread_y * numCols + thread_x;
    int mId_new = thread_y_new * numCols + thread_x_new;
    output[mId_new] = input[mId]; 

 // Host wrapper as posted in the question: allocates two device buffers, uploads the
 // input image, launches the mirror kernel and downloads the result into `output`.
 // NOTE(review): the function-body braces are absent from this listing (apparently
 // lost when the page was extracted); the code is left exactly as found.
 void convert_to_mirror(const cv::Mat& input, cv::Mat& output,int numrows,int numcols)
    // 1024 threads along x, one row per block.
    const dim3 blockSize(1024,1,1);
    // Integer division plus one below, so the grid always covers every column and row
    // (and has one spare block when the size divides evenly).
    int a=numcols/blockSize.x, b=numrows/blockSize.y;   
    const dim3 gridSize(a+1,b+1,1);
    // NOTE(review): this counts one byte per pixel; the prose that follows this listing
    // explains that the byte count should be input.step * numrows instead.
    const size_t numPixels = numrows * numcols;
    unsigned char *d_input, *d_output;

    // NOTE(review): none of the CUDA calls below have their return status checked, and
    // no cudaFree for d_input / d_output is visible in this listing.
    cudaMalloc<unsigned char>(&d_input, numPixels);
    cudaMalloc<unsigned char>(&d_output,numPixels);
    //Copy data from OpenCV input image to device memory
    cudaMemcpy(d_input,input.ptr(), numPixels,cudaMemcpyHostToDevice);
    //Call mirror kernel.
    mirror<<<gridSize, blockSize>>>(d_input,d_output, numrows, numcols);
    //copy output from device to host
    cudaMemcpy(output.ptr(), d_output,numPixels, cudaMemcpyDeviceToHost);

// Entry point of the question's program: loads an image from disk and creates an
// output image of the same dimensions.
// NOTE(review): braces and several statements appear to be missing from this listing
// (apparently lost when the page was extracted); the code is left exactly as found.
int main()
    //Read input image from the disk
    cv::Mat input = cv::imread("C:/a.jpg", cv::IMREAD_COLOR);
    const int rows = input.rows;
    const int cols = input.cols;
    // NOTE(review): as shown, the message and `return -1` below are unconditional. Their
    // deeper indentation suggests they belonged to a guard such as `if (input.empty())`
    // -- confirm against the original post.
        std::cout<<"Image Not Found!"<<std::endl;
        return -1;

    //Create output image (8-bit, 3 channels, same size as the input)
    cv::Mat output(rows,cols,CV_8UC3);

    //Call the wrapper function
    // NOTE(review): no call to convert_to_mirror is present in this listing.

    //Show the input and output
    // NOTE(review): no display call is present in this listing.

    //Wait for key press
    // NOTE(review): no key-wait call is present in this listing.
    return 0;





输入图像是 8 位 RGB 图像,因此理论上它占用的字节数等于 width x height x number_of_channels,在本例中即 numRows * numCols * 3。但实际上,OpenCV 会为图像数据分配对齐的内存(aligned memory),因此无论图像类型和通道数如何,图像的总字节数都应按 image.step * numrows 计算。另外,cudaMalloc 和 cudaMemcpy 调用需要的参数分别是要分配或要复制的总字节数,而不是像素数。请按如下方式修正这些调用(代码改编自 @micehlson 的回答):

// Size of the whole image buffer in bytes: input.step multiplied by the number of rows,
// as described in the text above. Sizes passed to cudaMalloc/cudaMemcpy are in bytes.
const size_t numBytes = input.step * numrows;
// Device buffers for the source and the mirrored image, each numBytes long.
cudaMalloc<unsigned char>(&d_input, numBytes);
cudaMalloc<unsigned char>(&d_output, numBytes);

// Copy data from the OpenCV input image to device memory
cudaMemcpy(d_input, input.ptr(), numBytes, cudaMemcpyHostToDevice);

// Copy the result from device memory back into the OpenCV output image
cudaMemcpy(output.ptr(), d_output, numBytes, cudaMemcpyDeviceToHost);


由于图像内存是对齐的,因此应该使用 Mat 对象的 step 参数计算像素的实际索引。计算 OpenCV Mat 中像素起始索引的通用公式如下:

index = row * step/bytes_per_pixel_component + (channels * column)

对于8位的RGB图像,一个RGB像素的单个分量占用的字节数为1字节。这意味着单个 R 或 G 或 B 占用 1 个字节,而整个 RGB 像素为 3 个字节。所以起始索引计算为

int index = row * step + 3 * column;

由于这是该像素的起始索引,因此在它的基础上依次加上 0 到 channels - 1 的偏移量,就可以访问该像素的每个单独通道,如下所示:

// Images loaded with cv::imread (as in this example) store each colour pixel in
// B, G, R order, so the first byte of the pixel is blue and the third is red.
int B = index;
int G = index + 1;
int R = index + 2;


随后,翻转(镜像)后的图像中对应像素的起始索引可按如下方式计算:

int flipped_index = row * step + 3 * (numCols - column - 1);



/**
 * Mirrors an interleaved 8-bit image left-to-right (flip about the vertical axis).
 *
 * Launch layout: 2D grid of 2D blocks, one thread per pixel; x covers columns and
 * y covers rows. The grid may be larger than the image, surplus threads exit early.
 * No shared memory is used.
 *
 * @param input    device pointer to the source image bytes
 * @param output   device pointer to the destination image bytes (must not alias input)
 * @param numRows  image height in pixels
 * @param numCols  image width in pixels
 * @param channels bytes per pixel (one byte per channel for 8-bit images)
 * @param step     distance in bytes between the starts of two consecutive rows
 */
__global__ void mirror( unsigned char* input, unsigned char* output, int numRows, int numCols, int channels, int step)
{
    //2D Index of current thread
    const int col = blockIdx.x * blockDim.x + threadIdx.x;
    const int row = blockIdx.y * blockDim.y + threadIdx.y;

    // The grid is rounded up to whole blocks, so some threads lie outside the image.
    if ( col >= numCols || row >= numRows ) return;

    // Byte offsets are computed in size_t so row * step cannot overflow a 32-bit int.
    const size_t rowStart = static_cast<size_t>(row) * static_cast<size_t>(step);
    const size_t tid = rowStart + static_cast<size_t>(channels) * col;
    const size_t tid_flipped = rowStart + static_cast<size_t>(channels) * (numCols - col - 1); //Flip about y axis

    //Copy each component of the current pixel, keeping the channel order intact
    for(int i=0; i<channels; i++)
    {
        output[tid_flipped + i] = input[tid + i];
    }
}



using std::cout;
using std::endl;    

/**
 * Mirrors an interleaved 8-bit image left-to-right (flip about the vertical axis).
 *
 * Launch layout: 2D grid of 2D blocks, one thread per pixel; x covers columns and
 * y covers rows. The grid may be larger than the image, surplus threads exit early.
 * No shared memory is used.
 *
 * @param input    device pointer to the source image bytes
 * @param output   device pointer to the destination image bytes (must not alias input)
 * @param numRows  image height in pixels
 * @param numCols  image width in pixels
 * @param channels bytes per pixel (one byte per channel for 8-bit images)
 * @param step     distance in bytes between the starts of two consecutive rows
 */
__global__ void mirror( unsigned char* input, unsigned char* output, int numRows, int numCols, int channels, int step)
{
    //2D index of current thread
    const int col = blockIdx.x * blockDim.x + threadIdx.x;
    const int row = blockIdx.y * blockDim.y + threadIdx.y;

    // The grid is rounded up to whole blocks, so some threads lie outside the image.
    if ( col >= numCols || row >= numRows ) return;

    // The pixel stride is `channels`, not a hard-coded 3, so the offsets stay consistent
    // with the copy loop below for any channel count. Offsets are size_t so that
    // row * step cannot overflow a 32-bit int.
    const size_t rowStart = static_cast<size_t>(row) * static_cast<size_t>(step);
    const size_t tid = rowStart + static_cast<size_t>(channels) * col;
    const size_t tid_new = rowStart + static_cast<size_t>(channels) * (numCols - col - 1); //Flip about y axis

    //Copy each component of the current pixel, keeping the channel order intact
    for(int i=0; i<channels; i++)
    {
        output[tid_new + i] = input[tid + i];
    }
}

 /**
  * Mirrors `input` left-to-right on the GPU and stores the result in `output`.
  *
  * The image is uploaded and downloaded as one flat buffer of step * rows bytes, so
  * both images must have tightly packed rows (true for cv::imread results and for
  * freshly constructed cv::Mat objects). The kernel copies one byte per channel, so
  * the image depth must be 8 bits.
  *
  * @param input   source image (8-bit, any channel count); an empty image is a no-op
  * @param output  destination image; (re)allocated to the size and type of `input`
  * @param numrows number of rows, must equal input.rows
  * @param numcols number of columns, must equal input.cols
  *
  * Precondition violations raise cv::Exception via CV_Assert. Any CUDA failure
  * prints a diagnostic to stderr and aborts the process.
  */
 void convert_to_mirror(const cv::Mat& input, cv::Mat& output,int numrows,int numcols)
{
    // Nothing to mirror; also avoids zero-sized allocations and launches.
    if (input.empty()) return;

    CV_Assert(numrows == input.rows && numcols == input.cols);
    CV_Assert(input.depth() == CV_8U);

    // No-op when `output` already has the right size and type.
    output.create(input.rows, input.cols, input.type());

    // The flat copies below are only valid when both images share the same,
    // padding-free row pitch.
    const size_t inPitch  = input.step;
    const size_t outPitch = output.step;
    CV_Assert(inPitch == static_cast<size_t>(input.cols) * input.elemSize());
    CV_Assert(outPitch == inPitch);

    // Unlike assert(), this check is not compiled out when NDEBUG is defined.
    auto check = [](cudaError_t status, const char* what)
    {
        if (status != cudaSuccess)
        {
            std::fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
            std::abort();
        }
    };

    // 32 x 8 = 256 threads per block; the grid is rounded up so every pixel is covered.
    const dim3 blockSize(32,8,1);
    const dim3 gridSize((numcols + blockSize.x - 1) / blockSize.x,
                        (numrows + blockSize.y - 1) / blockSize.y,
                        1);

    const size_t numBytes = inPitch * input.rows;

    unsigned char *d_input = nullptr, *d_output = nullptr;

    check(cudaMalloc<unsigned char>(&d_input, numBytes), "cudaMalloc(d_input)");
    check(cudaMalloc<unsigned char>(&d_output,numBytes), "cudaMalloc(d_output)");

    //Copy data from OpenCV input image to device memory
    check(cudaMemcpy(d_input,input.ptr(), numBytes, cudaMemcpyHostToDevice), "host-to-device copy");

    //Call mirror kernel.
    mirror<<<gridSize, blockSize>>>(d_input,d_output, numrows, numcols, input.channels(), static_cast<int>(inPitch));

    // A launch reports configuration errors through cudaGetLastError and execution
    // errors at the next synchronizing call.
    check(cudaGetLastError(), "mirror kernel launch");
    check(cudaDeviceSynchronize(), "mirror kernel execution");

    //copy output from device to host
    check(cudaMemcpy(output.ptr(), d_output,numBytes, cudaMemcpyDeviceToHost), "device-to-host copy");

    // Release the device buffers; the original listing leaked them on every call.
    check(cudaFree(d_input), "cudaFree(d_input)");
    check(cudaFree(d_output), "cudaFree(d_output)");
}



 /**
  * Loads an image, mirrors it on the GPU and shows the original next to the result.
  *
  * @return 0 on success, -1 when the input image cannot be read.
  */
 int main()
{
    //Read input image from the disk
    cv::Mat input = cv::imread("C:/a.jpg", cv::IMREAD_COLOR);

    // imread returns an empty matrix when the file is missing or cannot be decoded.
    if (input.empty())
    {
        std::cout<<"Image Not Found!"<<std::endl;
        return -1;
    }

    const int rows = input.rows;
    const int cols = input.cols;

    //Create output image (same size as the input, 8-bit, 3 channels)
    cv::Mat output(rows,cols,CV_8UC3);

    //Call the wrapper function
    convert_to_mirror(input, output, rows, cols);

    //Show the input and output
    cv::imshow("Input", input);
    cv::imshow("Output", output);

    //Wait for key press so the windows stay open until the user dismisses them
    cv::waitKey();

    return 0;
}


nvcc -o mirror -std=c++11 mirror.cu -I/usr/local/include/opencv4 -L/usr/local/lib -lopencv_core -lopencv_imgcodecs -lopencv_highgui

在 Ubuntu 16.04 上使用 OpenCV 4.0 和 CUDA 9 测试

关于c++ - CUDA 图像不显示输出,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/53791074/


c++ - 网格引擎集群+OpenCV : strange behaviour

c++ - 如何找到鼠标事件的来源?

c++ - 从信号处理程序中引发异常

c++ - Qt中的透明窗口

html - 调整窗口大小时保持横幅图像居中?

python-3.x - 使用 cv2.imshow 时获取此黑色窗口而不是图片

c++ - 将 eigen::matrixXf 映射到数组

javascript - 幻灯片中的图像偏移

javascript - HTML - 单击图像时如何播放和暂停音频?

opencv - 查找图像中最大的 blob