c++ - 从设备到主机的 Cuda Memcpy 崩溃

标签 c++ image-processing cuda gpgpu

我想对图像中的每个像素,在其周围 15 x 15 的邻域(patch)内求 RGB 的最小值。

在 source.cpp 文件中,执行到下面这一行时:

SAFE_CALL(cudaMemcpy(Dark_h, Dark_d, size2, cudaMemcpyDeviceToHost));

程序崩溃了。下面是我的代码片段:

DarkPrior.h

#ifndef DARKPRIOR_H_INCLUDED
#define DARKPRIOR_H_INCLUDED

 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include "cuda.h"
 #include "cuda_runtime.h"
 #include "device_launch_parameters.h"
 #include <iostream>
 #include "opencv2/opencv.hpp"

 // Evaluates a CUDA runtime call exactly once. If the returned status is not
 // cudaSuccess, prints the source file, line and CUDA error string to stderr,
 // resets the device and terminates the process with EXIT_FAILURE.
 // Wrapped in do { ... } while (0) so it behaves as a single statement.
 #define SAFE_CALL(call)                                                        \
     do {                                                                       \
         const cudaError_t err = (call);                                        \
         if (err != cudaSuccess) {                                              \
             fprintf(stderr,                                                    \
                     "CUDA Error:\nFile = %s\nLine = %d\nReason = %s\n",        \
                     __FILE__, __LINE__, cudaGetErrorString(err));              \
             cudaDeviceReset();                                                 \
             exit(EXIT_FAILURE);                                                \
         }                                                                      \
     } while (0)


    // Host-side entry point (defined in minRGB.cu).
    // Runs the per-pixel RGB-minimum kernel on image_d, which is reinterpreted
    // as one float3 per pixel, then runs the patch-minimum kernel, which writes
    // one float per pixel through the second pointer.
    // Both pointers are handed straight to kernels, so they must be device
    // pointers. height and width are the image dimensions in pixels.
    void dark_channel(float *image_d, float *rgbmin_d, int height, int width);



   #endif

source.cpp

#include "DarkPrior.h"
#include <opencv2/opencv.hpp>

using namespace std;
using namespace cv;

int main()
{
    //load the image
    Mat src = imread("foggy_river.jpg");

    //check whether image loaded is empty or not.
    if (src.empty())
    {
         cerr << "no image"; return -1;
    }

    //Mat rgbMin(src.size(), CV_MAKETYPE(src.depth(), 1));
   //   int step = src.step;
    float *image_h = NULL;
    float *image_d = NULL;
     float *Dark_d = NULL;
    float *Dark_h = NULL;
   //Mat rgbmin(src.size(), CV_MAKETYPE(src.depth(), 1));

   size_t size1 = src.step * src.rows * sizeof(float);
   size_t size2 = src.cols * src.rows * sizeof(float);

   image_h = (float *)malloc(size1);
   Dark_h = (float *)malloc(size1);

   SAFE_CALL(cudaMalloc((void**)&image_d, size1));
   SAFE_CALL(cudaMalloc((void**)&Dark_d, size2));

   //convert image from CV::MAT to float*.
   Mat dst;
   src.convertTo(dst, CV_32F);
   image_h = dst.ptr<float>();

   SAFE_CALL(cudaMemcpy(image_d, image_h, size1, cudaMemcpyHostToDevice));

   cout << "Calculating Minimum of RGB ..." << endl;
   dark_channel(image_d, Dark_d, src.rows, src.cols);

   SAFE_CALL(cudaMemcpy(Dark_h, Dark_d, size2, cudaMemcpyDeviceToHost));

   Mat Dark_out(src.rows, src.cols, CV_32FC1, Dark_h);
   imwrite("MinRGB.jpg", Dark_out);

   cudaFree(image_d);
   cudaFree(Dark_d);

   //free(image_h);
   //free(rgbmin_h);

   return 0;
}

minRGB.cu

#include "DarkPrior.h"

//#define min(x,y) ((x<y)?x:y)

// Reads one value from the row-major buffer rgbMin at column x, row y.
// Each coordinate is first clamped to the valid range ([0, width - 1] for x,
// [0, height - 1] for y), so coordinates outside the image read the nearest
// edge element instead.
__device__ float safe_get(float *rgbMin, int width, int height, int x, int y)
{
    // Clamp: raise to 0 first, then cap at the last valid index.
    const int col = min(max(0, x), width - 1);
    const int row = min(max(0, y), height - 1);

    // Flatten (row, col) into the 1D offset.
    return rgbMin[row * width + col];
}

 // Returns the smaller of Minval and every value of rgbMin inside the
 // (2 * radius + 1) x (2 * radius + 1) window centred on (x, y).
 // Window positions that fall outside the image are read through safe_get,
 // which clamps them to the nearest edge element.
 //   rgbMin        row-major buffer with width * height floats
 //   width, height image dimensions in pixels
 //   radius        half-size of the square window
 //   x, y          centre of the window (column, row)
 //   Minval        starting value for the minimum
 __device__ float estimate_minimum_patch(float *rgbMin, int width, int height, int radius, int x, int y, float Minval)
{
    for (int i = -radius; i <= radius; i++)
    {
        for (int j = -radius; j <= radius; j++)
        {
            const float val = safe_get(rgbMin, width, height, x + i, y + j);
            Minval = fminf(val, Minval);
        }
    }

    // Minval is a by-value parameter, so the result must be returned
    // explicitly; without this return the function has undefined behaviour.
    return Minval;
}

// Writes, for every pixel, the minimum of rgbMin over the 15 x 15 patch
// centred on that pixel into darkCh (same row-major layout, width * height
// floats).
// Launch layout: a 2D launch covering at least width x height threads; x maps
// to the column and y to the row. Threads outside the image return without
// touching memory. With single-thread blocks this reduces to one block per
// pixel.
__global__ void kernel_darkChannel(float *rgbMin, float *darkCh, int height, int width)
{
    const int radius = 7; // 2 * 7 + 1 = 15 pixel wide patch

    const int x = blockIdx.x * blockDim.x + threadIdx.x; // current column
    const int y = blockIdx.y * blockDim.y + threadIdx.y; // current row

    if (x >= width || y >= height)
    {
        return;
    }

    const int tid = y * width + x;

    // 255.0f is the starting value for the running minimum; use the value
    // returned by the helper, since the argument itself is passed by value.
    darkCh[tid] = estimate_minimum_patch(rgbMin, width, height, radius, x, y, 255.0f);
}

// For every pixel, stores the smallest of its three float3 components in
// tmp_min at the same row-major index (y * width + x).
// Launch layout: a 2D launch covering at least width x height threads; x maps
// to the column and y to the row. With single-thread blocks this reduces to
// one block per pixel.
__global__ void kernel_findMinRGB(float3 *image, float *tmp_min, int height, int width)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x; // current column
    const int y = blockIdx.y * blockDim.y + threadIdx.y; // current row

    // x is a column (bounded by width) and y is a row (bounded by height);
    // a thread is outside the image if EITHER coordinate is out of range.
    if (x >= width || y >= height)
    {
        return;
    }

    const int i = y * width + x;
    const float3 px = image[i];

    tmp_min[i] = fminf(px.x, fminf(px.y, px.z));
}

 // Computes the patch-minimum ("dark channel") image on the GPU.
 //   image_d  device pointer to height * width pixels, read as float3
 //   Dark_d   device pointer receiving height * width floats
 //   height   image height in pixels
 //   width    image width in pixels
 // Runs kernel_findMinRGB into a temporary device buffer, then
 // kernel_darkChannel from that buffer into Dark_d. Does nothing when either
 // dimension is not positive. Any CUDA failure terminates the process through
 // SAFE_CALL. The function returns only after both kernels have finished.
 void dark_channel(float *image_d, float *Dark_d, int height, int width)
{
    // A grid dimension of zero (or a negative size) is an invalid launch.
    if (height <= 0 || width <= 0)
    {
        return;
    }

    // One single-thread block per pixel, so each block handles exactly one
    // (column, row) pair.
    // NOTE: single-thread blocks leave most of every warp idle. Moving to
    // larger blocks (e.g. 16 x 16) is only safe if the kernels index with
    // blockIdx * blockDim + threadIdx and guard against out-of-range threads.
    dim3 grid(width, height);

    float *tmp_min = NULL;
    SAFE_CALL(cudaMalloc((void **)(&tmp_min), sizeof(float) * height * width));

    kernel_findMinRGB <<<grid, 1>>> ((float3 *)image_d, tmp_min, height, width);
    SAFE_CALL(cudaGetLastError());      // invalid launch configuration
    SAFE_CALL(cudaDeviceSynchronize()); // faults raised while the kernel ran
    printf("RGB min is found\n");

    kernel_darkChannel <<<grid, 1>>> (tmp_min, Dark_d, height, width);
    SAFE_CALL(cudaGetLastError());
    SAFE_CALL(cudaDeviceSynchronize());
    printf("patch of minimum is also found\n");

    // The intermediate buffer is private to this function; release it so
    // repeated calls do not leak device memory.
    SAFE_CALL(cudaFree(tmp_min));
}

我的代码在 source.cpp 的第 45 行因未知错误而崩溃

我完全不知道是什么原因,也许你能帮上忙。

最佳答案

指针 Dark_h 指向一段大小为 size1 字节的主机内存,指针 Dark_d 指向一段大小为 size2 字节的设备内存。如果 size1 < size2,那么下面这个调用:

cudaMemcpy(Dark_h, Dark_d, size2, cudaMemcpyDeviceToHost)

就会出问题,因为你会写入非法内存(即不属于 Dark_h 所指向的那段数组的内存),很可能会导致段错误(SEGFAULT)。我没有实际运行过,但我敢打赌这就是崩溃的原因。

关于c++ - 从设备到主机的 Cuda Memcpy 崩溃,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/33605696/

相关文章:

c++ - 程序已停止工作?如何使 int 数组大小超过 1,000,000?

c++ - std::greater<double> 和 std::less<double> 使用安全吗?

image-processing - 用于 3D 重建的图切割算法类型

java - 如何在 TiffOutputSet 中嵌入 ICC_Profile

cuda - Thrust cuda中使用float4时出现内存问题

c++ - 从 Thrust::device_vector 到原始指针并返回?

c - 为什么我的c程序突然用了30g的虚拟内存?

c++ - 以编程方式复制 Windows 10 上的桌面

c++ - 如何解决在 C++ header 中声明 typedef 的问题

python - 使用 Python 图像处理分割生物样本的照片以提取感兴趣的圆形区域