build - OpenCL中有两个几乎完全相同的内核(只是部分缩进用了制表符而不是空格)，但其中一个可以构建，另一个却不能

请先忽略代码本身的逻辑，那是另一个问题。我查阅了规范，没有发现任何迹象表明空白字符会导致构建失败之类的情况。所以我的问题是：这到底是怎么回事？这是AMD驱动程序的问题吗？他们的CodeXL确实是个满是缺陷的糟糕工具，因为调试器似乎从来无法正常工作。

总之，区别在于：第一个内核我全部使用制表符进行缩进；而在能够构建的第二个内核中，混用了制表符和三个空格。

这是构建日志:

line 44: warning: variable "idx" was declared but never referenced
int idx = row * rows + column ;
    ^

line 48: warning: this declaration has no storage class or type specifier
umn ;
^

line 48: error: global variable must be declared in addrSpace constant
umn ;
^

line 49: error: identifier "column" is undefined
outputImage[column] = sum.x ;
            ^

line 49: error: identifier "sum" is undefined
outputImage[column] = sum.x ;
                      ^

line 50: error: expected a declaration
}
^

4 errors detected in the compilation of OCL2836T5.cl.
Frontend phase failed compilation.

这个无法构建:

__kernel  // Each work-item reads the pixel at (column, row) and stores the sum of its x, y and z channels.
void convolution(
    __read_only  image2d_t  sourceImage,  // image read through read_imageui()
    __global float *outputImage,          // float buffer that receives sum.x
    int rows,                             // upper bound checked against the work-item's row
    int cols,                             // upper bound checked against the work-item's column
    sampler_t sampler)                    // sampler passed to read_imageui()
{
    // Store each work-item’s unique row and column
    int column = get_global_id(0);
    int row    = get_global_id(1);
    int2 coords;  // Coordinates for accessing the image
    coords.x = column ;
    coords.y = row ;

    // All accesses to images return data as four-element vector 
    // (i.e., float4), although only the 'x' component will contain 
    // meaningful data in this code
    float4 sum = {0.0f, 0.0f, 0.0f, 0.0f};

    uint4 pixel;
    // Read a pixel from the image.  A single channel image 
    // stores the pixel in the 'x' coordinate of the returned
    // vector.
    pixel = read_imageui(sourceImage, sampler, coords);
    read_mem_fence(CLK_GLOBAL_MEM_FENCE) ;
    sum.x += pixel.x;
    write_mem_fence(CLK_GLOBAL_MEM_FENCE) ;
    write_mem_fence(CLK_LOCAL_MEM_FENCE) ;
    sum.x += pixel.y;
    write_mem_fence(CLK_GLOBAL_MEM_FENCE) ;
    write_mem_fence(CLK_LOCAL_MEM_FENCE) ;
    sum.x += pixel.z;
    write_mem_fence(CLK_GLOBAL_MEM_FENCE) ;
    write_mem_fence(CLK_LOCAL_MEM_FENCE) ;
    // NOTE(review): the fences above sit between updates of the local variable
    // `sum`; presumably left over from debugging -- confirm before relying on them.
    // Copy the data to the output image if the
    // work-item is in bounds
    if(row < rows && column < cols) {
        //coords.x = row * rows + column ;
        //coords.y = 0;
        //write_imagef(outputImage, coords, sum);
        int idx = row * rows + column ;  // computed but never used below
        outputImage[column] = sum.x ;    // the store is indexed by column only, not by idx
    }
}

这个可以构建:

__kernel  // Each work-item reads the pixel at (column, row) and stores the sum of its x, y and z channels.
void convolution(
   __read_only  image2d_t  sourceImage,  // image read through read_imageui()
   __global float *outputImage,          // float buffer that receives sum.x
   int rows,                             // upper bound checked against the work-item's row
   int cols,                             // upper bound checked against the work-item's column
   sampler_t sampler)                    // sampler passed to read_imageui()
{
   // Store each work-item’s unique row and column
   int column = get_global_id(0);
   int row    = get_global_id(1);
   int2 coords;  // Coordinates for accessing the image
   coords.x = column ;
   coords.y = row ;

   // All accesses to images return data as four-element vector 
   // (i.e., float4), although only the 'x' component will contain 
   // meaningful data in this code
   float4 sum = {0.0f, 0.0f, 0.0f, 0.0f};


    uint4 pixel;
    // Read a pixel from the image.  A single channel image 
    // stores the pixel in the 'x' coordinate of the returned
    // vector.
    pixel = read_imageui(sourceImage, sampler, coords);
    read_mem_fence(CLK_GLOBAL_MEM_FENCE) ;
    sum.x += pixel.x;
    write_mem_fence(CLK_GLOBAL_MEM_FENCE) ;
    write_mem_fence(CLK_LOCAL_MEM_FENCE) ;
    sum.x += pixel.y;
    write_mem_fence(CLK_GLOBAL_MEM_FENCE) ;
    write_mem_fence(CLK_LOCAL_MEM_FENCE) ;
    sum.x += pixel.z;
    write_mem_fence(CLK_GLOBAL_MEM_FENCE) ;
    write_mem_fence(CLK_LOCAL_MEM_FENCE) ;
    // NOTE(review): the fences above sit between updates of the local variable
    // `sum`; presumably left over from debugging -- confirm before relying on them.
   // Copy the data to the output image if the
   // work-item is in bounds
   if(row < rows && column < cols) {
      //coords.x = row * rows + column ;
      //coords.y = 0;
      //write_imagef(outputImage, coords, sum);
      int idx = row * rows + column ;  // computed but never used below
      outputImage[column] = sum.x ;    // the store is indexed by column only, not by idx
   } 
}

最佳答案

进一步排查后发现，问题出在用于打开.cl文件的Windows函数上。

_sopen_s(&fd, filename, _O_RDONLY, _SH_DENYRW, _S_IREAD);

更改为:

_sopen_s(&fd, filename, _O_RDONLY | O_BINARY, _SH_DENYRW, _S_IREAD);

搞定，问题解决了。

下面是不使用O_BINARY标志时，读入的内容出错的地方:

    outputImage[idx] = sum.x ;
    } 
}
lumn ;

    outputImage[idx] = sum.x ;

   } 

}

修复之后，内容恢复了应有的样子:

    outputImage[idx] = sum.x ;

   } 

}

为什么会这样呢？不知道。

编辑:
在阅读了Sharpneli和jprice的评论之后，我决定进行更多的挖掘，以找出确切的问题是什么。

实际上并不需要O_BINARY标志，因为我追查到根本原因在于以下两者之间的差异:

*filesize = (int64_t *)_filelengthi64(fd)

和

bytesRead = _read( fileDescriptor, *fileContents, (UINT)filesize ) ;

文件大小为765，而bytesRead为733。(在文本模式下，_read会把每个回车换行对(CRLF)转换为单个换行符(LF)，因此实际读到的字节数少于文件长度。)

最终引发问题的那行原始代码还在稍后的位置:

//make sure string is null terminated
(*fileContents)[(int)filesize -1] = '\0' ;

并更改为:

(*fileContents)[(int)bytesRead-1] = '\0' ;

瞧

关于build - OpenCL中有两个几乎完全相同的内核(有些制表符而不是空格)，但会生成一个，而不会，我们在Stack Overflow上找到一个类似的问题： https://stackoverflow.com/questions/23232747/

build - OpenCL中有两个几乎完全相同的内核(有些制表符而不是空格)，但会生成一个，而不会

上一篇：actionscript-3 - TypeError : Error #1006: value is not a function - What am I doing wrong?

下一篇：java - 我的Java程序正在创建27个无效错误