请先忽略代码本身的问题,那是另一回事。我查阅了规范,没有发现任何迹象表明空白字符会导致构建失败或引发其他问题。所以我的问题是:这到底是怎么回事(wtf)?这是AMD驱动程序的问题吗?他们的CodeXL确实是个问题很多的糟糕工具,因为它的调试器似乎从来无法正常工作。
总之,两者的区别在于:第一个内核全部使用制表符进行缩进,而第二个(可以构建的那个)混用了制表符和三个空格。
这是构建日志:
line 44: warning: variable "idx" was declared but never referenced
int idx = row * rows + column ;
^
line 48: warning: this declaration has no storage class or type specifier
umn ;
^
line 48: error: global variable must be declared in addrSpace constant
umn ;
^
line 49: error: identifier "column" is undefined
outputImage[column] = sum.x ;
^
line 49: error: identifier "sum" is undefined
outputImage[column] = sum.x ;
^
line 50: error: expected a declaration
}
^
4 errors detected in the compilation of OCL2836T5.cl.
Frontend phase failed compilation.
这个无法构建:
// Kernel "convolution": each work-item reads one pixel from sourceImage at
// (get_global_id(0), get_global_id(1)), accumulates the pixel's x, y and z
// components into sum.x, and stores sum.x into outputImage when the
// work-item lies inside the rows x cols bounds.
//
// Parameters:
//   sourceImage - read-only 2D image sampled with read_imageui
//   outputImage - global float buffer that receives the result
//   rows, cols  - bounds checked against the work-item's row and column
//   sampler     - sampler passed unchanged to read_imageui
//
// NOTE(review): despite the name, no filter or neighbourhood is applied in
// this body; only a single pixel is read per work-item.
__kernel
void convolution(
__read_only image2d_t sourceImage,
__global float *outputImage,
int rows,
int cols,
sampler_t sampler)
{
// Store each work-item's unique row and column
int column = get_global_id(0);
int row = get_global_id(1);
int2 coords; // Coordinates for accessing the image
coords.x = column ;
coords.y = row ;
// All accesses to images return data as four-element vector
// (i.e., float4), although only the 'x' component will contain
// meaningful data in this code
float4 sum = {0.0f, 0.0f, 0.0f, 0.0f};
uint4 pixel;
// Read a pixel from the image. A single channel image
// stores the pixel in the 'x' coordinate of the returned
// vector.
// Note that the read happens before the bounds check further down.
pixel = read_imageui(sourceImage, sampler, coords);
// NOTE(review): the fences below sit between additions on function-local
// variables (sum, pixel); nothing is stored to outputImage between them.
// They look like debugging leftovers -- confirm before removing.
read_mem_fence(CLK_GLOBAL_MEM_FENCE) ;
sum.x += pixel.x;
write_mem_fence(CLK_GLOBAL_MEM_FENCE) ;
write_mem_fence(CLK_LOCAL_MEM_FENCE) ;
sum.x += pixel.y;
write_mem_fence(CLK_GLOBAL_MEM_FENCE) ;
write_mem_fence(CLK_LOCAL_MEM_FENCE) ;
sum.x += pixel.z;
write_mem_fence(CLK_GLOBAL_MEM_FENCE) ;
write_mem_fence(CLK_LOCAL_MEM_FENCE) ;
// Copy the data to the output image if the
// work-item is in bounds
if(row < rows && column < cols) {
//coords.x = row * rows + column ;
//coords.y = 0;
//write_imagef(outputImage, coords, sum);
// NOTE(review): idx is computed but never used; the store below indexes by
// column only, so every row writes to the same elements. Presumably
// outputImage[idx] was intended, and a row-major stride would normally be
// cols rather than rows -- confirm against the host-side buffer layout.
int idx = row * rows + column ;
outputImage[column] = sum.x ;
}
}
而这个可以构建:
// Kernel "convolution": each work-item reads one pixel from sourceImage at
// (get_global_id(0), get_global_id(1)), accumulates the pixel's x, y and z
// components into sum.x, and stores sum.x into outputImage when the
// work-item lies inside the rows x cols bounds.
//
// Parameters:
//   sourceImage - read-only 2D image sampled with read_imageui
//   outputImage - global float buffer that receives the result
//   rows, cols  - bounds checked against the work-item's row and column
//   sampler     - sampler passed unchanged to read_imageui
//
// NOTE(review): despite the name, no filter or neighbourhood is applied in
// this body; only a single pixel is read per work-item.
__kernel
void convolution(
__read_only image2d_t sourceImage,
__global float *outputImage,
int rows,
int cols,
sampler_t sampler)
{
// Store each work-item's unique row and column
int column = get_global_id(0);
int row = get_global_id(1);
int2 coords; // Coordinates for accessing the image
coords.x = column ;
coords.y = row ;
// All accesses to images return data as four-element vector
// (i.e., float4), although only the 'x' component will contain
// meaningful data in this code
float4 sum = {0.0f, 0.0f, 0.0f, 0.0f};
uint4 pixel;
// Read a pixel from the image. A single channel image
// stores the pixel in the 'x' coordinate of the returned
// vector.
// Note that the read happens before the bounds check further down.
pixel = read_imageui(sourceImage, sampler, coords);
// NOTE(review): the fences below sit between additions on function-local
// variables (sum, pixel); nothing is stored to outputImage between them.
// They look like debugging leftovers -- confirm before removing.
read_mem_fence(CLK_GLOBAL_MEM_FENCE) ;
sum.x += pixel.x;
write_mem_fence(CLK_GLOBAL_MEM_FENCE) ;
write_mem_fence(CLK_LOCAL_MEM_FENCE) ;
sum.x += pixel.y;
write_mem_fence(CLK_GLOBAL_MEM_FENCE) ;
write_mem_fence(CLK_LOCAL_MEM_FENCE) ;
sum.x += pixel.z;
write_mem_fence(CLK_GLOBAL_MEM_FENCE) ;
write_mem_fence(CLK_LOCAL_MEM_FENCE) ;
// Copy the data to the output image if the
// work-item is in bounds
if(row < rows && column < cols) {
//coords.x = row * rows + column ;
//coords.y = 0;
//write_imagef(outputImage, coords, sum);
// NOTE(review): idx is computed but never used; the store below indexes by
// column only, so every row writes to the same elements. Presumably
// outputImage[idx] was intended, and a row-major stride would normally be
// cols rather than rows -- confirm against the host-side buffer layout.
int idx = row * rows + column ;
outputImage[column] = sum.x ;
}
}
最佳答案
进一步排查后发现,问题似乎出在用于打开.cl文件的Windows函数上。
_sopen_s(&fd, filename, _O_RDONLY, _SH_DENYRW, _S_IREAD);
更改为:
_sopen_s(&fd, filename, _O_RDONLY | O_BINARY, _SH_DENYRW, _S_IREAD);
哒哒,问题解决了。
下面是不使用O_BINARY标志时,读入的内核源码末尾被破坏的样子:
outputImage[idx] = sum.x ;
}
}
lumn ;
outputImage[idx] = sum.x ;
}
}
而修复之后,它应有的样子是:
outputImage[idx] = sum.x ;
}
}
为什么会这样呢?不知道。
编辑:
在阅读了Sharpneli和jprice的评论之后,我决定进行更多的挖掘,以找出确切的问题是什么。
实际上并不需要O_BINARY标志,因为我找到了根本原因,即下面两者返回结果之间的差异:
*filesize = (int64_t *)_filelengthi64(fd)
和
bytesRead = _read( fileDescriptor, *fileContents, (UINT)filesize ) ;
文件大小为765,而bytesRead为733。(在文本模式下,_read会把每个CR-LF换行符转换为单个LF,因此实际读到的字节数少于文件长度;缓冲区中超出bytesRead的部分是未初始化的残留数据。)
而最终引发问题的那行原始代码,还在稍后一点的位置:
//make sure string is null terminated
(*fileContents)[(int)filesize -1] = '\0' ;
并更改为:
(*fileContents)[(int)bytesRead-1] = '\0' ;
瞧
关于build - OpenCL中有两个几乎完全相同的内核(区别仅在于缩进使用制表符还是空格),但其中一个可以构建,另一个却不行,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/23232747/