作为更大应用程序的一部分,我需要计算以下代码:
ax2 += (int)(25 + 0.5);
ay2 += (int)(25 + 0.5);
bx2 += (int)(25 + 0.5);
by2 += (int)(25 + 0.5);
cx2 += (int)(25 + 0.5);
cy2 += (int)(25 + 0.5);
for (int ix = -1; ix <= 1; ix++){
for (int iy = -1; iy <= 1; iy++){
suma += (grayImage.at<uchar>(ay2 + iy, ax2 + ix) - grayImage.at<uchar>(by2 + iy, bx2 + ix))* grayImage.at<uchar>(ay2 + iy, ax2 + ix) - grayImage.at<uchar>(by2 + iy, bx2 + ix));
}
}
它主要计算两个 3X3 block 的平方差之和。
运行速度极慢。有什么办法可以加快速度吗?
编辑:
我改成如下版本:
for (int ix = -1; ix <= 1; ix++){
for (int iy = -1; iy <= 1; iy++){
double difa = grayImage.at<uchar>(ay2 + iy, ax2 + ix) - grayImage.at<uchar>(by2 + iy, bx2 + ix);
suma += (difa)*(difa);
}
}
而且它运行得更快,但是有什么办法可以进一步改进它吗?
谢谢,
吉尔。
编辑:根据评论和答案,我没有使用以下代码:
//int iy = -1;
Mi_a = grayImage.ptr<uchar>(ay2 - 1);
Mi_b = grayImage.ptr<uchar>(by2 - 1);
Mi_c = grayImage.ptr<uchar>(cy2 - 1);
difa = Mi_a[ax2 - 1] - Mi_b[bx2 - 1];
suma += (difa)*(difa);
difc = Mi_c[cx2 - 1] - Mi_b[bx2 - 1];
sumc += (difc)*(difc);
difa = Mi_a[ax2 + 0] - Mi_b[bx2 + 0];
suma += (difa)*(difa);
difc = Mi_c[cx2 + 0] - Mi_b[bx2 + 0];
sumc += (difc)*(difc);
difa = Mi_a[ax2 + 1] - Mi_b[bx2 + 1];
suma += (difa)*(difa);
difc = Mi_c[cx2 + 1] - Mi_b[bx2 + 1];
sumc += (difc)*(difc);
//int iy=0;
Mi_a = grayImage.ptr<uchar>(ay2 + 0);
Mi_b = grayImage.ptr<uchar>(by2 + 0);
Mi_c = grayImage.ptr<uchar>(cy2 + 0);
difa = Mi_a[ax2 - 1] - Mi_b[bx2 - 1];
suma += (difa)*(difa);
difc = Mi_c[cx2 - 1] - Mi_b[bx2 - 1];
sumc += (difc)*(difc);
difa = Mi_a[ax2 + 0] - Mi_b[bx2 + 0];
suma += (difa)*(difa);
difc = Mi_c[cx2 + 0] - Mi_b[bx2 + 0];
sumc += (difc)*(difc);
difa = Mi_a[ax2 + 1] - Mi_b[bx2 + 1];
suma += (difa)*(difa);
difc = Mi_c[cx2 + 1] - Mi_b[bx2 + 1];
sumc += (difc)*(difc);
//int iy=1
Mi_a = grayImage.ptr<uchar>(ay2 + 1);
Mi_b = grayImage.ptr<uchar>(by2 + 1);
Mi_c = grayImage.ptr<uchar>(cy2 + 1);
difa = Mi_a[ax2 - 1] - Mi_b[bx2 - 1];
suma += (difa)*(difa);
difc = Mi_c[cx2 - 1] - Mi_b[bx2 - 1];
sumc += (difc)*(difc);
difa = Mi_a[ax2 + 0] - Mi_b[bx2 + 0];
suma += (difa)*(difa);
difc = Mi_c[cx2 + 0] - Mi_b[bx2 + 0];
sumc += (difc)*(difc);
difa = Mi_a[ax2 + 1] - Mi_b[bx2 + 1];
suma += (difa)*(difa);
difc = Mi_c[cx2 + 1] - Mi_b[bx2 + 1];
sumc += (difc)*(difc);
或者它是具有相同运行时间的循环版本:
for (int iy = -1; iy <= 1; iy++)
{
const uchar * Mi_a = grayImage.ptr<uchar>(ay2 + iy);
const uchar * Mi_b = grayImage.ptr<uchar>(by2 + iy);
const uchar * Mi_c = grayImage.ptr<uchar>(cy2 + iy);
for (int ix = -1; ix <= 1; ix++)
{
double difa = Mi_a[ax2 + ix] - Mi_b[bx2 + ix];
suma += (difa)*(difa);
double difc = Mi_c[cx2 + ix] - Mi_b[bx2 + ix];
sumc += (difc)*(difc);
}
}
有什么方法可以进一步加速吗?
谢谢
吉尔
最佳答案
If you need to process a whole row of a 2D array, the most efficient way is to get the pointer to the row first, and then just use the plain C operator []
所以:
for (int iy = -1; iy <= 1; iy++)
{
const uchar * Mi_a = grayImage.ptr<uchar>(ay2 + iy);
const uchar * Mi_b = grayImage.ptr<uchar>(by2 + iy);
for (int ix = -1; ix <= 1; ix++)
{
double difa = Mi_a[ax2 + ix] - Mi_b[bx2 + ix]
suma += (difa)*(difa);
}
}
关于c++ - OpenCV - 加速 3x3 补丁的 SSD 计算,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/24286624/