double diff, dsq = 0;
double *descr1, *descr2;
int i, d;
for (i = 0; i < d; ++i)
{
diff = descr1[i] - descr2[i];
dsq += diff * diff;
}
return dsq;
我想优化这部分代码,它在我的程序中花费了最多的时间。 如果这个双倍乘法以优化的方式执行,我的程序可以运行得非常快。 是否有其他乘法方式而不是使用 * 运算符导致程序运行速度更快? 非常感谢。
最佳答案
这绝对是 Duff's Device 的情况.
这是我的实现,基于 Duff 的设备。
(注意:仅经过轻微测试...这必须在调试器中逐步执行以确保正确的行为)
void fnc(void)
{
double dsq = 0.0;
double diff[8] = {0.0};
double descr1[115];
double descr2[115];
double* pD1 = descr1;
double* pD2 = descr2;
int d = 115;
//Fill with random data for testing
for(int i=0; i<d; ++i)
{
descr1[i] = (double)rand() / (double)rand();
descr2[i] = (double)rand() / (double)rand();
}
// Duff's Device: Step through this in a debugger, its AMAZING.
int c = (d + 7) / 8;
switch(d % 8) {
case 0: do { diff[0] = *pD1++ - *pD2++; diff[0] *= diff[0];
case 7: diff[7] = *pD1++ - *pD2++; diff[7] *= diff[7];
case 6: diff[6] = *pD1++ - *pD2++; diff[6] *= diff[6];
case 5: diff[5] = *pD1++ - *pD2++; diff[5] *= diff[5];
case 4: diff[4] = *pD1++ - *pD2++; diff[4] *= diff[4];
case 3: diff[3] = *pD1++ - *pD2++; diff[3] *= diff[3];
case 2: diff[2] = *pD1++ - *pD2++; diff[2] *= diff[2];
case 1: diff[1] = *pD1++ - *pD2++; diff[1] *= diff[1];
dsq += diff[0] + diff[1] + diff[2] + diff[3] + diff[4] + diff[5] + diff[6] + diff[7];
} while(--c > 0);
}
}
说明
正如其他人所说,您几乎无法优化浮点运算。 但是,在您的原始代码中,程序花费了大量时间来检查
i
的值。
执行步骤大致是:
Is i < d? ==> Yes
Do some math.
Is i < d? ==> Yes
Do some math.
Is i < d? ==> Yes
Do some math.
Is i < d? ==> Yes
Do some math.
您可以看到每隔一个步骤都在检查 i
。
使用 Duff 的设备,您可以在检查计数器(在本例中为 c
)之前进行 8 次操作。
现在执行步骤大致是:
Is c > 0? ==> Yes
Do some math.
Do some math.
Do some math.
Do some math.
Do some math.
Do some math.
Do some math.
Do some math.
Is c > 0? ==> Yes
Do some math.
Do some math.
Do some math.
Do some math.
Do some math.
Do some math.
Do some math.
Do some math.
Is c > 0? ==> Yes
[...]
换句话说,您实际完成工作所花费的 CPU 大约是原来的 8 倍,而检查计数器值的时间则少得多。这是一个大的胜利。
我怀疑您甚至可以将循环进一步展开到 16 或 32 次操作以获得更大的胜利。这实际上取决于代码中 d
的可能值。
请测试和分析这段代码,让我知道它是如何为你工作的。
我有一种强烈的感觉,这将是一个很大的改进。
关于c - 如何优化这段代码?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/18682725/