接听another question ,我编写了下面的程序来比较排序数组中的不同搜索方法。基本上我比较了插值搜索的两种实现和二分搜索的一种。我通过计算不同变体所花费的周期(使用相同的数据集)来比较性能。
不过,我确信有办法优化这些功能,让它们变得更快。有人对如何使此搜索功能更快有任何想法吗? C 或 C++ 中的解决方案是可以接受的,但我需要它来处理具有 100000 个元素的数组。
#include <stdlib.h>
#include <stdio.h>
#include <time.h>
#include <stdint.h>
#include <assert.h>
static __inline__ unsigned long long rdtsc(void)
{
unsigned long long int x;
__asm__ volatile (".byte 0x0f, 0x31" : "=A" (x));
return x;
}
int interpolationSearch(int sortedArray[], int toFind, int len) {
// Returns index of toFind in sortedArray, or -1 if not found
int64_t low = 0;
int64_t high = len - 1;
int64_t mid;
int l = sortedArray[low];
int h = sortedArray[high];
while (l <= toFind && h >= toFind) {
mid = low + (int64_t)((int64_t)(high - low)*(int64_t)(toFind - l))/((int64_t)(h-l));
int m = sortedArray[mid];
if (m < toFind) {
l = sortedArray[low = mid + 1];
} else if (m > toFind) {
h = sortedArray[high = mid - 1];
} else {
return mid;
}
}
if (sortedArray[low] == toFind)
return low;
else
return -1; // Not found
}
int interpolationSearch2(int sortedArray[], int toFind, int len) {
// Returns index of toFind in sortedArray, or -1 if not found
int low = 0;
int high = len - 1;
int mid;
int l = sortedArray[low];
int h = sortedArray[high];
while (l <= toFind && h >= toFind) {
mid = low + ((float)(high - low)*(float)(toFind - l))/(1+(float)(h-l));
int m = sortedArray[mid];
if (m < toFind) {
l = sortedArray[low = mid + 1];
} else if (m > toFind) {
h = sortedArray[high = mid - 1];
} else {
return mid;
}
}
if (sortedArray[low] == toFind)
return low;
else
return -1; // Not found
}
int binarySearch(int sortedArray[], int toFind, int len)
{
// Returns index of toFind in sortedArray, or -1 if not found
int low = 0;
int high = len - 1;
int mid;
int l = sortedArray[low];
int h = sortedArray[high];
while (l <= toFind && h >= toFind) {
mid = (low + high)/2;
int m = sortedArray[mid];
if (m < toFind) {
l = sortedArray[low = mid + 1];
} else if (m > toFind) {
h = sortedArray[high = mid - 1];
} else {
return mid;
}
}
if (sortedArray[low] == toFind)
return low;
else
return -1; // Not found
}
int order(const void *p1, const void *p2) { return *(int*)p1-*(int*)p2; }
int main(void) {
int i = 0, j = 0, size = 100000, trials = 10000;
int searched[trials];
srand(-time(0));
for (j=0; j<trials; j++) { searched[j] = rand()%size; }
while (size > 10){
int arr[size];
for (i=0; i<size; i++) { arr[i] = rand()%size; }
qsort(arr,size,sizeof(int),order);
unsigned long long totalcycles_bs = 0;
unsigned long long totalcycles_is_64 = 0;
unsigned long long totalcycles_is_float = 0;
unsigned long long totalcycles_new = 0;
int res_bs, res_is_64, res_is_float, res_new;
for (j=0; j<trials; j++) {
unsigned long long tmp, cycles = rdtsc();
res_bs = binarySearch(arr,searched[j],size);
tmp = rdtsc(); totalcycles_bs += tmp - cycles; cycles = tmp;
res_is_64 = interpolationSearch(arr,searched[j],size);
assert(res_is_64 == res_bs || arr[res_is_64] == searched[j]);
tmp = rdtsc(); totalcycles_is_64 += tmp - cycles; cycles = tmp;
res_is_float = interpolationSearch2(arr,searched[j],size);
assert(res_is_float == res_bs || arr[res_is_float] == searched[j]);
tmp = rdtsc(); totalcycles_is_float += tmp - cycles; cycles = tmp;
}
printf("----------------- size = %10d\n", size);
printf("binary search = %10llu\n", totalcycles_bs);
printf("interpolation uint64_t = %10llu\n", totalcycles_is_64);
printf("interpolation float = %10llu\n", totalcycles_is_float);
printf("new = %10llu\n", totalcycles_new);
printf("\n");
size >>= 1;
}
}
最佳答案
如果您对数据的内存布局有一定的控制权,您可能需要查看 Judy 数组。
或者说一个更简单的想法:二分搜索总是将搜索空间减半。可以通过插值找到最佳切点(切点不应该是预期关键所在的位置,而是最小化下一步搜索空间的统计期望的点)。这最大限度地减少了步骤的数量,但......并非所有步骤都具有相同的成本。如果可以保持局部性,分层存储器允许在单个测试的同时执行多个测试。由于二分搜索的前 M 步最多只涉及 2**M 个唯一元素,将这些存储在一起可以更好地减少每个缓存行获取(而不是每次比较)的搜索空间,这在现实世界中具有更高的性能。
n-ary 树在此基础上工作,然后 Judy 数组添加了一些不太重要的优化。
底线:即使是“随机存取存储器”(RAM)在顺序访问时也比随机访问要快。搜索算法应该充分利用这一事实。
关于c++ - 排序数组最快的搜索方法是什么?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/4753977/