假设我有以下三个数组:
[
[100, 110, 200, 300, 400],
[40, 90, 99, 150, 200],
[101, 202, 404, 505]
]
我将如何着手编写一个函数 getIntersectingRanges(arrays, range)
,它将返回 range
的最大“宽度”的所有范围,其中包含来自所有元素的 1 个或多个元素数组。
所以,如果 range=15
那么它将返回 [[100, 90, 99, 101], [100, 110, 99, 101], [200, 200, 202 ]]
。这些数组中的每一个都包含每个输入范围中的至少一个元素,并且范围的总“宽度”小于或等于 15(第一个是 11,第二个也是 11,第三个只有 3)。
它在概念上非常简单,但我一直很难弄清楚如何编写这样的函数。就像我不是在寻找一个完全充实的解决方案一样,我所需要的只是允许我这样做的算法基础(尽管我显然也很乐意接受一个完整的书面函数)。
由于有些人似乎难以理解这一点,让我举几个更简单的例子(虽然手写这些有点困难,如果我有什么地方出错了,请原谅):
input:
[[10, 20, 30, 40, 50, 60, 70, 80, 90], [55, 84]]
range:5
output:[[50, 55], [55, 60], [80, 84]]
input:
[[10, 20, 30], [40, 50]]
range:10
output:[[30, 40]]
input:
[[15, 30, 699], [16, 800], [10, 801], [11, 803]]
range:10
output:[[15, 16, 10, 11]]
所以我的方法是首先只取前两个数组,然后在第二个数组±范围内搜索第一个数组中的所有元素。到目前为止,它似乎是有道理的,但鉴于此开始,似乎不可能同时匹配上面示例中的第一个和第二个结果...因此,我们将不胜感激任何帮助。
最佳答案
此解决方案以一个对象为特征,该对象的值作为键,值作为给定数组的数组索引。
另一种方法是加快查找项的速度,如果索引位于可能发现的区域之外,则查找项会短路。
Example
Given array:
[ [100, 110, 200, 300, 400], [40, 90, 99, 150, 200], [101, 202, 404, 505] ]
Given range: 15
First sort the given values ascending.
Then iterate from the smallest value to the highest and look if values in range are in all arrays.
Array Values Comment ----- ---------------------------------------------------- -------------------------------------- 0 100 110 200 300 400 1 40 90 99 150 200 2 101 202 404 505 1 here is no other value in this range 1 1 0 2 <- first group, values are in all arrays 1 0 2 0 <- second group, values are in all arrays 0 2 0 only values of two arrays 2 0 only values of two arrays 1 here is no other value in this range 01 2 <- third group, values are in all arrays 2 here is no other value in this range 0 here is no other value in this range 0 2 only values of two arrays 2 here is no other value in this range
Result:
[ [[100], [90, 99], [101]], [[100, 110], [99], [101]], [[200], [200], [202]] ]
function intersection(arrays, range) {
var result = [], // result array
items = [], // all distinct items from arrays
indices = {}, // object for the array indices
temp, // temp array, for pushing a result
i, // counter for pushing values to temp
left = 0, pos = 0, lastPos, // look up range
allArrays, // temporary array for indicating if index is included
arraysLength = arrays.length, // arrays length
itemLength, // length of all items
leftValue, // literally the value from the left range
emptyArrays; // template for the test if all arrays are used
emptyArrays = Array.apply(Array, { length: arraysLength });
arrays.forEach(function (a, i) {
a.forEach(function (item) {
indices[item] = indices[item] || [];
indices[item].push(i);
});
});
items = Object.keys(indices).map(Number).sort(function (a, b) { return a - b; });
itemLength = items.length;
do {
temp = [];
allArrays = emptyArrays.slice(0);
leftValue = items[left];
pos = left;
while (pos < itemLength && items[pos] <= range + leftValue) {
temp.push(items[pos]);
indices[items[pos]].forEach(function (i) {
allArrays[i] = true;
});
pos++;
}
pos !== lastPos && allArrays.every(function (a) { return a; }) && result.push(temp);
left++;
lastPos = pos;
} while (pos < itemLength);
return result;
}
function test(arrays, range) {
var result = intersection(arrays, range);
document.write("<br>arrays:", JSON.stringify(arrays));
document.write("<br>range:", range);
document.write("<br>result:", JSON.stringify(result));
document.write("<br>---");
}
test([[100, 110, 200, 300, 400], [40, 90, 99, 150, 200], [101, 202, 404, 505]], 15);
test([[10, 20, 30, 40, 50, 60, 70, 80, 90], [55, 84]], 5);
test([[10, 20, 30], [40, 50]], 10);
test([[15, 30, 699], [16, 800], [10, 801], [11, 803]], 10);
// taken from the answer of http://stackoverflow.com/a/32868439/1447675 from DzinX
var LARGE_TEST_SIZE = 1000,
largeTest = function () {
var array = [];
for (var i = 0; i < LARGE_TEST_SIZE; ++i) {
var innerArray = [];
for (var j = 0; j < LARGE_TEST_SIZE; ++j) {
innerArray.push((i + j) * 10);
}
array.push(innerArray);
}
return array;
}(),
startTime;
startTime = Date.now();
document.write('<br>' + intersection(largeTest, 20).length + '<br>');
document.write('Duration [ms]: ' + (Date.now() - startTime) + '<br>');
与DzinX的解决方案比较
我刚刚更改了 console.log
至 document.write('<br>' ...
.
请收看Duration
在结果窗口中。
function findRanges(arrays, range) {
// Gather all items into one array:
var items = [];
arrays.forEach(function (array, arrayNumber) {
array.forEach(function (item) {
items.push({
value: item,
arrayNumber: arrayNumber
});
});
});
items.sort(function (left, right) {
return left.value - right.value;
});
var countByArray = [];
arrays.forEach(function () {
countByArray.push(0);
});
var arraysIncluded = 0;
var i = 0,
j = 0, // inclusive
spread = 0,
arrayCount = arrays.length,
itemCount = items.length,
result = [];
function includeItem(pos) {
var arrayNumber = items[pos].arrayNumber;
++countByArray[arrayNumber];
if (countByArray[arrayNumber] === 1) {
++arraysIncluded;
}
}
function excludeItem(pos) {
var arrayNumber = items[pos].arrayNumber;
--countByArray[arrayNumber];
if (countByArray[arrayNumber] === 0) {
--arraysIncluded;
}
}
function allArraysIncluded() {
return arraysIncluded === arrayCount;
}
function extractValue(item) {
return item.value;
}
function saveSpread(start, end) {
result.push(items.slice(start, end).map(extractValue));
}
// First item is already included.
includeItem(0);
while (j < (itemCount - 1)) {
// grow j while you can
while ((spread <= range) && (j < (itemCount - 1))) {
++j;
spread += items[j].value - items[j - 1].value;
includeItem(j);
}
if (spread <= range) {
// We ran out of items and the range is still OK, break out early:
break;
}
// Don't include the last item for checking:
excludeItem(j);
if (allArraysIncluded()) {
saveSpread(i, j);
}
// Include the violating item back and try to reduce the spread:
includeItem(j);
while ((spread > range) && (i < j)) {
spread -= items[i + 1].value - items[i].value;
excludeItem(i);
++i;
}
}
// last check after exiting the loop (j === (itemCount - 1))
if (allArraysIncluded()) {
saveSpread(i, j + 1);
}
return result;
}
function test(arrays, range) {
var result = findRanges(arrays, range);
document.write("<br>arrays:", JSON.stringify(arrays));
document.write("<br>range:", range);
document.write("<br>result:", JSON.stringify(result));
document.write("<br>---");
}
test([[100, 110, 200, 300, 400], [40, 90, 99, 150, 200], [101, 202, 404, 505]], 15);
test([[10, 20, 30, 40, 50, 60, 70, 80, 90], [55, 84]], 5);
test([[10, 20, 30], [40, 50]], 10);
test([[15, 30, 699], [16, 800], [10, 801], [11, 803]], 10);
// A large test (1 million items):
var LARGE_TEST_SIZE = 1000;
var largeTest = (function () {
var array = [];
for (var i = 0; i < LARGE_TEST_SIZE; ++i) {
var innerArray = [];
for (var j = 0; j < LARGE_TEST_SIZE; ++j) {
innerArray.push((i + j) * 10);
}
array.push(innerArray);
}
return array;
})();
var startTime
startTime = Date.now();
document.write('<br>' + findRanges(largeTest, 20).length); // 3
document.write('<br>Duration [ms]: ' + (Date.now() - startTime));
速度比较,不同浏览器
机器:Win 7/64,Core i7-2600 3.40 GHz
Version IE 11 Chrome 45.0 Firefox 40.0.3
------- -------------- -------------- --------------
DzinX 375 ms 688 ms 1323 ms
Nina 335 ms 122 ms 393 ms
关于javascript - n 个数组的基于范围的交集,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/32677714/