我有一组项目,我想将它们放入 N 个不同的桶中。每个项目都有一个与之关联的属性(大小),我希望每个存储桶中该属性的总和大致相等。确定这一点的最佳方法是什么?请注意,项目的大小范围相当大,在我使用的数据集中,最小大小为 1,最大大小为 325,220。
示例:
Item A - size 5
Item B - size 10
Item C - size 8
Item D - size 16
Item E - size 7
如果我想将它们分成 3 个桶,我会想要
Bucket 1: A, B
Bucket 2: C, E
Bucket 3: D
最佳答案
我最终实现了 paper 中描述的完整贪心算法。由 Joe Farrel 链接。我使用的完整 C# 代码如下:
public class Item
{
public int Id { get; }
public int Size { get; }
public Item(int id, int size)
{
Id = id;
size = size;
}
}
public class Partition
{
public int Index { get; }
public ImmutableList<Item> Items { get; } = ImmutableList<Item>.Empty;
public int Sum { get; }
public Partition(int index)
{
Index = index;
}
private Partition(int index, ImmutableList<Item> items, int sum)
{
Index = index;
Item = items;
Sum = sum;
}
public Partition Add(Item item) => new Partition(Index, Items.Add(item), Sum + item.Size);
public static double AverageDifference(ImmutableList<Partition> partitions)
{
var differences = new List<int>();
for (var i = 0; i < partitions.Count; i++)
{
var partition = partitions[i];
var otherPartitions = partitions.RemoveAt(i);
foreach (var otherPartition in otherPartitions)
{
differences.Add(Math.Abs(partition.Sum - otherPartition.Sum));
}
}
return differences.Average();
}
}
public class Node
{
public Item Item { get; set; }
public int Partition { get; set; }
public Node[] Children { get; set; }
}
private (Node tree, int totalSum) InitTree(IEnumerable<Item> items)
{
var root = new Node();
var totalSum = 0;
Node[] previousLevel = {root};
foreach (var item in items.OrderByDescending(i => i.Size))
{
totalSum += item.Size;
var currentLevel = new Node[_numPartitions];
for (var i = 0; i < _numPartitions; i++)
{
currentLevel[i] = new Node
{
Item = item,
Partition = i
};
}
foreach (var node in previousLevel)
{
node.Children = currentLevel;
}
previousLevel = currentLevel;
}
return (root, totalSum);
}
private ImmutableList<Partition> GetPartitions(Node tree, int totalSum)
{
var partitions = ImmutableList<Partition>.Empty;
for (var i = 0; i < _numPartitions; i++)
{
partitions = partitions.Add(new Partition(i));
}
return TraverseTree(tree, partitions, totalSum, double.MaxValue, ImmutableList<Partition>.Empty);
}
private ImmutableList<Partition> TraverseTree(Node node, ImmutableList<Partition> partitions, int totalSum, double bestDifference, ImmutableList<Partition> bestPartitions)
{
var currentPartitions = partitions;
if (node.Item != null) // skip root
{
// place item into its partition
var updatedPartition = currentPartitions[node.Partition].Add(node.Item);
currentPartitions = currentPartitions.SetItem(node.Partition, updatedPartition);
}
// if this is a leaf, partition is complete
if (node.Children == null)
{
return currentPartitions;
}
// terminate path if partition is sufficiently bad
var largestSum = currentPartitions.Max(p => p.Sum);
if (largestSum - (totalSum - largestSum) / (_numPartitions - 1) >= bestDifference)
{
return null;
}
// contintue to traverse tree in ascending partition size order
foreach (var partition in currentPartitions.OrderBy(p => p.Sum))
{
var nextNode = node.Children[partition.Index];
var nextPartitions = TraverseTree(nextNode, currentPartitions, totalSum, bestDifference, bestPartitions);
if (nextPartitions == null) // path was terminated
{
continue;
}
// if we hit a perfect parition set, return it
var nextDifference = Partition.AverageDifference(nextPartitions);
if (nextDifference <= 1)
{
return nextPartitions;
}
// hold on to the best partition
if (nextDifference < bestDifference)
{
bestDifference = nextDifference;
bestPartitions = nextPartitions;
}
}
return bestPartitions;
}
_numPartitions = 4
var items = GetItems()
var (tree, totalSum) = InitTree(items);
var partitions = GetPartitions(tree, totalSum);
关于基于桶总和对数字集进行桶化的算法,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/51252461/