python - 在 Python 中获取相差 N 或更多的最小坐标

标签 python database numpy scipy

假设我有一个坐标列表:

data = [
    [(10, 20), (100, 120), (0, 5), (50, 60)],
    [(13, 20), (300, 400), (100, 120), (51, 62)]
]

我想获取出现在 data 的每个列表中的所有元组,以及与其他每个列表(即除自身所在列表以外的列表)中的某个元组相差 3 或更少的所有元组。我怎样才能在 Python 中高效地做到这一点?

对于上面的例子,结果应该是:

[[(100, 120), # since it occurs in both lists
  (10, 20), (13, 20), # since they differ by only 3 
  (50, 60), (51, 62)]]

(0, 5) 和 (300, 400) 不会被包括在内,因为它们既没有同时出现在两个列表中,也不与另一个列表中的任何元素相差 3 或更少。

如何计算?谢谢。

最佳答案

这个的简单实现会很慢:O(n^2),针对每个节点测试每个节点。使用树来加速它。

此实现使用简单的四叉树来提高搜索效率。这不会尝试平衡树,因此排序不当的点列表可能会使其效率非常低。对于很多用途,简单地洗牌列表可能就足够了。请确保不要向它传递大量按坐标排序的项目,因为这会将其缩减为链表。

这里的优化很简单:如果我们要寻找与某个点的欧几里德距离在 3 个单位以内的项目,并且已知某棵子树中的所有项目都至少在该点右侧 3 个单位之外,那么该子树中的任何点与它的距离都不可能小于 3 个单位,因此可以跳过整棵子树。

此代码是公共(public)领域。尽量不要将其作为家庭作业上交。

#!/usr/bin/python
import math

def euclidean_distance(pos1, pos2):
    """
    Return the straight-line distance between two (x, y) positions.

    Only the first two coordinates of each position are read.
    """
    # Sum the squared per-axis differences (x first, then y), then take the root.
    return math.sqrt(sum(math.pow(pos1[i] - pos2[i], 2) for i in (0, 1)))

class QuadTreeNode(object):
    """
    A node of an unbalanced point quadtree.

    Each node stores one (x, y) point and up to four child subtrees, one per
    quadrant relative to that point.  The origin is top-left: "left" means an
    x less than or equal to the node's x, and "top" means a y less than or
    equal to the node's y.
    """

    def __init__(self, pos):
        """
        Create a QuadTreeNode at the specified position.  pos must be an (x, y) tuple.
        Children are classified by quadrant.
        """
        # Children of this node are ordered TL, TR, BL, BR (origin top-left).
        self.children = [None, None, None, None]
        self.pos = pos

    def classify_node(self, pos):
        """
        Return which entry in children can contain pos.  If pos is equal to this
        node, return None.

        >>> node = QuadTreeNode((10, 20))
        >>> node.classify_node((10, 20)) is None
        True
        >>> node.classify_node((2, 2))
        0
        >>> node.classify_node((50, 2))
        1
        >>> node.classify_node((2, 50))
        2
        >>> node.classify_node((50, 50))
        3

        X boundary condition:
        >>> node.classify_node((10, 2))
        0
        >>> node.classify_node((10, 50))
        2

        Y boundary condition:
        >>> node.classify_node((2, 20))
        0
        >>> node.classify_node((50, 20))
        1
        """
        if pos == self.pos:
            return None
        # Points lying exactly on a dividing line belong to the left/top side.
        on_left = pos[0] <= self.pos[0]
        on_top = pos[1] <= self.pos[1]
        if on_left:
            return 0 if on_top else 2
        return 1 if on_top else 3

    def add_node(self, node):
        """
        Add a specified point under this node.

        A node whose position equals one already in the tree is ignored.  The
        descent is a loop rather than recursion so that a degenerate tree
        (e.g. built from coordinate-sorted points, which forms a single long
        chain) cannot exceed the interpreter's recursion limit.
        """
        current = self
        while True:
            quadrant = current.classify_node(node.pos)
            if quadrant is None:
                # node is equal to current, so this is a duplicate node.  Ignore it.
                return

            child = current.children[quadrant]
            if child is None:
                current.children[quadrant] = node
                return
            # We already have a node there; descend and try again from the child.
            current = child

    @staticmethod
    def CreateQuadTree(data):
        """
        Create a quad tree from the specified list of points.

        The first point becomes the root and the remaining points are inserted
        in order.  data must be a non-empty sequence; an empty one raises
        IndexError.
        """
        root = QuadTreeNode(data[0])
        for val in data[1:]:
            root.add_node(QuadTreeNode(val))

        return root

    def distance_from_pos(self, pos):
        """Return the Euclidean distance from this node's position to pos."""
        return euclidean_distance(self.pos, pos)

    def __str__(self):
        return str(self.pos)

    def _quadrant_out_of_range(self, quadrant, pos, distance):
        """
        Return True if the child subtree in the given quadrant cannot hold any
        point within distance of pos, judging by this node's coordinates alone.
        """
        # Quadrants 0 and 2 only hold points at or to the left of this node.
        if quadrant in (0, 2) and self.pos[0] < pos[0] - distance:
            return True
        # Quadrants 1 and 3 only hold points to the right of this node.
        if quadrant in (1, 3) and self.pos[0] > pos[0] + distance:
            return True
        # Quadrants 0 and 1 only hold points at or above this node.
        if quadrant in (0, 1) and self.pos[1] < pos[1] - distance:
            return True
        # Quadrants 2 and 3 only hold points below this node.
        if quadrant in (2, 3) and self.pos[1] > pos[1] + distance:
            return True
        return False

    def find_point_within_range(self, pos, distance):
        """
        If a point exists within the specified Euclidean distance of the specified
        point, return it.  Otherwise, return None.

        The return value is the matching QuadTreeNode (not the bare tuple).
        Nodes are visited depth-first in child order 0..3 using an explicit
        stack, so deep degenerate trees do not hit the recursion limit.
        """
        pending = [self]
        while pending:
            current = pending.pop()
            if current.distance_from_pos(pos) <= distance:
                return current

            # Push in reverse so that quadrant 0 is popped (visited) first.
            for quadrant in (3, 2, 1, 0):
                child = current.children[quadrant]
                if child is None:
                    # We don't have a node in this quadrant.
                    continue
                # If moving into this quadrant would permanently put us out of
                # range of the point, short circuit the search there.
                if current._quadrant_out_of_range(quadrant, pos, distance):
                    continue
                pending.append(child)
        return None

    @staticmethod
    def find_point_in_range_for_all_trees(point, trees, distance):
        """
        If all QuadTreeNodes in trees contain a point within the specified distance
        of point, return True.  Otherwise, return False.
        """
        return all(
            tree.find_point_within_range(point, distance) is not None
            for tree in trees
        )

def test_naive(data, distance):
    """
    Brute-force reference search.

    Return the set of points from any list in data that have, in every list
    of data, at least one point within the given Euclidean distance.  Every
    point is compared against every other point, so this is quadratic.
    """
    def has_neighbour(points, target):
        # True when some point of this list lies within range of target.
        return any(euclidean_distance(p, target) <= distance for p in points)

    matches = set()
    for points in data:
        for candidate in points:
            if all(has_neighbour(other, candidate) for other in data):
                matches.add(candidate)
    return matches

def test_tree(data, distance):
    """
    Quadtree-accelerated search.

    Return the set of points from any list in data that have, in every list
    of data, at least one point within the given Euclidean distance.  One
    quadtree is built per list and each point is looked up in all of them.
    """
    # A quadtree cannot be built from an empty list, and no point can have a
    # neighbour in an empty list, so the answer is empty (as in test_naive).
    if any(len(d) == 0 for d in data):
        return set()

    trees = [QuadTreeNode.CreateQuadTree(d) for d in data]
    results = set()
    for d in data:
        for point in d:
            # Use the caller's distance; it was previously hard-coded to 3.
            if QuadTreeNode.find_point_in_range_for_all_trees(point, trees, distance):
                results.add(point)
    return results

def test():
    """
    Check that the quadtree search agrees with the naive search.

    Runs both implementations on the small sample from the question and then
    on ten random lists of 500 points each, asserting equal results.  Progress
    is printed with single-argument print(...) calls, which behave the same
    under Python 2 and Python 3.
    """
    import random

    sample_data = [
            [(10, 20), (100, 120), (0, 5), (50, 60)],
            [(13, 20), (300, 400), (100, 120), (51, 62)]
    ]
    result1 = test_naive(sample_data, 3)
    result2 = test_tree(sample_data, 3)
    print(result1)
    assert result1 == result2

    # Loosely validate the tree algorithm against a lot of sample data, and compare
    # performance while we're at it:
    def random_data():
        return [(random.randint(0, 1000), random.randint(0, 1000)) for _ in range(0, 500)]
    data = [random_data() for _ in range(0, 10)]

    print("Searching (naive)...")
    result1 = test_naive(data, 3)

    print("Searching (tree)...")
    result2 = test_tree(data, 3)
    assert result1 == result2


if __name__ == "__main__":
    import doctest

    # Compare the naive and tree searches first, then run the docstring examples.
    test()
    doctest.testmod()

关于python - 在 Python 中获取相差 N 或更多的最小坐标,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/2953878/

相关文章:

mysql - 如何在 SQL 中使用 NULL 存储缺失的 FLOAT 值

python - Pandas 时间序列,在一列上分箱并累积在分箱中花费的时间

python - "Invalid parameter type"(numpy.int64) 使用 executemany() 插入行时

php - 如何通过数据库中的 Bootstrap 创建动态模态窗口?

php - 将用户分配到我的 PHP 应用程序中的多个项目,最佳方法?

python - 对绘图中的拟合设置限制

python - 为什么在 numpy `nan == nan` 中为 False 而 [nan] 中的 nan 为 True?

python - 如何创建一个包含每个项目复选框的组合框?

python - scikits-learn pca 降维问题

python - 如何将 (3,) numpy 向量转换为 (2,2,3) 矩阵?