我想把 Sutherland-Hodgman 算法的一个 Python 实现改写成 Cython(cythonize)。该算法根据非常简单的规则(顶点位于边的内侧还是外侧等)更新顶点列表,但细节并不重要。下面是 Python 版本,它接受按顺时针方向排列的多边形顶点列表。例如:
# Subject polygon: the shape that gets clipped (vertices listed clockwise).
sP = [
    (50, 150), (200, 50), (350, 150), (350, 300), (250, 300),
    (200, 250), (150, 350), (100, 250), (100, 200),
]
# Clip polygon: the window to clip against (an axis-aligned square here).
cP = [(100, 100), (300, 100), (300, 300), (100, 300)]
并计算它们的交集:
# Clip the subject polygon sP against the clip polygon cP.
inter=clip(sP, cP)
这是在 Rosetta Code 上找到的代码,我稍作修改,使其在没有交集的情况下返回空列表。
def clip(subjectPolygon, clipPolygon):
    """Clip one polygon against another with the Sutherland-Hodgman scheme.

    The subject polygon is clipped successively against every directed edge
    ``cp1 -> cp2`` of the clip polygon; the output of one pass is the input
    of the next one.

    Args:
        subjectPolygon: sequence of ``(x, y)`` vertices of the polygon to clip.
        clipPolygon: sequence of ``(x, y)`` vertices of the clip window.
            NOTE: Sutherland-Hodgman is defined for a convex clip polygon;
            this is not checked here.

    Returns:
        A new list with the vertices of the clipped polygon. Vertices kept
        from the input are passed through unchanged, intersection points are
        two-element lists of floats. Returns ``[]`` when the subject polygon
        is empty or when nothing is left after clipping against some edge.

    Raises:
        IndexError: if ``clipPolygon`` is empty.
    """

    def inside(p):
        # Strictly positive cross product (cp2 - cp1) x (p - cp1): p lies on
        # the kept side of the current clip edge. Points on the edge are out.
        return (cp2[0] - cp1[0]) * (p[1] - cp1[1]) > (cp2[1] - cp1[1]) * (p[0] - cp1[0])

    def computeIntersection():
        # Intersection of the line through (cp1, cp2) with the line through
        # (s, e). Only called when s and e are on different sides of the clip
        # edge, so the denominator below is non-zero.
        dc = [cp1[0] - cp2[0], cp1[1] - cp2[1]]
        dp = [s[0] - e[0], s[1] - e[1]]
        n1 = cp1[0] * cp2[1] - cp1[1] * cp2[0]
        n2 = s[0] * e[1] - s[1] * e[0]
        n3 = 1.0 / (dc[0] * dp[1] - dc[1] * dp[0])
        return [(n1 * dp[0] - n2 * dc[0]) * n3, (n1 * dp[1] - n2 * dc[1]) * n3]

    # An empty subject has nothing to clip; without this guard the
    # ``inputList[-1]`` lookup below raises IndexError. ``len`` (rather than
    # truthiness) keeps the check valid for array-like inputs as well.
    if len(subjectPolygon) == 0:
        return []

    outputList = subjectPolygon
    cp1 = clipPolygon[-1]
    for cp2 in clipPolygon:
        inputList = outputList
        outputList = []
        # Start with the closing edge: last vertex -> first vertex.
        s = inputList[-1]
        for e in inputList:
            if inside(e):
                if not inside(s):
                    # Entering the kept side: emit the crossing point first.
                    outputList.append(computeIntersection())
                outputList.append(e)
            elif inside(s):
                # Leaving the kept side: emit only the crossing point.
                outputList.append(computeIntersection())
            s = e
        if not outputList:
            # Everything was clipped away by this edge.
            return []
        cp1 = cp2
    return outputList
这个函数对于我的应用程序来说非常慢,所以我尝试结合 numpy 对其进行 cythonize。这是我的 Cython 版本。我必须把两个辅助函数定义在 clip 之外,因为在 clip 内部定义时会收到有关缓冲区类型参数的错误消息。
cython1
cimport cython
import numpy as np
cimport numpy as np
def clip(np.ndarray[np.float32_t, ndim=2] subjectPolygon,np.ndarray[np.float32_t, ndim=2] clipPolygon):
    """Clip subjectPolygon against clipPolygon (Sutherland-Hodgman).

    Both arguments are 2-D float32 arrays with one (x, y) vertex per row.
    The subject is clipped against each edge cp1 -> cp2 of the clip polygon
    in turn, using Python lists of row arrays as working storage.

    Returns a Python list of 1-D float32 arrays (rows taken from the subject
    and points returned by computeIntersection), or [] when the subject
    polygon is empty or nothing is left after clipping against some edge.
    Raises IndexError if clipPolygon has no rows.
    """
    cdef np.ndarray[np.float32_t, ndim=1] cp1
    cdef np.ndarray[np.float32_t, ndim=1] cp2
    cdef int i
    # Nothing to clip; without this guard inputList[-1] raises IndexError.
    if subjectPolygon.shape[0] == 0:
        return []
    outputList = list(subjectPolygon)
    cp1 = clipPolygon[-1, :]
    for i in range(clipPolygon.shape[0]):
        cp2 = clipPolygon[i]
        inputList = outputList
        outputList = []
        # Start with the closing edge: last vertex -> first vertex.
        s = inputList[-1]
        for e in inputList:
            if inside(e, cp1, cp2):
                if not inside(s, cp1, cp2):
                    # Entering the kept side: emit the crossing point first.
                    outputList.append(computeIntersection(cp1, cp2, e, s))
                outputList.append(e)
            elif inside(s, cp1, cp2):
                # Leaving the kept side: emit only the crossing point.
                outputList.append(computeIntersection(cp1, cp2, e, s))
            s = e
        if not outputList:
            # Everything was clipped away by this edge.
            return []
        cp1 = cp2
    return outputList
def computeIntersection(np.ndarray[np.float32_t, ndim=1] cp1, np.ndarray[np.float32_t, ndim=1] cp2, np.ndarray[np.float32_t, ndim=1] e, np.ndarray[np.float32_t, ndim=1] s):
    """Return the crossing point of line (cp1, cp2) and line (s, e).

    All arguments are 1-D float32 arrays holding (x, y); the result is a new
    float32 array [x, y].
    """
    cdef np.ndarray[np.float32_t, ndim=1] clip_delta = cp1 - cp2
    cdef np.ndarray[np.float32_t, ndim=1] subj_delta = s - e
    cdef np.float32_t clip_cross = cp1[0] * cp2[1] - cp1[1] * cp2[0]
    cdef np.float32_t subj_cross = s[0] * e[1] - s[1] * e[0]
    # Reciprocal of the determinant of the two direction vectors.
    cdef np.float32_t inv_det = 1.0 / (clip_delta[0] * subj_delta[1] - clip_delta[1] * subj_delta[0])
    crossing = [
        (clip_cross * subj_delta[0] - subj_cross * clip_delta[0]) * inv_det,
        (clip_cross * subj_delta[1] - subj_cross * clip_delta[1]) * inv_det,
    ]
    return np.array(crossing, dtype=np.float32)
def inside(np.ndarray[np.float32_t, ndim=1] p, np.ndarray[np.float32_t, ndim=1] cp1, np.ndarray[np.float32_t, ndim=1] cp2):
    """Return True when the cross product (cp2 - cp1) x (p - cp1) is strictly positive."""
    cdef np.float32_t edge_dx = cp2[0] - cp1[0]
    cdef np.float32_t edge_dy = cp2[1] - cp1[1]
    cdef np.float32_t rel_x = p[0] - cp1[0]
    cdef np.float32_t rel_y = p[1] - cp1[1]
    return edge_dx * rel_y > edge_dy * rel_x
当我对两个版本进行计时时,只获得了两倍的加速,而我至少需要 10 倍(甚至 100 倍!)。有什么办法吗?在 Cython 中应该如何处理列表?
编辑1:我遵循 @DavidW 的建议,不再使用列表,而是预先分配 numpy 数组并在最后裁剪它们;我现在还使用了 cdef 函数,这本应带来 10 倍的加速,不幸的是我根本没有看到加速!
cython2
cimport cython
import numpy as np
cimport numpy as np
@cython.boundscheck(False)
@cython.wraparound(False)
def clip(np.ndarray[np.float32_t, ndim=2] subjectPolygon,np.ndarray[np.float32_t, ndim=2] clipPolygon):
    """Python-callable entry point: forward both float32 vertex arrays to clip_in_c."""
    clipped = clip_in_c(subjectPolygon, clipPolygon)
    return clipped
@cython.boundscheck(False)
@cython.wraparound(False)
cdef np.ndarray[np.float32_t, ndim=2] clip_in_c(np.ndarray[np.float32_t, ndim=2] subjectPolygon,np.ndarray[np.float32_t, ndim=2] clipPolygon):
    # Sutherland-Hodgman clipping on preallocated float32 buffers.
    #
    # subjectPolygon / clipPolygon: 2-D float32 arrays, one (x, y) vertex per
    # row. The subject is clipped against each edge cp1 -> cp2 of the clip
    # polygon in turn; inputList / outputList are fixed-size work buffers and
    # only their first *_effective_size rows are meaningful.
    #
    # Returns a (k, 2) float32 array with the clipped vertices. When the
    # subject is empty or nothing is left after some edge, the result is an
    # empty (0, 2) array, so callers always get two columns.
    cdef int cp_size = clipPolygon.shape[0]
    cdef int outputList_effective_size = subjectPolygon.shape[0]
    cdef int inputList_effective_size = outputList_effective_size
    # Fixed capacity of both work buffers.
    # NOTE(review): one pass can emit up to two rows per input row, so this
    # capacity is a heuristic rather than a proven bound -- confirm it is
    # sufficient for the polygons used.
    cdef int max_size_inter = outputList_effective_size * cp_size
    cdef int i, j
    cdef int k = -1
    cdef np.ndarray[np.float32_t, ndim=2] outputList
    cdef np.ndarray[np.float32_t, ndim=2] inputList
    cdef np.ndarray[np.float32_t, ndim=1] cp1
    cdef np.ndarray[np.float32_t, ndim=1] cp2

    # Nothing to clip; without this guard the "last row" lookup below fails
    # on a zero-length buffer.
    if outputList_effective_size == 0:
        return np.empty((0, 2), dtype=np.float32)

    outputList = np.empty((max_size_inter, 2), dtype=np.float32)
    inputList = np.empty((max_size_inter, 2), dtype=np.float32)
    cp1 = clipPolygon[cp_size - 1, :]
    outputList[:outputList_effective_size] = subjectPolygon
    for i in range(cp_size):
        cp2 = clipPolygon[i]
        # The previous pass's output becomes this pass's input.
        inputList[:outputList_effective_size] = outputList[:outputList_effective_size]
        inputList_effective_size = outputList_effective_size
        # k is the index of the last row written to outputList in this pass.
        k = -1
        # Start with the closing edge: last vertex -> first vertex.
        s = inputList[inputList_effective_size - 1]
        for j in range(inputList_effective_size):
            e = inputList[j]
            if inside(e, cp1, cp2):
                if not inside(s, cp1, cp2):
                    # Entering the kept side: emit the crossing point first.
                    k += 1
                    outputList[k] = computeIntersection(cp1, cp2, e, s)
                k += 1
                outputList[k] = e
            elif inside(s, cp1, cp2):
                # Leaving the kept side: emit only the crossing point.
                k += 1
                outputList[k] = computeIntersection(cp1, cp2, e, s)
            s = e
        if k < 0:
            # Everything was clipped away by this edge.
            return np.empty((0, 2), dtype=np.float32)
        outputList_effective_size = k + 1
        cp1 = cp2
    return outputList[:outputList_effective_size]
cdef np.ndarray[np.float32_t, ndim=1] computeIntersection(np.ndarray[np.float32_t, ndim=1] cp1, np.ndarray[np.float32_t, ndim=1] cp2, np.ndarray[np.float32_t, ndim=1] e, np.ndarray[np.float32_t, ndim=1] s):
    # Crossing point of line (cp1, cp2) with line (s, e); every argument is a
    # 1-D float32 (x, y) array and the result is a new float32 array [x, y].
    cdef np.ndarray[np.float32_t, ndim=1] clip_delta = cp1 - cp2
    cdef np.ndarray[np.float32_t, ndim=1] subj_delta = s - e
    cdef np.float32_t clip_cross = cp1[0] * cp2[1] - cp1[1] * cp2[0]
    cdef np.float32_t subj_cross = s[0] * e[1] - s[1] * e[0]
    # Reciprocal of the determinant of the two direction vectors.
    cdef np.float32_t inv_det = 1.0 / (clip_delta[0] * subj_delta[1] - clip_delta[1] * subj_delta[0])
    crossing = [
        (clip_cross * subj_delta[0] - subj_cross * clip_delta[0]) * inv_det,
        (clip_cross * subj_delta[1] - subj_cross * clip_delta[1]) * inv_det,
    ]
    return np.array(crossing, dtype=np.float32)
cdef bint inside(np.ndarray[np.float32_t, ndim=1] p, np.ndarray[np.float32_t, ndim=1] cp1, np.ndarray[np.float32_t, ndim=1] cp2):
    # True when the cross product (cp2 - cp1) x (p - cp1) is strictly positive.
    cdef np.float32_t edge_dx = cp2[0] - cp1[0]
    cdef np.float32_t edge_dy = cp2[1] - cp1[1]
    cdef np.float32_t rel_x = p[0] - cp1[0]
    cdef np.float32_t rel_y = p[1] - cp1[1]
    return edge_dx * rel_y > edge_dy * rel_x
这是基准:
import time

import numpy as np

# Both Cython modules name their entry point `clip`; alias them on import so
# the two implementations can be timed side by side.
from cython1 import clip as clip_cython1
from cython2 import clip as clip_cython2

# NOTE(review): this subject polygon has 8 vertices, whereas the sP example at
# the top of the post also contains (350, 300) -- kept as measured.
sp = np.array([[50, 150], [200, 50], [350, 150], [250, 300], [200, 250], [150, 350], [100, 250], [100, 200]], dtype=np.float32)
cp = np.array([[100, 100], [300, 100], [300, 300], [100, 300]], dtype=np.float32)

N_CALLS = 120000


def time_clip(clip_func, n_calls=N_CALLS):
    """Return the wall-clock seconds taken by n_calls of clip_func(sp, cp)."""
    start = time.time()
    for _ in range(n_calls):
        clip_func(sp, cp)
    return time.time() - start


print(time_clip(clip_cython1))
print(time_clip(clip_cython2))
39.45
44.12
第二个更糟糕!
编辑2:来自 CodeReview 的 @Peter Taylor 给出的最佳答案利用了这样一个事实:每次重新计算 inside_s 都是多余的,因为循环末尾有 s=e,而 inside_e 已经计算过了(他还把 dc 和 n1 的计算提到了函数之外,但帮助不大)。
cimport cython
import numpy as np
cimport numpy as np
def clip(np.ndarray[np.float32_t, ndim=2] subjectPolygon,np.ndarray[np.float32_t, ndim=2] clipPolygon):
    """Clip subjectPolygon against clipPolygon (Sutherland-Hodgman).

    Both arguments are 2-D float32 arrays with one (x, y) vertex per row.
    Per clip edge, dc = cp1 - cp2 and n1 are computed once and passed to the
    helpers, and the inside status of the previous vertex is carried over
    instead of being recomputed.

    Returns a Python list of 1-D float32 arrays, or [] when the subject
    polygon is empty or nothing is left after clipping against some edge.
    Raises IndexError if clipPolygon has no rows.
    """
    cdef np.ndarray[np.float32_t, ndim=1] cp1
    cdef np.ndarray[np.float32_t, ndim=1] cp2
    cdef bint inside_e, inside_s
    cdef np.float32_t n1
    cdef np.ndarray[np.float32_t, ndim=1] dc
    cdef int i
    # Nothing to clip; without this guard inputList[-1] raises IndexError.
    if subjectPolygon.shape[0] == 0:
        return []
    outputList = list(subjectPolygon)
    cp1 = clipPolygon[-1, :]
    for i in range(clipPolygon.shape[0]):
        cp2 = clipPolygon[i]
        # Quantities that depend only on the current clip edge.
        n1 = cp1[0] * cp2[1] - cp1[1] * cp2[0]
        dc = cp1 - cp2
        inputList = outputList
        outputList = []
        # Start with the closing edge: last vertex -> first vertex.
        s = inputList[-1]
        inside_s = inside(s, cp1, dc)
        for e in inputList:
            inside_e = inside(e, cp1, dc)
            if inside_e:
                if not inside_s:
                    # Entering the kept side: emit the crossing point first.
                    outputList.append(computeIntersection(dc, n1, e, s))
                outputList.append(e)
            elif inside_s:
                # Leaving the kept side: emit only the crossing point.
                outputList.append(computeIntersection(dc, n1, e, s))
            s = e
            # The current vertex is the next iteration's previous vertex.
            inside_s = inside_e
        if not outputList:
            # Everything was clipped away by this edge.
            return []
        cp1 = cp2
    return outputList
cdef np.ndarray[np.float32_t, ndim=1] computeIntersection(np.ndarray[np.float32_t, ndim=1] dc, np.float32_t n1, np.ndarray[np.float32_t, ndim=1] e, np.ndarray[np.float32_t, ndim=1] s):
    # Crossing point of the current clip edge with line (s, e). dc and n1 are
    # the per-edge values precomputed by the caller; e and s are 1-D float32
    # (x, y) arrays. Returns a new float32 array [x, y].
    cdef np.ndarray[np.float32_t, ndim=1] subj_delta = s - e
    cdef np.float32_t subj_cross = s[0] * e[1] - s[1] * e[0]
    # Reciprocal of the determinant of the two direction vectors.
    cdef np.float32_t inv_det = 1.0 / (dc[0] * subj_delta[1] - dc[1] * subj_delta[0])
    crossing = [
        (n1 * subj_delta[0] - subj_cross * dc[0]) * inv_det,
        (n1 * subj_delta[1] - subj_cross * dc[1]) * inv_det,
    ]
    return np.array(crossing, dtype=np.float32)
cdef bint inside(np.ndarray[np.float32_t, ndim=1] p, np.ndarray[np.float32_t, ndim=1] cp1, np.ndarray[np.float32_t, ndim=1] dc):
    # Same test as (-dc[0])*(p[1]-cp1[1]) > (-dc[1])*(p[0]-cp1[0]), with both
    # sides negated and the comparison flipped (negation is exact in floating
    # point, so the outcome is unchanged).
    return dc[0] * (p[1] - cp1[1]) < dc[1] * (p[0] - cp1[0])
把两个版本混合起来(只使用 numpy 数组,再加上 @Peter Taylor 的技巧),效果反而稍差。不知道为什么?可能是因为我们必须预先分配一个大小为 sP.shape[0]*cp.shape[0] 的很长的数组?
最佳答案
我的速度提高了 15 倍:
In [12]: timeit clippy.clip(clippy.sP, clippy.cP)
10000 loops, best of 3: 126 µs per loop
In [13]: timeit clippy.clip1(clippy.sP, clippy.cP)
10000 loops, best of 3: 75.9 µs per loop
In [14]: timeit myclip.clip(clippy.sP, clippy.cP)
10000 loops, best of 3: 47.1 µs per loop
In [15]: timeit myclip.clip1(clippy.sP, clippy.cP)
100000 loops, best of 3: 8.2 µs per loop
clippy.clip 是您的原始函数。
clippy.clip1 也是纯 Python,但用元组解包替换了大部分列表索引。
def clip1(subjectPolygon, clipPolygon):
    """Sutherland-Hodgman clipping using scalar coordinates instead of indexing.

    Same algorithm as ``clip``, but every point is unpacked into separate
    x/y variables and the inside status of the previous vertex is reused.

    Args:
        subjectPolygon: sequence of ``(x, y)`` vertices of the polygon to clip.
        clipPolygon: sequence of ``(x, y)`` vertices of the clip window.

    Returns:
        A new list with the clipped polygon: kept vertices as ``(x, y)``
        tuples and intersection points as two-element lists of floats.
        Returns ``[]`` when the subject polygon is empty or when nothing is
        left after clipping against some edge.

    Raises:
        IndexError: if ``clipPolygon`` is empty.
    """

    def inside(p0, p1):
        # Strictly positive cross product (cp2 - cp1) x (p - cp1).
        return (cp20 - cp10) * (p1 - cp11) > (cp21 - cp11) * (p0 - cp10)

    def computeIntersection():
        # Crossing point of the clip edge with the subject edge (s, e). Only
        # called when s and e are on different sides of the clip edge.
        dc0, dc1 = cp10 - cp20, cp11 - cp21
        dp0, dp1 = s0 - e0, s1 - e1
        n1 = cp10 * cp21 - cp11 * cp20
        n2 = s0 * e1 - s1 * e0
        n3 = 1.0 / (dc0 * dp1 - dc1 * dp0)
        return [(n1 * dp0 - n2 * dc0) * n3, (n1 * dp1 - n2 * dc1) * n3]

    # An empty subject has nothing to clip; without this guard the
    # ``inputList[-1]`` lookup below raises IndexError.
    if len(subjectPolygon) == 0:
        return []

    outputList = subjectPolygon
    cp10, cp11 = clipPolygon[-1]
    for cp20, cp21 in clipPolygon:
        inputList = outputList
        outputList = []
        # Start with the closing edge: last vertex -> first vertex.
        s0, s1 = inputList[-1]
        s_in = inside(s0, s1)
        for e0, e1 in inputList:
            e_in = inside(e0, e1)
            if e_in:
                if not s_in:
                    # Entering the kept side: emit the crossing point first.
                    outputList.append(computeIntersection())
                outputList.append((e0, e1))
            elif s_in:
                # Leaving the kept side: emit only the crossing point.
                outputList.append(computeIntersection())
            # The current vertex becomes the previous one, status included.
            s0, s1, s_in = e0, e1, e_in
        if not outputList:
            # Everything was clipped away by this edge.
            return []
        cp10, cp11 = cp20, cp21
    return outputList
myclip.clip 是原始函数直接 Cython 化后的版本;仍然使用列表,而不是数组。
myclip.clip1 是第二个函数(clip1)的 Cython 化版本:
cdef computeIntersection1(double cp10, double cp11, double cp20, double cp21,double s0, double s1,double e0, double e1):
    # Crossing point of the line through (cp10, cp11)-(cp20, cp21) with the
    # line through (s0, s1)-(e0, e1), returned as an (x, y) tuple.
    clip_dx, clip_dy = cp10 - cp20, cp11 - cp21
    subj_dx, subj_dy = s0 - e0, s1 - e1
    clip_cross = cp10 * cp21 - cp11 * cp20
    subj_cross = s0 * e1 - s1 * e0
    # Reciprocal of the determinant of the two direction vectors.
    inv_det = 1.0 / (clip_dx * subj_dy - clip_dy * subj_dx)
    x = (clip_cross * subj_dx - subj_cross * clip_dx) * inv_det
    y = (clip_cross * subj_dy - subj_cross * clip_dy) * inv_det
    return x, y
cdef cclip1(subjectPolygon, clipPolygon):
    # Sutherland-Hodgman clipping with all coordinates held in C doubles.
    #
    # subjectPolygon / clipPolygon: sequences of (x, y) pairs. The subject is
    # clipped against each edge (cp10, cp11) -> (cp20, cp21) in turn.
    # s_in / e_in hold the signed cross product of the edge with the vertex;
    # a strictly positive value means the vertex is kept.
    #
    # Returns a list of (x, y) tuples, or [] when the subject polygon is
    # empty or nothing is left after clipping against some edge.
    cdef double cp10, cp11, cp20, cp21
    cdef double s0, s1, e0, e1
    cdef double s_in, e_in
    # Nothing to clip; without this guard inputList[-1] raises IndexError.
    if len(subjectPolygon) == 0:
        return []
    outputList = subjectPolygon
    cp10, cp11 = clipPolygon[-1]
    for cp20, cp21 in clipPolygon:
        inputList = outputList
        outputList = []
        # Start with the closing edge: last vertex -> first vertex.
        s0, s1 = inputList[-1]
        s_in = (cp20 - cp10) * (s1 - cp11) - (cp21 - cp11) * (s0 - cp10)
        for e0, e1 in inputList:
            e_in = (cp20 - cp10) * (e1 - cp11) - (cp21 - cp11) * (e0 - cp10)
            if e_in > 0:
                if s_in <= 0:
                    # Entering the kept side: emit the crossing point first.
                    outputList.append(computeIntersection1(cp10, cp11, cp20, cp21, s0, s1, e0, e1))
                outputList.append((e0, e1))
            elif s_in > 0:
                # Leaving the kept side: emit only the crossing point.
                outputList.append(computeIntersection1(cp10, cp11, cp20, cp21, s0, s1, e0, e1))
            # The current vertex becomes the previous one, status included.
            s0, s1, s_in = e0, e1, e_in
        if not outputList:
            # Everything was clipped away by this edge.
            return []
        cp10, cp11 = cp20, cp21
    return outputList
def clip1(subjectPolygon, clipPolygon):
    """Python-callable wrapper around the cdef function cclip1."""
    clipped = cclip1(subjectPolygon, clipPolygon)
    return clipped
用 -a 选项生成的带注释 HTML 仍然显示相当多的黄色,但大多数计算已经不需要调用 Python。在计算交点的函数(computeIntersection1)中,仍有一处针对除数为 0 的 Python 检查,以及用于构建返回元组的 Python 调用;元组解包也仍然要调用 Python。所以还有改进的空间。
在纯 Python 代码中,使用 numpy 没有任何优势:这些列表很小,而列表元素的访问速度更快。但在 Cython 中,数组可以作为迈向类型化内存视图(typed memoryview)和纯 C 代码的垫脚石。
其他时间。
您的第二次编辑:
In [24]: timeit edit2.clip(np.array(clippy.sP,np.float32), np.array(clippy.cP,np
...: .float32))
1000 loops, best of 3: 228 µs per loop
@Matt 的
边界框
In [25]: timeit clippy.polygon_clip(clippy.rp,clippy.cp,100,100,300,300)
1000 loops, best of 3: 208 µs per loop
扩展类
我通过定义扩展类清理了代码
cdef class Point:
    """Extension type holding a 2-D point as two C doubles."""
    # `public` makes x and y readable and writable from Python code.
    cdef public double x, y

    def __init__(self, x, y):
        self.x, self.y = x, y
这让我可以写出这样的东西:
# Take the last vertex as the "previous" point and record its inside status.
# NOTE(review): insideP is not shown in this post; presumably it is the
# Point-based counterpart of the inside test -- confirm.
s = inputList[-1]
s_in = insideP(s, cp1, cp2)
外层的封装(“cover”)函数必须把元组列表转换成 Point 列表,返回结果时再反向转换回去。
# Wrap every (x, y) pair of the subject polygon in a Point instance.
sP = [Point(*x) for x in subjectPolygon]
这会造成轻微的速度损失。
关于python - 使用 Cython 处理可变大小的列表,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/44765229/