我构建了两个包含图像直方图值的字典。每个字典都以图像文件的文件名作为键,并以三个一维向量的列表作为其值。
示例: {'someFileName.jpg' : ['forecolor=2,3,5,5,6','edge=2,4,5','texture= 5,4,3']}
这是我的其中一个字典的实际表示:
字典1
{'/Users/images/Transcend-8GB-Class-10-SDHC-Flash-Memory-Card.jpg': ['fcolor=2,4,14,5,0,0,0,0,0,0,0,0,0,0,12,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,6,0,0,0,0,1,0,0,0,0,0,0,0,0,0,20,9,0,0,0,2,2,0,0,0,0,0,0,0,0,0,13,6,0,0,0,1,0,0,0,0,0,0,0,0,0,0,10,8,0,0,0,1,2,0,0,0,0,0,0,0,0,0,17,17,0,0,0,3,6,0,0,0,0,0,0,0,0,0,7,5,0,0,0,2,0,0,0,0,0,0,0,0,0,0,4,3,0,0,0,1,1,0,0,0,0,0,0,0,0,0,6,6,0,0,0,2,3', 'edge=1,252,1,32,124,194,63,252,67,15,240,1,7,244,66,47,0,192,63', 'texture=1,78,27,37,13,6,6,7,78']}
字典2
{'/Users/images/kodax-camera-M531.jpg': ['fcolor=2,74,6,20,30,1,2,0,1,0,0,0,1,3,2,0,0,0,0,0,1,1,1,0,0,2,0,0,0,2,2,0,0,0,0,0,2,2,1,0,0,5,0,0,0,1,4,0,0,0,0,0,2,2,1,0,0,1,0,0,0,3,1,0,0,0,0,0,1,1,0,0,0,3,0,0,0,1,2,0,0,0,0,0,2,2,1,0,0,4,0,0,0,0,5,0,0,0,0,0,2,1,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0', 'edge=1,4,1,88,128,22,8,39,25,142,230,226,31,60,64,255,252,12,76', 'texture=1,15,32,31,28,19,16,12,98']}
我的最终目标是将这两个字典传递给一个方法,并实际计算余弦相似度。
示例:每个字典都以列表作为其值,因此对于每个字典键,我想在字典1的 key1、value1 与字典2的 key1、value1 之间进行向量乘法,
我有向量乘法函数,所以我试图弄清楚如何正确迭代,我正在考虑使用yield函数,但当我尝试时它并没有真正起作用。这是我到目前为止所拥有的:
def cosignSimilarity(image1VectorDict, image2VectorDict):
    """Print every pairing of feature strings between two image dictionaries.

    Each argument maps an image file name to a list of feature strings
    (for example ``'fcolor=2,4,14'``).  For every feature string of every
    image in ``image1VectorDict`` the function prints that string once, and
    then prints it again, followed by a blank-line separator, next to every
    feature string of every image in ``image2VectorDict``.

    The pairing is a full cross product, not a feature-by-feature match, so
    each value from the first dictionary is printed many times.

    Args:
        image1VectorDict: dict of file name -> list of feature strings.
        image2VectorDict: dict of file name -> list of feature strings.

    Returns:
        None.  All output is written to standard output.
    """
    # items()/values() and single-argument print(...) behave the same on
    # Python 2 and Python 3, so the function runs unchanged on both.
    for image1Key, image1Value in image1VectorDict.items():
        print(image1Key)
        for aValue in image1Value:
            print(aValue)
            # Only the value lists of the second dictionary are used here;
            # its keys are never printed.
            for image2Value in image2VectorDict.values():
                for eValue in image2Value:
                    print(aValue)
                    print("\n")
                    print(eValue)
仅供引用:我并不是在求余弦计算方面的帮助。
下面就是我当前的代码输出数据的方式。如果我能把一个字典的键与另一个字典的键一一对应起来,那么我就可以完成其余的工作,例如计算余弦值。
First Dictionary
{'/Users/test/Transcend-8GB-Class-10-SDHC-Flash-Memory-Card.jpg': ['fcolor=2,4,14,5,0,0,0,0,0,0,0,0,0,0,12,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,6,0,0,0,0,1,0,0,0,0,0,0,0,0,0,20,9,0,0,0,2,2,0,0,0,0,0,0,0,0,0,13,6,0,0,0,1,0,0,0,0,0,0,0,0,0,0,10,8,0,0,0,1,2,0,0,0,0,0,0,0,0,0,17,17,0,0,0,3,6,0,0,0,0,0,0,0,0,0,7,5,0,0,0,2,0,0,0,0,0,0,0,0,0,0,4,3,0,0,0,1,1,0,0,0,0,0,0,0,0,0,6,6,0,0,0,2,3', 'edge=1,252,1,32,124,194,63,252,67,15,240,1,7,244,66,47,0,192,63', 'texture=1,78,27,37,13,6,6,7,78']}
------------------
Second Dictionary
{'/Users/test/kodax-camera-M531.jpg': ['fcolor=2,74,6,20,30,1,2,0,1,0,0,0,1,3,2,0,0,0,0,0,1,1,1,0,0,2,0,0,0,2,2,0,0,0,0,0,2,2,1,0,0,5,0,0,0,1,4,0,0,0,0,0,2,2,1,0,0,1,0,0,0,3,1,0,0,0,0,0,1,1,0,0,0,3,0,0,0,1,2,0,0,0,0,0,2,2,1,0,0,4,0,0,0,0,5,0,0,0,0,0,2,1,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0', 'edge=1,4,1,88,128,22,8,39,25,142,230,226,31,60,64,255,252,12,76', 'texture=1,15,32,31,28,19,16,12,98']}
++++++++++++++++++
/Users/test/Transcend-8GB-Class-10-SDHC-Flash-Memory-Card.jpg
fcolor=2,4,14,5,0,0,0,0,0,0,0,0,0,0,12,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,6,0,0,0,0,1,0,0,0,0,0,0,0,0,0,20,9,0,0,0,2,2,0,0,0,0,0,0,0,0,0,13,6,0,0,0,1,0,0,0,0,0,0,0,0,0,0,10,8,0,0,0,1,2,0,0,0,0,0,0,0,0,0,17,17,0,0,0,3,6,0,0,0,0,0,0,0,0,0,7,5,0,0,0,2,0,0,0,0,0,0,0,0,0,0,4,3,0,0,0,1,1,0,0,0,0,0,0,0,0,0,6,6,0,0,0,2,3
fcolor=2,4,14,5,0,0,0,0,0,0,0,0,0,0,12,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,6,0,0,0,0,1,0,0,0,0,0,0,0,0,0,20,9,0,0,0,2,2,0,0,0,0,0,0,0,0,0,13,6,0,0,0,1,0,0,0,0,0,0,0,0,0,0,10,8,0,0,0,1,2,0,0,0,0,0,0,0,0,0,17,17,0,0,0,3,6,0,0,0,0,0,0,0,0,0,7,5,0,0,0,2,0,0,0,0,0,0,0,0,0,0,4,3,0,0,0,1,1,0,0,0,0,0,0,0,0,0,6,6,0,0,0,2,3
fcolor=2,74,6,20,30,1,2,0,1,0,0,0,1,3,2,0,0,0,0,0,1,1,1,0,0,2,0,0,0,2,2,0,0,0,0,0,2,2,1,0,0,5,0,0,0,1,4,0,0,0,0,0,2,2,1,0,0,1,0,0,0,3,1,0,0,0,0,0,1,1,0,0,0,3,0,0,0,1,2,0,0,0,0,0,2,2,1,0,0,4,0,0,0,0,5,0,0,0,0,0,2,1,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0
fcolor=2,4,14,5,0,0,0,0,0,0,0,0,0,0,12,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,6,0,0,0,0,1,0,0,0,0,0,0,0,0,0,20,9,0,0,0,2,2,0,0,0,0,0,0,0,0,0,13,6,0,0,0,1,0,0,0,0,0,0,0,0,0,0,10,8,0,0,0,1,2,0,0,0,0,0,0,0,0,0,17,17,0,0,0,3,6,0,0,0,0,0,0,0,0,0,7,5,0,0,0,2,0,0,0,0,0,0,0,0,0,0,4,3,0,0,0,1,1,0,0,0,0,0,0,0,0,0,6,6,0,0,0,2,3
edge=1,4,1,88,128,22,8,39,25,142,230,226,31,60,64,255,252,12,76
fcolor=2,4,14,5,0,0,0,0,0,0,0,0,0,0,12,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,6,0,0,0,0,1,0,0,0,0,0,0,0,0,0,20,9,0,0,0,2,2,0,0,0,0,0,0,0,0,0,13,6,0,0,0,1,0,0,0,0,0,0,0,0,0,0,10,8,0,0,0,1,2,0,0,0,0,0,0,0,0,0,17,17,0,0,0,3,6,0,0,0,0,0,0,0,0,0,7,5,0,0,0,2,0,0,0,0,0,0,0,0,0,0,4,3,0,0,0,1,1,0,0,0,0,0,0,0,0,0,6,6,0,0,0,2,3
texture=1,15,32,31,28,19,16,12,98
edge=1,252,1,32,124,194,63,252,67,15,240,1,7,244,66,47,0,192,63
edge=1,252,1,32,124,194,63,252,67,15,240,1,7,244,66,47,0,192,63
fcolor=2,74,6,20,30,1,2,0,1,0,0,0,1,3,2,0,0,0,0,0,1,1,1,0,0,2,0,0,0,2,2,0,0,0,0,0,2,2,1,0,0,5,0,0,0,1,4,0,0,0,0,0,2,2,1,0,0,1,0,0,0,3,1,0,0,0,0,0,1,1,0,0,0,3,0,0,0,1,2,0,0,0,0,0,2,2,1,0,0,4,0,0,0,0,5,0,0,0,0,0,2,1,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0
edge=1,252,1,32,124,194,63,252,67,15,240,1,7,244,66,47,0,192,63
edge=1,4,1,88,128,22,8,39,25,142,230,226,31,60,64,255,252,12,76
edge=1,252,1,32,124,194,63,252,67,15,240,1,7,244,66,47,0,192,63
texture=1,15,32,31,28,19,16,12,98
texture=1,78,27,37,13,6,6,7,78
texture=1,78,27,37,13,6,6,7,78
fcolor=2,74,6,20,30,1,2,0,1,0,0,0,1,3,2,0,0,0,0,0,1,1,1,0,0,2,0,0,0,2,2,0,0,0,0,0,2,2,1,0,0,5,0,0,0,1,4,0,0,0,0,0,2,2,1,0,0,1,0,0,0,3,1,0,0,0,0,0,1,1,0,0,0,3,0,0,0,1,2,0,0,0,0,0,2,2,1,0,0,4,0,0,0,0,5,0,0,0,0,0,2,1,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0
texture=1,78,27,37,13,6,6,7,78
edge=1,4,1,88,128,22,8,39,25,142,230,226,31,60,64,255,252,12,76
texture=1,78,27,37,13,6,6,7,78
texture=1,15,32,31,28,19,16,12,98
显然,正如你所看到的,我吐出了许多相同值的重复
这些是我正在处理的实际字典:
字典1:
{'/Users/test/Transcend-8GB-Class-10-SDHC-Flash-Memory-Card.jpg': ['fcolor=2,4,14,5,0,0,0,0,0,0,0,0,0,0,12,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,6,0,0,0,0,1,0,0,0,0,0,0,0,0,0,20,9,0,0,0,2,2,0,0,0,0,0,0,0,0,0,13,6,0,0,0,1,0,0,0,0,0,0,0,0,0,0,10,8,0,0,0,1,2,0,0,0,0,0,0,0,0,0,17,17,0,0,0,3,6,0,0,0,0,0,0,0,0,0,7,5,0,0,0,2,0,0,0,0,0,0,0,0,0,0,4,3,0,0,0,1,1,0,0,0,0,0,0,0,0,0,6,6,0,0,0,2,3', 'edge=1,252,1,32,124,194,63,252,67,15,240,1,7,244,66,47,0,192,63', 'texture=1,78,27,37,13,6,6,7,78']}
字典2:
{'/Users/test/kodax-camera-M531.jpg': ['fcolor=2,74,6,20,30,1,2,0,1,0,0,0,1,3,2,0,0,0,0,0,1,1,1,0,0,2,0,0,0,2,2,0,0,0,0,0,2,2,1,0,0,5,0,0,0,1,4,0,0,0,0,0,2,2,1,0,0,1,0,0,0,3,1,0,0,0,0,0,1,1,0,0,0,3,0,0,0,1,2,0,0,0,0,0,2,2,1,0,0,4,0,0,0,0,5,0,0,0,0,0,2,1,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0', 'edge=1,4,1,88,128,22,8,39,25,142,230,226,31,60,64,255,252,12,76', 'texture=1,15,32,31,28,19,16,12,98']}
我有一个 lambda 函数:
# Cosine similarity of vectors a and b, rounded to 3 decimal places:
# their inner product divided by the product of LA.norm(a) and LA.norm(b).
# NOTE(review): NP and LA are not defined in this snippet; presumably they
# are numpy and numpy.linalg -- confirm against the real imports.
cosinLamba = lambda a, b : round(NP.inner(a, b)/(LA.norm(a)*LA.norm(b)), 3)
我想迭代字典 1 和字典 2,并获取字典 1 的 fcolor 值 'fcolor=2,4,14,5,0,0,0,0,0,0,0,0,0,0,12,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,6,0,0,0,0,1,0,0,0,0,0,0,0,0,0,20,9,0,0,0,2,2,0,0,0,0,0,0,0,0,0,13,6,0,0,0,1,0,0,0,0,0,0,0,0,0,0,10,8,0,0,0,1,2,0,0,0,0,0,0,0,0,0,17,17,0,0,0,3,6,0,0,0,0,0,0,0,0,0,7,5,0,0,0,2,0,0,0,0,0,0,0,0,0,0,4,3,0,0,0,1,1,0,0,0,0,0,0,0,0,0,6,6,0,0,0,2,3'
和dictionary2的fcolor值
'fcolor=2,74,6,20,30,1,2,0,1,0,0,0,1,3,2,0,0,0,0,0,1,1,1,0,0,2,0,0,0,2,2,0,0,0,0,0,2,2,1,0,0,5,0,0,0,1,4,0,0,0,0,0,2,2,1,0,0,1,0,0,0,3,1,0,0,0,0,0,1,1,0,0,0,3,0,0,0,1,2,0,0,0,0,0,2,2,1,0,0,4,0,0,0,0,5,0,0,0,0,0,2,1,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0'
将它们传给我的 lambda 函数 cosinLamba(value1, value2)
value1 和 value2 是字符串,这就是为什么我将它们作为值保存在我的字典中。我想对每个字典中为给定图片存储的所有向量(fcolor、texture、edge)都执行这一计算。
最佳答案
您可以首先将您的表示更改为:
{'someFileName.jpg' : {'forecolor': [2,3,5,5,6],'edge': [2,4,5],'texture':[5,4,3]}}
或者
{('someFileName.jpg', 'forecolor'): [2,3,5,5,6],
('someFileName.jpg', 'edge'): [2,4,5],
('someFileName.jpg', 'texture'):[5,4,3]}
例如,获取第一种情况对应的列表:
from itertools import product
# Pair the info of each image in dict1 with the info of every image in dict2.
# Expects the nested layout {file_name: {property_name: list_of_numbers}}.
# items() is used instead of iteritems() so the loop runs on Python 2 and 3.
for (fn1, d1), (fn2, d2) in product(dict1.items(), dict2.items()):
    # Compare like with like: look up the same property in the other image.
    for property_, list_value in d1.items():
        compute_cosine_similarity(list_value, d2[property_])
如果沿用您原来的字符串列表表示,则可以这样写:
from itertools import product
# Pair the feature list of each image in dict1 with the feature list of
# every image in dict2.
# items() is used instead of iteritems() so the loop runs on Python 2 and 3.
for (fn1, lst1), (fn2, lst2) in product(dict1.items(), dict2.items()):
    # Assumes all lists keep their elements in the same order, so zip()
    # lines up matching features by position.
    for string_value1, string_value2 in zip(lst1, lst2):
        compute(string_value1, string_value2)
您不应将数字存储为 ASCII 字符串列表。如果需要节省内存,可以使用 numpy 数组;cosinLamba 已经可以接受它们。
from collections import namedtuple
import numpy as np
# Record type with one field per feature vector of an image:
# forecolor, edge and texture.
Info = namedtuple('Info', 'forecolor edge texture')
# Maps an image file name to its Info record of uint8 numpy arrays.
# NOTE(review): each `[...]` appears to be a placeholder for the real list
# of values; replace it with actual data before running this snippet.
dict1 = {'someFileName.jpg': Info(np.array([...], dtype=np.uint8),
np.array([...], dtype=np.uint8),
np.array([...], dtype=np.uint8))}
调用 cosine_similarity() 的代码与您的表示完全相同。
关于Python:迭代两个以列表为值的字典,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/11731439/