我有一个列表是这样的
a=[{'time':3},{'time':4},{'time':5}]
我想像这样以相反的顺序获取值的累加和
b=[{'exp':3,'cumsum':12},{'exp':4,'cumsum':9},{'exp':5,'cumsum':5}]
获得此信息的最有效方法是什么?我读过其他答案,其中使用 numpy
给出了类似
a=[1,2,3]
b=numpy.cumsum(a)
但我还需要在字典中插入 cumsum
最佳答案
a=[{'time':3},{'time':4},{'time':5}]
b = []
cumsum = 0
for e in a[::-1]:
cumsum += e['time']
b.insert(0, {'exp':e['time'], 'cumsum':cumsum})
print(b)
输出:
[{'exp': 3, 'cumsum': 12}, {'exp': 4, 'cumsum': 9}, {'exp': 5, 'cumsum': 5}]
所以事实证明,在列表的开头插入是 slow (在))。相反,尝试
deque
(O(1)):
from collections import deque
a=[{'time':3},{'time':4},{'time':5}]
b = deque()
cumsum = 0
for e in a[::-1]:
cumsum += e['time']
b.appendleft({'exp':e['time'], 'cumsum':cumsum})
print(b)
print(list(b))
输出:
deque([{'cumsum': 12, 'exp': 3}, {'cumsum': 9, 'exp': 4}, {'cumsum': 5, 'exp': 5}])
[{'cumsum': 12, 'exp': 3}, {'cumsum': 9, 'exp': 4}, {'cumsum': 5, 'exp': 5}]
下面是测试 ITT 每种方法速度的脚本,以及带有计时结果的图表:
from collections import deque
from copy import deepcopy
import numpy as np
import pandas as pd
from random import randint
from time import time
def Nehal_pandas(l):
df = pd.DataFrame(l)
df['cumsum'] = df.ix[::-1, 'time'].cumsum()[::-1]
df.columns = ['exp', 'cumsum']
return df.to_json(orient='records')
def Merlin_pandas(l):
df = pd.DataFrame(l).rename(columns={'time':'exp'})
df["cumsum"] = df['exp'][::-1].cumsum()
return df.to_dict(orient='records')
def RahulKP_numpy(l):
cumsum_list = np.cumsum([i['time'] for i in l][::-1])[::-1]
for i,j in zip(l,cumsum_list):
i.update({'cumsum':j})
def Divakar_pandas(l):
df = pd.DataFrame(l)
df.columns = ['exp']
df['cumsum'] = (df[::-1].cumsum())[::-1]
return df.T.to_dict().values()
def cb_insert_0(l):
b = []
cumsum = 0
for e in l[::-1]:
cumsum += e['time']
b.insert(0, {'exp':e['time'], 'cumsum':cumsum})
return b
def cb_deque(l):
b = deque()
cumsum = 0
for e in l[::-1]:
cumsum += e['time']
b.appendleft({'exp':e['time'], 'cumsum':cumsum})
b = list(b)
return b
def cb_deque_noconvert(l):
b = deque()
cumsum = 0
for e in l[::-1]:
cumsum += e['time']
b.appendleft({'exp':e['time'], 'cumsum':cumsum})
return b
def hpaulj_gen(l, var='value'):
cum=0
for i in l:
j=i[var]
cum += j
yield {var:j, 'sum':cum}
def hpaulj_inplace(l, var='time'):
cum = 0
for i in l:
cum += i[var]
i['sum'] = cum
def test(number_of_lists, min_list_length, max_list_length):
test_lists = []
for _ in range(number_of_lists):
test_list = []
number_of_dicts = randint(min_list_length,max_list_length)
for __ in range(number_of_dicts):
random_value = randint(0,50)
test_list.append({'time':random_value})
test_lists.append(test_list)
lists = deepcopy(test_lists)
start_time = time()
for l in lists:
res = list(hpaulj_gen(l[::-1], 'time'))[::-1]
elapsed_time = time() - start_time
print('hpaulj generator:'.ljust(25), '%.2f' % (number_of_lists / elapsed_time), 'lists per second')
lists = deepcopy(test_lists)
start_time = time()
for l in lists:
hpaulj_inplace(l[::-1])
elapsed_time = time() - start_time
print('hpaulj in place:'.ljust(25), '%.2f' % (number_of_lists / elapsed_time), 'lists per second')
lists = deepcopy(test_lists)
start_time = time()
for l in lists:
res = cb_insert_0(l)
elapsed_time = time() - start_time
print('craig insert list at 0:'.ljust(25), '%.2f' % (number_of_lists / elapsed_time), 'lists per second')
lists = deepcopy(test_lists)
start_time = time()
for l in lists:
res = cb_deque(l)
elapsed_time = time() - start_time
print('craig deque:'.ljust(25), '%.2f' % (number_of_lists / elapsed_time), 'lists per second')
lists = deepcopy(test_lists)
start_time = time()
for l in lists:
res = cb_deque_noconvert(l)
elapsed_time = time() - start_time
print('craig deque no convert:'.ljust(25), '%.2f' % (number_of_lists / elapsed_time), 'lists per second')
lists = deepcopy(test_lists)
start_time = time()
for l in lists:
RahulKP_numpy(l) # l changed in place
elapsed_time = time() - start_time
print('Rahul K P numpy:'.ljust(25), '%.2f' % (number_of_lists / elapsed_time), 'lists per second')
lists = deepcopy(test_lists)
start_time = time()
for l in lists:
res = Divakar_pandas(l)
elapsed_time = time() - start_time
print('Divakar pandas:'.ljust(25), '%.2f' % (number_of_lists / elapsed_time), 'lists per second')
lists = deepcopy(test_lists)
start_time = time()
for l in lists:
res = Nehal_pandas(l)
elapsed_time = time() - start_time
print('Nehal pandas:'.ljust(25), '%.2f' % (number_of_lists / elapsed_time), 'lists per second')
lists = deepcopy(test_lists)
start_time = time()
for l in lists:
res = Merlin_pandas(l)
elapsed_time = time() - start_time
print('Merlin pandas:'.ljust(25), '%.2f' % (number_of_lists / elapsed_time), 'lists per second')
关于python - 如何在python中的字典列表中查找项目的累计总和,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/39054413/