我有一些非常大的 Excel 文件,但即使是“中等”大小的文件(50 MB)读取起来也非常慢。我确实需要跳过前两行,但我不认为这是变慢的原因。你能想到其他原因吗?
wb = load_workbook(MyFile,read_only=True)
ws = wb.active
NDepth = ws.max_row-2
NTime = ws.max_column -1
Local_Depth = np.zeros((NDepth,))
Local_Temp = np.zeros((NDepth,NTime))
iterlist = islice(ws.iter_rows(),2,None)
start = time.time()
i=0
for row in iterlist:
Local_Depth[i] = row[0].value
j=0
for col in row[1:]:
Local_Temp[i,j] = col.value
j += 1
i += 1
print "Done", time.time()-start
在 Dell Precision M4700 上加载这个文件花了我 7 分多钟。文件大约有 8000 行、800 列。一定是哪里出了问题吧?我是否应该对我的 Python 2.7 环境做一些其他调整?
谢谢,约翰
最佳答案
我把加载时间缩短到了 22 秒:
import numpy as np
import time
from openpyxl import Workbook
from openpyxl import load_workbook
import zipfile
def rowList(fullfilename, sheet_path='xl/worksheets/sheet1.xml', chunk_size=50000):
    """Yield the raw XML of each worksheet row, streamed from an .xlsx archive.

    The worksheet part is read in fixed-size chunks rather than parsed as XML.
    Everything up to the first ``sheetData`` marker is skipped; after that,
    each ``</row>`` terminator ends one row.

    Args:
        fullfilename: Path to the .xlsx (zip) file.
        sheet_path: Archive member holding the worksheet XML.
        chunk_size: Number of bytes requested per read.

    Yields:
        For every ``</row>`` found, the text from the first ``<c`` of that row
        up to (not including) ``</row>``. A row without any ``<c`` yields an
        empty string. Self-closing ``<row .../>`` elements have no terminator
        of their own and are folded into the following row.
    """
    marker = b"sheetData"
    row_close = b"</row>"
    with zipfile.ZipFile(fullfilename, mode='r') as z:
        with z.open(sheet_path, 'r') as f:
            # Phase 1: skip ahead to the sheetData element.
            buf = b""
            pos = -1
            while pos < 0:
                chunk = f.read(chunk_size)
                if not chunk:
                    # End of data without a sheetData marker: nothing to yield.
                    return
                # Carry over the tail of the previous chunk so a marker that
                # straddles a chunk boundary is still found.
                buf = buf[-(len(marker) - 1):] + chunk
                pos = buf.find(marker)
            # Drop the marker plus the one character that follows it.
            buf = buf[pos + len(marker) + 1:]

            # Phase 2: emit one row per "</row>" terminator.
            while True:
                end = buf.find(row_close)
                while end < 0:
                    chunk = f.read(chunk_size)
                    if not chunk:
                        return
                    # Only the new bytes (plus a short overlap for a split
                    # terminator) need to be searched.
                    resume = max(0, len(buf) - (len(row_close) - 1))
                    buf += chunk
                    end = buf.find(row_close, resume)
                first_cell = buf.find(b"<c", 0, end)
                row = buf[first_cell:end] if first_cell >= 0 else buf[:0]
                if not isinstance(row, str):
                    # Python 3: the archive yields bytes; hand text to callers.
                    # NOTE(review): assumes the worksheet XML is UTF-8 - confirm.
                    row = row.decode("utf-8")
                yield row
                buf = buf[end + len(row_close):]
def splitRow(func, row):
    """Yield ``func(text)`` for the text of each ``<v>...</v>`` element in *row*.

    Args:
        func: Callable applied to each extracted value string (e.g. ``float``).
        row: Row XML as a string. The opening tag is assumed to be exactly
            ``<v>`` (three characters, no attributes).

    Yields:
        The converted values, in the order they appear in *row*. Cells that
        have no ``<v>`` element contribute nothing.

    Raises:
        ValueError: If a ``<v`` is not followed by a closing ``</v``.
    """
    open_tag = "<v"
    close_tag = "</v"
    # Search with explicit start offsets instead of re-slicing the row for
    # every cell, which copied the remainder of the string each time.
    pos = row.find(open_tag)
    while pos >= 0:  # find() reports "not found" as -1; index 0 is a valid hit
        value_start = pos + 3  # step over the 3-character "<v>" tag
        value_end = row.find(close_tag, value_start)
        if value_end < 0:
            raise ValueError("unterminated <v> element in row")
        yield func(row[value_start:value_end])
        pos = row.find(open_tag, value_end + 3)
start = time.time()
wb = load_workbook(MyFile,read_only=True, use_iterators=True)
ws = wb.active
NDepth = ws.max_row-2
NTime = ws.max_column -1
wb._archive.close()
Local_Store = np.empty((NDepth,NTime+1))
Local_Time = np.empty((NTime,))
print NDepth, NTime
print "Data Accessed via Iterators", time.time()-start
start = time.time()
print "About to call RowList"
i = -2
j = 1
for row in rowList(MyFile):
if i == -2:
True
else:
if i == -1:
Local_Time[:] = list(splitRow(float,row))
else:
Local_Store[i,:] = list(splitRow(float,row))
i += 1
print i, "Rows Parsed", time.time()-start
关于python - openpyxl 只读 use_iterators,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/36236392/