python - openpyxl 只读 use_iterators

我有一些非常大的 Excel 文件，但即使是“中等”大小的文件（50 MB）读取起来也非常慢。我确实需要跳过前两行，但我不认为这是导致变慢的原因。你能想到其他原因吗？

wb = load_workbook(MyFile,read_only=True)
ws = wb.active

NDepth = ws.max_row-2
NTime = ws.max_column -1

Local_Depth = np.zeros((NDepth,))
Local_Temp = np.zeros((NDepth,NTime))

iterlist = islice(ws.iter_rows(),2,None)

start = time.time()

i=0
for row in iterlist:
    Local_Depth[i] = row[0].value 
    j=0
    for col in row[1:]:
        Local_Temp[i,j] = col.value
        j += 1
    i += 1

print "Done", time.time()-start

在 Dell Precision M4700 上加载这个文件花了 7 分多钟。文件大约有 8000 行、800 列。一定是哪里出了问题吧？我是否应该对我的 Python 2.7 环境做一些其他调整？

谢谢，约翰

最佳答案

我把加载时间缩短到了 22 秒。

import numpy as np
import time
from openpyxl import Workbook
from openpyxl import load_workbook
import zipfile


def rowList(fullfilename):
    """Yield the raw cell XML of every row in an .xlsx file's first sheet.

    The workbook is opened as a zip archive and the member
    ``xl/worksheets/sheet1.xml`` is scanned in fixed-size chunks, so the
    sheet is never parsed into cell objects.

    Args:
        fullfilename: Path of the .xlsx file.

    Yields:
        For each ``<row>...</row>`` element after the ``sheetData`` marker,
        the byte string from the row's first ``<c`` up to (not including)
        ``</row>``. A row without any ``<c`` yields an empty byte string.
        Self-closing ``<row/>`` elements are not recognised as rows.
    """
    chunk_size = 50000
    data_marker = b"sheetData"
    row_end = b"</row>"

    with zipfile.ZipFile(fullfilename, mode='r') as z:
        with z.open('xl/worksheets/sheet1.xml', 'r') as f:
            # Keep reading until the marker shows up. Between reads only
            # the tail that could hold the start of a marker split across
            # two chunks is retained.
            buf = b""
            marker_at = -1
            while marker_at < 0:
                chunk = f.read(chunk_size)
                if not chunk:
                    return  # no sheet data in this member
                buf += chunk
                marker_at = buf.find(data_marker)
                if marker_at < 0:
                    buf = buf[-(len(data_marker) - 1):]

            # Skip the marker and the '>' that closes the <sheetData> tag.
            buf = buf[marker_at + len(data_marker) + 1:]

            while True:
                end = buf.find(row_end)
                while end < 0:
                    chunk = f.read(chunk_size)
                    if not chunk:
                        return  # no further complete row
                    buf += chunk
                    end = buf.find(row_end)

                # Only look for cells inside the current row.
                first_cell = buf.find(b"<c", 0, end)
                yield buf[first_cell:end] if first_cell >= 0 else b""
                buf = buf[end + len(row_end):]


def splitRow(func,row):
    """Yield ``func(text)`` for the content of every ``<v>...</v>`` in *row*.

    Args:
        func: Callable applied to each value's raw content (e.g. ``float``).
        row: Row XML as produced by ``rowList``; text or byte strings are
            both accepted.

    Yields:
        The converted values in document order. Cells that carry no
        ``<v>`` element contribute nothing, so positions are not preserved
        for sparse rows.

    Raises:
        ValueError: If a ``<v>`` element has no closing ``</v``.
    """
    # Search with markers of the same string type as the row.
    if isinstance(row, bytes):
        open_tag, close_tag = b"<v", b"</v"
    else:
        open_tag, close_tag = u"<v", u"</v"
    # The content starts after "<v" plus the '>' closing the opening tag.
    content_offset = len(open_tag) + 1

    # find() with a start offset avoids re-slicing the row for every cell.
    pos = row.find(open_tag)
    while pos >= 0:
        begin = pos + content_offset
        end = row.find(close_tag, begin)
        if end < 0:
            raise ValueError("unterminated <v> element in row")
        yield func(row[begin:end])
        pos = row.find(open_tag, end + len(close_tag))


start = time.time()

wb = load_workbook(MyFile,read_only=True, use_iterators=True) 
ws = wb.active
NDepth = ws.max_row-2
NTime = ws.max_column -1
wb._archive.close()

Local_Store = np.empty((NDepth,NTime+1))
Local_Time = np.empty((NTime,))

print NDepth, NTime
print "Data Accessed via Iterators", time.time()-start

start = time.time()

print "About to call RowList"

i = -2
j = 1
for row in rowList(MyFile): 
if i == -2:
    True 
else:
    if i == -1:
        Local_Time[:] = list(splitRow(float,row))
    else:
        Local_Store[i,:] = list(splitRow(float,row))

i += 1  

print i, "Rows Parsed", time.time()-start

关于python - openpyxl 只读 use_iterators，我们在Stack Overflow上找到一个类似的问题： https://stackoverflow.com/questions/36236392/

python - openpyxl 只读 use_iterators

上一篇：python - 找不到模块 - PYTHON

下一篇：python - Flask - 在应用程序上下文之外工作