python - Merging multiple DBF (csv) files into one, appending along columns

Tags: python csv python-2.7 dbf

I have been working on merging a number of csv files without much luck. Here is my current script. The result is either only one data file being written, or the files being appended as rows rather than columns. I added \n to try to avoid line problems.

import os
yolo = []
location = os.listdir("C:\Users\jcm\Desktop\RO")
for filename in location:
    #print  filename
    if "(Wide)" in filename:
        yolo.append(filename)
total = len(yolo)
with open("out4.dbf", "a") as f:

    for num in yolo:
        for line in open("C:\Users\jcm\Desktop\RO\\" +num,"rb"):
             f.write(line+"\n")
             print line+"\n"

The print line+"\n" is printing all the data, but it is not being added to the appended file. This is the DBF database file format. I found a way to format a DBF as csv, but not to merge thousands of data points. The first row is the field names, the second row is the data types, and everything from the third row on is data.
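For a single file, turning the DBF into a csv works along these lines (a minimal sketch, assuming the dbfreader generator further down and a hypothetical input file example.dbf):

import csv

with open('example.dbf', 'rb') as f:
    rows = dbfreader(f)
    fieldnames = rows.next()        # first yield: field names
    fieldspecs = rows.next()        # second yield: (type, size, deci) per field
    with open('example.csv', 'wb') as out:
        writer = csv.writer(out)
        writer.writerow(fieldnames)
        writer.writerows(rows)      # remaining yields: the data records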

Here is the code I am using to filter the DBF files. I have added my own code at the bottom, where I am using it.

import struct, datetime, decimal, itertools      
def dbfreader(f):
    """Returns an iterator over records in a Xbase DBF file.

    The first row returned contains the field names.
    The second row contains field specs: (type, size, decimal places).
    Subsequent rows contain the data records.
    If a record is marked as deleted, it is skipped.

    File should be opened for binary reads.

    """
    # See DBF format spec at:
    #     http://www.pgts.com.au/download/public/xbase.htm#DBF_STRUCT

    numrec, lenheader = struct.unpack('<xxxxLH22x', f.read(32))    
    numfields = (lenheader - 33) // 32

    fields = []
    for fieldno in xrange(numfields):
        name, typ, size, deci = struct.unpack('<11sc4xBB14x', f.read(32))
        name = name.replace('\0', '')       # eliminate NULs from string   
        fields.append((name, typ, size, deci))
    yield [field[0] for field in fields]
    yield [tuple(field[1:]) for field in fields]

    terminator = f.read(1)
    assert terminator == '\r'

    fields.insert(0, ('DeletionFlag', 'C', 1, 0))
    fmt = ''.join(['%ds' % fieldinfo[2] for fieldinfo in fields])
    fmtsiz = struct.calcsize(fmt)
    for i in xrange(numrec):
        record = struct.unpack(fmt, f.read(fmtsiz))
        if record[0] != ' ':
            continue                        # deleted record
        result = []
        for (name, typ, size, deci), value in itertools.izip(fields, record):
            if name == 'DeletionFlag':
                continue
            if typ == "N":
                value = value.replace('\0', '').lstrip()
                value = value.replace(' ', '').lstrip()
                if value == '':
                    value = 0

                elif deci:
                    value = decimal.Decimal(value)
                else:
                    value = int(value)
            elif typ == 'DATE':
                y, m, d = int(value[:4]), int(value[4:6]), int(value[6:8])
                value = datetime.date(y, m, d)
            elif typ == 'L':
                value = (value in 'YyTt' and 'T') or (value in 'NnFf' and 'F') or '?'
            elif typ == 'F':
                value = float(value)
            result.append(value)
        yield result
yoloies = []
yolo = []
yolos = [] 
def dbfwriter(f, fieldnames, fieldspecs, records):

    """ Return a string suitable for writing directly to a binary dbf file.

    File f should be open for writing in a binary mode.

    Fieldnames should be no longer than ten characters and not include \x00.
    Fieldspecs are in the form (type, size, deci) where
        type is one of:
            C for ascii character data
            M for ascii character memo data (real memo fields not supported)
            D for datetime objects
            N for ints or decimal objects
            L for logical values 'T', 'F', or '?'
        size is the field width
        deci is the number of decimal places in the provided decimal object
    Records can be an iterable over the records (sequences of field values).

    """
    # header info
    ver = 3
    now = datetime.datetime.now()
    yr, mon, day = now.year-1900, now.month, now.day
    numrec = len(records)
    numfields = len(fieldspecs)
    lenheader = numfields * 32 + 33
    lenrecord = sum(field[1] for field in fieldspecs) + 1
    hdr = struct.pack('<BBBBLHH20x', ver, yr, mon, day, numrec, lenheader, lenrecord)
    f.write(hdr)

    # field specs
    for name, (typ, size, deci) in itertools.izip(fieldnames, fieldspecs):
        name = name.ljust(11, '\x00')
        fld = struct.pack('<11sc4xBB14x', name, typ, size, deci)
        f.write(fld)

    # terminator
    f.write('\r')

    # records
    for record in records:
        f.write(' ')                        # deletion flag
        for (typ, size, deci), value in itertools.izip(fieldspecs, record):
            if typ == "N":
                value = str(value).rjust(size, ' ')

            elif typ == 'DATE':
                value = value.strftime('%Y%m%d')
            elif typ == 'L':
                value = str(value)[0].upper()
            else:
                value = str(value)[:size].ljust(size, ' ')
            #assert len(value) == size
            f.write(value)
        yoloies.append(record[1])    
        yolo.append(int(float(record[6])))
        yolos.append(int(record[0]))
    # End of file
    f.write('\x1A')


# -------------------------------------------------------
# Example calls
if __name__ == '__main__':
    import sys, csv
    from cStringIO import StringIO
    from operator import itemgetter

    # Read a database
    filename = 'out.dbf'      
    if len(sys.argv) == 2:
        filename = sys.argv[1]
    f = open(filename, 'rb')
    db = list(dbfreader(f))
    f.close()
    for record in db:
        print record
    fieldnames, fieldspecs, records = db[0], db[1], db[2:]

    # Alter the database
    #del records[0]
    #records.sort(key=itemgetter(4))

    # Remove a field
    #del fieldnames[3]
    #del fieldspecs[3]
    #records = [rec[1:] for rec in records]

    # Create a new DBF
    f = StringIO()
    dbfwriter(f, fieldnames, fieldspecs, records)

    # Read the data back from the new DBF
    print '-' * 20    
    f.seek(0)
    for line in dbfreader(f):
        print line
    f.close()

    # Convert to CSV
    print '.' * 20    
    f = StringIO()
    csv.writer(f).writerow(fieldnames)    
    csv.writer(f).writerows(records)
    print "break"
    #filename = 'MyDBF.cvs'
    #g = open(filename, 'w')
    #g.write(f.getvalue())
    print f.getvalue()
    f.close()
    print "break"
    #jcm code
    intensities = []
    intensities = zip(yolos,yoloies, yolo)

    print intensities
    with open("yolosss.csv", "w") as out_file:
        for date, time, intensity in zip(yolos, yoloies, yolo):
            out_file.write("%i,%s,%i\n" % (date, time, intensity))
            print "done"

Sample of the data.

['Date', 'Time', 'Millitm', 'Marker', '0', 'Sts_00', '1', 'Sts_01', '2', 'Sts_02', '3', 'Sts_03', '4', 'Sts_04', '5', 'Sts_05', '6', 'Sts_06', '7', 'Sts_07', '8', 'Sts_08', '9', 'Sts_09', '10', 'Sts_10', '11', 'Sts_11', '12', 'Sts_12']
[('D', 8, 0), ('C', 8, 0), ('N', 3, 0), ('C', 1, 0), ('F', 17, 8), ('C', 1, 0), ('F', 17, 8), ('C', 1, 0), ('F', 17, 8), ('C', 1, 0), ('F', 1, 0), ('C', 1, 0), ('F', 1, 0), ('C', 1, 0), ('F', 17, 8), ('C', 1, 0), ('F', 17, 8), ('C', 1, 0), ('F', 17, 8), ('C', 1, 0), ('F', 17, 8), ('C', 1, 0), ('F', 17, 8), ('C', 1, 0), ('F', 17, 8), ('C', 1, 0), ('F', 17, 8), ('C', 1, 0), ('F', 17, 8), ('C', 1, 0)]
['20130531', '00:00:28', 977, 'B', 548.84643555, ' ', 0.0, ' ', 83.11103058, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 6.93495178, ' ', 7.26524115, ' ', 0.0, ' ', 231.49482727, ' ', 16.30217171, ' ', 12.48698235, ' ']
['20130531', '00:05:13', 577, ' ', 571.55731201, ' ', 0.0, ' ', 81.2725296, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 6.93943787, ' ', 7.26580191, ' ', 0.0, ' ', 230.02883911, ' ', 16.31218529, ' ', 12.94760895, ' ']
['20130531', '00:20:13', 701, ' ', 547.88513184, ' ', 0.0, ' ', 82.43811798, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 6.9360733, ' ', 7.27926016, ' ', 0.0, ' ', 230.24513245, ' ', 16.202034, ' ', 11.91620636, ' ']
['20130531', '00:35:13', 885, ' ', 613.19396973, ' ', 0.0, ' ', 81.4948349, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 6.9276619, ' ', 7.29215765, ' ', 0.0, ' ', 230.04084778, ' ', 16.14195251, ' ', 11.29536152, ' ']
['20130531', '00:50:14', 60, ' ', 466.35424805, ' ', 0.0, ' ', 79.93270874, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 6.92541885, ' ', 7.3028121, ' ', 0.0, ' ', 230.50949097, ' ', 16.02178955, ' ', 10.67451763, ' ']
['20130531', '01:05:14', 174, ' ', 449.89187622, ' ', 0.0, ' ', 83.67579651, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 6.92373657, ' ', 7.31290579, ' ', 0.0, ' ', 233.10502625, ' ', 15.83153057, ' ', 10.29399967, ' ']
['20130531', '01:20:15', 19, ' ', 444.96517944, ' ', 0.0, ' ', 76.59817505, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 6.92429733, ' ', 7.31010199, ' ', 0.0, ' ', 229.95674133, ' ', 15.71136761, ' ', 10.48425865, ' ']
['20130531', '01:35:15', 94, ' ', 428.08221436, ' ', 0.0, ' ', 83.30929565, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 6.92373657, ' ', 7.29944754, ' ', 0.0, ' ', 232.22782898, ' ', 15.61123085, ' ', 11.28534794, ' ']
['20130531', '01:50:15', 238, ' ', 485.58041382, ' ', 0.0, ' ', 81.09828949, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 6.93831635, ' ', 7.30785894, ' ', 0.0, ' ', 229.75245667, ' ', 15.39093208, ' ', 10.68453121, ' ']
['20130531', '02:05:15', 382, ' ', 437.81542969, ' ', 0.0, ' ', 82.19178009, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 6.93102646, ' ', 7.3213172, ' ', 0.0, ' ', 232.1557312, ' ', 15.23071384, ' ', 9.82335949, ' ']
['20130531', '02:20:15', 687, ' ', 412.64120483, ' ', 0.0, ' ', 84.45085144, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 6.91925049, ' ', 7.33533621, ' ', 0.0, ' ', 231.02618408, ' ', 15.11054993, ' ', 9.17247486, ' ']
['20130531', '02:35:15', 841, ' ', 383.62173462, ' ', 0.0, ' ', 81.18240356, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 6.9119606, ' ', 7.34318686, ' ', 0.0, ' ', 231.53088379, ' ', 15.01041412, ' ', 8.6918211, ' ']
['20130531', '02:50:15', 986, ' ', 404.04953003, ' ', 0.0, ' ', 79.95674133, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 6.90186691, ' ', 7.33982229, ' ', 0.0, ' ', 230.24513245, ' ', 14.82015514, ' ', 8.9221344, ' ']
['20130531', '03:05:16', 120, ' ', 392.8142395, ' ', 0.0, ' ', 81.9334259, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 6.90298843, ' ', 7.32019567, ' ', 0.0, ' ', 230.22109985, ' ', 14.53977394, ' ', 9.99359131, ' ']
['20130531', '03:20:16', 515, ' ', 494.0519104, ' ', 0.0, ' ', 82.09564972, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 6.91812897, ' ', 7.30056906, ' ', 0.0, ' ', 233.12905884, ' ', 14.29944706, ' ', 10.74461269, ' ']
['20130531', '03:35:18', 381, ' ', 517.84429932, ' ', 0.0, ' ', 82.45013428, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 6.92541885, ' ', 7.28991461, ' ', 0.0, ' ', 232.68444824, ' ', 14.11920166, ' ', 10.87478924, ' ']
['20130531', '03:50:18', 946, ' ', 488.16390991, ' ', 0.0, ' ', 81.68108368, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 6.93214798, ' ', 7.28486776, ' ', 0.0, ' ', 231.71112061, ' ', 13.8688612, ' ', 11.23528004, ' ']
['20130531', '04:05:19', 141, ' ', 508.65179443, ' ', 0.0, ' ', 80.45542145, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 6.93214798, ' ', 7.28542852, ' ', 0.0, ' ', 229.87261963, ' ', 13.66858959, ' ', 11.29536152, ' ']
['20130531', '04:20:19', 435, ' ', 553.83325195, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 6.89233398, ' ', 7.27701712, ' ', 0.0, ' ', 0.04806537, ' ', 14.34951496, ' ', 24.47328377, ' ']
['20130531', '04:21:56', 224, 'E', 542.41772461, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 6.87607145, ' ', 7.27197027, ' ', 0.0, ' ', 0.04806537, ' ', 14.98037338, ' ', 24.40318871, ' ']
['20130531', '05:17:14', 780, 'B', 0.0, 'U', 0.0, 'U', 0.0, 'U', 0.0, 'U', 0.0, 'U', 0.0, 'U', 0.0, 'U', 0.0, 'U', 0.0, 'U', 0.0, 'U', 0.0, 'U', 0.0, 'U', 0.0, 'U']
['20130531', '05:32:15', 505, ' ', 554.61431885, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 6.8906517, ' ', 7.24337101, ' ', 0.0, ' ', 0.04806537, ' ', 86.66786957, ' ', 12.76736355, ' ']
['20130531', '05:47:15', 669, ' ', 546.20288086, ' ', 0.0, ' ', 0.01201634, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 6.89345551, ' ', 7.24337101, ' ', 0.0, ' ', 0.06008171, ' ', 163.59248352, ' ', 12.31675053, ' ']
['20130531', '06:02:15', 864, ' ', 512.49700928, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 6.89289474, ' ', 7.24168873, ' ', 0.0, ' ', 0.04806537, ' ', 184.21052551, ' ', 12.16654682, ' ']
['20130531', '06:17:16', 879, ' ', 417.08724976, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 6.89738083, ' ', 7.24056721, ' ', 0.0, ' ', 0.04806537, ' ', 210.36610413, ' ', 12.18657398, ' ']
['20130531', '06:32:17', 104, ' ', 450.612854, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 6.89233398, ' ', 7.24112797, ' ', 0.0, ' ', 0.04806537, ' ', 238.79475403, ' ', 12.06641006, ' ']
['20130531', '06:47:18', 530, ' ', 539.05310059, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 0.0, ' ', 6.88896894, ' ', 7.24112797, ' ', 0.0, ' ', 0.06008171, ' ', 249.29904175, ' ', 12.05639648, ' ']

Sample of the data in Notepad

   ᢠ                    DATE       D                   TIME       C                   MILLITM    N                  MARKER     C                   N0         N                  STS_00     C                   N1         N    
              STS_01     C                   N2         N                  STS_02     C                   N3         N                  STS_03     C                   N4         N                  STS_04     C                   N5         N    
              STS_05     C                   N6         N    
              STS_06     C                   N7         N    
              STS_07     C                   N8         N    
              STS_08     C                   N9         N    
              STS_09     C                   N10        N                  STS_10     C                   N11        N                  STS_11     C                   N12        N                  STS_12     C                   
 2013053100:00:28977.00B548.84643555 0.00000000 83.11103058 0.00 0.00 0.00000000 0.00000000 6.93495178 7.26524115 0.00000000 231.49482727 16.30217171 12.48698235  2013053100:05:13577.00 571.55731201 0.00000000 81.27252960 0.00 0.00 0.00000000 0.00000000 6.93943787 7.26580191 0.00000000 230.02883911 16.31218529 12.94760895  2013053100:20:13701.00 547.88513184 0.00000000 82.43811798 0.00 0.00 0.00000000 0.00000000 6.93607330 7.27926016 0.00000000 230.24513245 16.20203400 11.91620636  2013053100:35:13885.00 613.19396973 0.00000000 81.49483490 0.00 0.00 0.00000000 0.00000000 6.92766190 7.29215765 0.00000000 230.04084778 16.14195251 11.29536152  2013053100:50:14 60.00 466.35424805 0.00000000 79.93270874 0.00 0.00 0.00000000 0.00000000 6.92541885 7.30281210 0.00000000 230.50949097 16.02178955 10.67451763 

Best answer

The only suggestion I can offer at the moment is to use my dbf module instead of writing your own.
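For reference, opening a single table with the module and looping over its records looks roughly like this (a minimal sketch; out.dbf is just a stand-in for one of your files):

import dbf

# minimal sketch: open one table and print its records
with dbf.Table('out.dbf') as table:
    for record in table:
        print record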

If you can provide samples of two of your data files, along with an example of how they should look in the final dbf file, I can offer more.

Here is your code, simplified:

import dbf      
import os
from glob import glob

if __name__ == '__main__':
    # get interesting dbf file names
    ROfiles = set(glob(r"C:\Users\jcm\Desktop\RO\*(Wide)*.dbf"))

    merged_dbf = None

    # Read a database
    for dbf_filename in ROfiles:
        with dbf.Table(dbf_filename) as table:

            # create merged dbf if it hasn't been
            if merged_dbf is None:
                merged_dbf = table.new('merged')

            with merged_dbf:
                # merge in records
                for record in table:
                    merged_dbf.append(record)

                #Create a new csv from merged_dbf
                dbf.export(merged_dbf, 'catwomen')
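A note on the simplified version: it opens each (Wide) table, creates an empty merged table with the same field layout the first time through (assuming all of the files share one structure), appends every record from every file, and exports the result as a csv named 'catwomen'. This merges the files record by record, i.e. along rows; if what you actually need is a side-by-side, column-wise merge, the two sample files mentioned above would make that easier to pin down.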

Regarding python - merging multiple DBF (csv) files into one, appending along columns, a similar question can be found on Stack Overflow: https://stackoverflow.com/questions/17045571/
