Python Pandas 数据框到 XML

标签 python xml pandas

如果能得到一些帮助或指点,我将不胜感激。我有一个从 txt 文件读入的 pandas 数据框,想把它插入到我正在构建的 XML 文档中。我可以建立 XML 文档,也可以按照 “How do convert a pandas/dataframe to XML?” 这个问题中的方法把数据框转换为 XML,但我似乎无法把转换得到的 XML 插入到已构建的 XML 文档中。

到目前为止,我的代码是:

import pandas as pd
from xml.dom.minidom import Document
from xml.dom.minidom import parseString 


# Column names applied to the input file (header=0 replaces the file's own header row).
colnamesRBR = ['TIMESTAMP','A']
# Read the comma-separated text file; the TIMESTAMP column is parsed as dates
# into a new 'datetime' column, which becomes the index of the frame.
df = pd.read_table('test_data.txt',sep = ',',header=0,names=colnamesRBR,parse_dates={'datetime':['TIMESTAMP']},index_col='datetime')

# Root element of the XML document: <Timeseries>.
doc = Document()
base = doc.createElement('Timeseries')
doc.appendChild(base)

# <Series> is the container that the data events are meant to go under.
entry = doc.createElement('Series')
base.appendChild(entry)

# <Header> holds the series metadata (type, time step, start date).
entry1 = doc.createElement('Header')
entry.appendChild(entry1)

# <type>instantaneous</type>
# NOTE(review): the name `type` shadows the Python builtin within this script.
type = doc.createElement('type')
type_content = doc.createTextNode('instantaneous')
type.appendChild(type_content)
entry1.appendChild(type)

# <timeStep unit="minute" multiplier="5"/>
timeStepElem = doc.createElement('timeStep')
timeStepElem.setAttribute ('unit','minute')
timeStepElem.setAttribute ('multiplier','5')
entry1.appendChild(timeStepElem)

# <startDate time="13:30:00" date="2015-06-24"/>
startDateElem = doc.createElement('startDate')
startDateElem.setAttribute ('time','13:30:00')
startDateElem.setAttribute ('date','2015-06-24')
entry1.appendChild(startDateElem)

# A single hand-written <event> appended directly under <Series>; its
# attribute values are hard-coded here, not taken from the data frame.
eventElem = doc.createElement('event')
eventElem.setAttribute ('time','endDate')
eventElem.setAttribute ('date','2015-06-25')
eventElem.setAttribute ('value','2015-06-25')
entry.appendChild(eventElem)

def to_xml(df, filename=None, mode='w'):
    """Render every cell of *df* as an ``<event .../>`` line.

    The frame is walked column by column. For each index label of a column
    one line is produced, with the label in ``date`` and the cell value in
    both ``time`` and ``value``. The lines are joined with newlines and have
    no enclosing root element.

    Args:
        df: the data frame to convert.
        filename: if given, the text is written to this path instead of
            being returned.
        mode: file mode passed to ``open`` when *filename* is given.

    Returns:
        The generated text when *filename* is ``None``; otherwise ``None``.
    """
    def row_to_xml(row):
        # With axis=0 below, `row` is actually one column of the frame,
        # indexed by the frame's index labels.
        template = '  <event date="{0}" time="{1}" value="{1}"/>'
        return '\n'.join(
            template.format(label, row.iloc[pos])
            for pos, label in enumerate(row.index)
        )

    res = '\n'.join(df.apply(row_to_xml, axis=0))

    if filename is not None:
        with open(filename, mode) as f:
            f.write(res)
        return None
    return res

# to_xml(df) returns several sibling <event> elements with no enclosing root
# element, so parseString() rejects it with the ExpatError shown in the
# traceback further down ("junk after document element").
series = parseString(to_xml(df)).childNodes[0]
entry.appendChild(series)

# Attach to_xml to DataFrame as a method and print the converted frame
# (Python 2 print statement).
pd.DataFrame.to_xml = to_xml
print df.to_xml()

# Write the assembled document to test.xml.
f = open("test.xml","w")
doc.writexml(f, indent = "   ", addindent="   ",newl="\n")
f.close()

保存下来的 XML 输出文件看起来没有问题:

<?xml version="1.0" ?>
   <Timeseries>
      <Series>
         <Header>
            <type>instantaneous</type>
            <timeStep multiplier="5" unit="minute"/>
            <startDate date="2015-06-24" time="13:30:00"/>
         </Header>
         <event date="2015-06-25" time="endDate" value="2015-06-25"/>
      </Series>
   </Timeseries>

并且 pandas dataframe 转换的 xml 很好:

<event date="2015-03-09 15:40:00" time="52.2885" value="52.2885"/>
  <event date="2015-03-09 15:50:00" time="52.3277" value="52.3277"/>
  <event date="2015-03-09 16:00:00" time="52.5045" value="52.5045"/>
  <event date="2015-03-09 16:10:00" time="52.5702" value="52.5702"/>
  <event date="2015-03-09 16:20:00" time="52.5608" value="52.5608"/>

我似乎无法把这些 XML 插入到文档的 Series 元素下(文档里现有的那个 event 是我手动添加的)。我已经尝试了一段时间,但始终无法把它放进 element.attribute 这类函数中;现在我开始怀疑是不是应该直接把 txt 解析为 XML,不过目前我还是更喜欢使用 pandas 的方案。

如果有帮助,只是一些示例数据:

TIMESTAMP,A
2015/03/09 15:40,52.2885
2015/03/09 15:50,52.3277
2015/03/09 16:00,52.5045
2015/03/09 16:10,52.5702
2015/03/09 16:20,52.5608

目前的错误是:

File "<ipython-input-10-906277431901>", line 1, in <module>
    runfile('C:/Users/clinton.chrystal/Documents/Python Scripts/Clint/Text_changes/from_data_to_xml_for SO.py', wdir='C:/Users/clinton.chrystal/Documents/Python Scripts/Clint/Text_changes')

  File "C:\Anaconda\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 682, in runfile
    execfile(filename, namespace)

  File "C:\Anaconda\lib\site-packages\spyderlib\widgets\externalshell\sitecustomize.py", line 71, in execfile
    exec(compile(scripttext, filename, 'exec'), glob, loc)

  File "C:/Users/clinton.chrystal/Documents/Python Scripts/Clint/Text_changes/from_data_to_xml_for SO.py", line 60, in <module>
    series = parseString(to_xml(df)).childNodes[0]

  File "C:\Anaconda\lib\xml\dom\minidom.py", line 1928, in parseString
    return expatbuilder.parseString(string)

  File "C:\Anaconda\lib\xml\dom\expatbuilder.py", line 940, in parseString
    return builder.parseString(string)

  File "C:\Anaconda\lib\xml\dom\expatbuilder.py", line 223, in parseString
    parser.Parse(string, True)

ExpatError: junk after document element: line 2, column 2

最佳答案

首先去掉 to_xml 方法中的 Series 标签:

def to_xml(df, filename=None, mode='w'):
    """Render each row of *df* as an ``<event ...></event>`` element.

    Every row must provide a ``TIMESTAMP`` string whose first two
    whitespace-separated parts are the date and the time, and an ``A``
    value. The elements are joined with single spaces and have no
    enclosing root element.

    Args:
        df: the data frame to convert.
        filename: if given, the text is written to this path instead of
            being returned.
        mode: file mode passed to ``open`` when *filename* is given.

    Returns:
        The generated text when *filename* is ``None``; otherwise ``None``.
    """
    template = '<event date="{0}" time="{1}" value="{2}"></event>'

    def row_to_xml(row):
        # "2015/03/09 15:40" -> ["2015/03/09", "15:40"]
        stamp_parts = row.TIMESTAMP.split()
        return template.format(stamp_parts[0], stamp_parts[1], row.A)

    res = ' '.join(df.apply(row_to_xml, axis=1))

    if filename is not None:
        with open(filename, mode) as f:
            f.write(res)
        return None
    return res

然后您可以像这样创建您的 XML 树:

from xml.dom.minidom import Document
from xml.dom.minidom import parseString

# Root element of the XML document: <Timeseries>.
doc = Document()
base = doc.createElement('Timeseries')
doc.appendChild(base)

# to_xml(df) returns sibling <event> elements with no common parent, which
# the parser rejects on its own. Wrapping the text in <Series>...</Series>
# gives it a single root; childNodes[0] of the parsed document is that
# <Series> element, which is then attached under <Timeseries>.
series = parseString('<Series>' + to_xml(df) + '</Series>').childNodes[0]
base.appendChild(series)

# <Header> is appended to <Series> after the parsed events, so it comes
# last inside <Series> in the printed output.
header = doc.createElement('Header')
series.appendChild(header)

# <type>instantaneous</type>
type_elem = doc.createElement('type')
type_content = doc.createTextNode('instantaneous')
type_elem.appendChild(type_content)
header.appendChild(type_elem)

# <timeStep unit="minute" multiplier="5"/>
timeStepElem = doc.createElement('timeStep')
timeStepElem.setAttribute('unit', 'minute')
timeStepElem.setAttribute('multiplier', '5')
header.appendChild(timeStepElem)

# <startDate time="13:30:00" date="2015-06-24"/>
startDateElem = doc.createElement('startDate')
startDateElem.setAttribute('time', '13:30:00')
startDateElem.setAttribute('date', '2015-06-24')
header.appendChild(startDateElem)

print(doc.toprettyxml())

输出:

<?xml version="1.0" ?>
<Timeseries>
        <Series>
                <event date="2015/03/09" time="15:40" value="52.2885"/>

                <event date="2015/03/09" time="15:50" value="52.3277"/>

                <event date="2015/03/09" time="16:00" value="52.5045"/>

                <event date="2015/03/09" time="16:10" value="52.5702"/>

                <event date="2015/03/09" time="16:20" value="52.5608"/>
                <Header>
                        <type>instantaneous</type>
                        <timeStep multiplier="5" unit="minute"/>
                        <startDate date="2015-06-24" time="13:30:00"/>
                </Header>
        </Series>
</Timeseries>

关于Python Pandas 数据框到 XML,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/31260151/

相关文章:

python - 组合列表元素

python - python中的线程——同时处理多个大文件

python - 尝试创建 CSV 时出现 “'module' object has no attribute 'now'” 错误

python - 将数据从 MongoDB 游标加载到 pandas Dataframe 的更快方法

python - 在python中使用正则表达式捕获表情

python - 如何将 python 单元测试框架文件的输出写入 .xml 文件以在 jenkins 上可视化?

xml - 如何用xml数据向量化?

xml - 尝试读取 xsl 中的 xml 元素值

Python:从 DataFrame 中的两列创建结构化 numpy 结构化数组

python - 用日期时间索引组成数据框