c++ - boost::property_tree : 解析复杂的xml结构

标签 c++ xml boost boost-propertytree boost-parameter

我想使用 boost property_tree 解析下面的 xml 结构。

<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Txn ver="1.0">
    <TOpts tCount="1"  tformat="0"  ttimeout="10" />
    <TData>
        <Tvalue date="YYMMDD" time="HHMM" Ref="100"/>
    </TData>
    <TCustOpts>
        <Param name="SALE" value="xyz" />
    </TCustOpts>
</Txn>

我能够解析上面 xml 的第一个 TOpts 字段,但是对于 TData 和 TCustOpts 字段,我没有找到正确迭代和解析 xml 的方法,并且遇到了异常。有人可以为我提供解析 TData 和 TCustOpts 字段的正确方法吗? 下面是我的代码供参考。

// Asker's original attempt: load the XML text into a property tree, then walk
// the direct children of <Txn> and print the attributes it recognises.
stringstream ssString;
boost::property_tree::ptree pt1;
ssString << xml;
boost::property_tree::read_xml(ssString, pt1);

// "ver" attribute of the root <Txn> element.
string TxnVer = pt1.get<string>("Txn.<xmlattr>.ver");

// Copy of the <Txn> subtree; every child element is visited below.
boost::property_tree::ptree formats = pt1.get_child("Txn");
BOOST_FOREACH(boost::property_tree::ptree::value_type const& node, formats) {
    if (node.first == "TOpts") {
        // Attributes of <TOpts> live under the special "<xmlattr>" child.
        const boost::property_tree::ptree & attributes = node.second.get_child("<xmlattr>");
        BOOST_FOREACH(boost::property_tree::ptree::value_type const& v, attributes) {
            if (v.first == "tCount") {
                std::cout << " tCount " << v.second.data() << endl;
            }
            else if (v.first == "tformat") {
                std::cout << " tformat" << v.second.data() << endl;
            }
            else if (v.first == "ttimeout") {
                std::cout << " ttimeout " << v.second.data() << endl;
            }
          }
    }
    // NOTE(review): this branch repeats the "TOpts" test and has no body before
    // the next `else` -- presumably a placeholder for "TData"; confirm intent.
    else if (node.first == "TOpts")

    else if (node.first == "TCustOpts") {
        // NOTE(review): in the sample XML <TCustOpts> carries no attributes of
        // its own (name/value sit on its <Param> child), so this "<xmlattr>"
        // lookup is presumably where the reported exception comes from -- confirm.
        const boost::property_tree::ptree & attributes1 = node.second.get_child("<xmlattr>");
        BOOST_FOREACH(boost::property_tree::ptree::value_type const& s, attributes1) {
            if (s.first == "name"){
                std::cout << "name " << s.second.data() << endl;
            }
            else if (s.first == "value") {
                std::cout << "value " << s.second.data() << endl;
            }
        }
    }
}

最佳答案

好的,使用 Property Tree 解析信息时通常的反模式是“疯狂循环”。

以树格式存储键值对的整个想法,就是避免对低级结构进行循环,而是改用方便的寻址方式(使用路径)。

另一种反模式是将所有解析都放在一个大函数中。我会把事情分开。

定义一些数据类型

让我们从定义一些数据类型开始,以使我们的数据易于管理:

// Plain data model for one <Txn> document.
// Numeric fields carry default member initializers so that a default-constructed
// object is safe to read even when the corresponding XML element was absent and
// therefore never parsed.
namespace Domain {
    // Values taken from the attributes of <TOpts>.
    struct TOpts {
        size_t count = 0;    // tCount
        std::string format;  // tformat
        size_t timeout = 0;  // ttimeout
    };

    // Values taken from the attributes of <TData>/<Tvalue>.
    struct TData {
        std::string date; // YYMMDD
        std::string time; // HHMM
        size_t ref = 0;   // Ref
    };

    // name -> value pairs collected from the <Param> children of <TCustOpts>.
    // A multimap keeps every entry even when a name occurs more than once.
    struct TCustOpts {
        std::multimap<std::string, std::string> params;
    };

    // The whole transaction: root "ver" attribute plus the three sub-sections.
    struct Txn {
        std::string version;
        TOpts opts;
        TData data;
        TCustOpts custom_opts;
    };
}

这是我们的临时“领域层”。

让我们解析吧!

所以,下面是我将如何编写解析代码:

// Converts property-tree nodes into the Domain structs. Each overload receives
// the node of "its" element; absent attributes fall back to the defaults given
// in the individual get() calls.
namespace Parsing {
    // concrete parse functions

    // Reads the attributes of a <TOpts> node into `v`.
    void parse(Domain::TOpts& v, ptree const& pt) {
        // Ask for size_t explicitly: a bare `0` default makes get() parse as int,
        // so values above INT_MAX would silently turn into the default.
        v.count   = pt.get<size_t>("<xmlattr>.tCount", 0);
        v.format  = pt.get("<xmlattr>.tformat", "0");
        v.timeout = pt.get<size_t>("<xmlattr>.ttimeout", 0);
    }

    // Reads the attributes of the <Tvalue> child of a <TData> node into `v`.
    void parse(Domain::TData& v, ptree const& pt) {
        v.date = pt.get("Tvalue.<xmlattr>.date", "YYMMDD");
        v.time = pt.get("Tvalue.<xmlattr>.time", "HHMM");
        v.ref  = pt.get<size_t>("Tvalue.<xmlattr>.Ref", 0);
    }

    // Collects every <Param name=... value=...> child of a <TCustOpts> node.
    void parse(Domain::TCustOpts& v, ptree const& pt) {
        for (auto const& param : pt) {
            // Children with any other key are ignored.
            if (param.first != "Param")
                continue;

            v.params.emplace(
                param.second.get("<xmlattr>.name", "(anon)"),
                param.second.get("<xmlattr>.value", ""));
        }
    }

    // make any parse helper available optionally:
    // forwards to parse(v, *pt) when the subtree exists, otherwise leaves `v` untouched.
    template <typename T>
    void parse_optional(T& v, boost::optional<ptree const&> pt) {
        if (pt) parse(v, *pt);
    }

    // Reads a whole <Txn> node: the "ver" attribute plus each optional section.
    void parse(Domain::Txn& v, ptree const& pt) {
        v.version = pt.get("<xmlattr>.ver", "0.0");
        parse_optional(v.opts,        pt.get_child_optional("TOpts"));
        parse_optional(v.data,        pt.get_child_optional("TData"));
        parse_optional(v.custom_opts, pt.get_child_optional("TCustOpts"));
    }
}

唯一不太直接的事情是 parse_optional 来处理可能不存在的子树。

使用它:

int main() {
    boost::property_tree::ptree pt;
    {
        extern char const* xml;
        std::stringstream ss(xml);
        read_xml(ss, pt);
    }

    Domain::Txn transaction;
    Parsing::parse(transaction, pt.get_child("Txn"));

    std::cout << transaction; // complete roundtrip
}

奖励:往返

让我们也将相同的“域”类保存回属性树,这样我们就可以验证它是否有效:

// Mirror image of the parsing code: writes the Domain structs back into a
// property tree using the same element and attribute paths.
namespace Writing { // for DEBUG/demo only
    // Stores the TOpts fields as attributes of the node `pt`.
    void serialize(Domain::TOpts const& v, ptree& pt) {
        pt.put("<xmlattr>.tCount",   v.count);
        pt.put("<xmlattr>.tformat",  v.format);
        pt.put("<xmlattr>.ttimeout", v.timeout);
    }

    // Stores the TData fields as attributes of a "Tvalue" child of `pt`.
    void serialize(Domain::TData const& v, ptree& pt) {
        pt.put("Tvalue.<xmlattr>.date", v.date);
        pt.put("Tvalue.<xmlattr>.time", v.time);
        pt.put("Tvalue.<xmlattr>.Ref",  v.ref);
    }

    // Appends one "Param" child with name/value attributes per stored entry.
    void serialize(Domain::TCustOpts const& v, ptree& pt) {
        for (auto const& entry : v.params) {
            ptree& node = pt.add_child("Param", ptree{});
            node.put("<xmlattr>.name",  entry.first);
            node.put("<xmlattr>.value", entry.second);
        }
    }

    // Appends a complete "Txn" element (version attribute + three sections) to `pt`.
    void serialize(Domain::Txn const& v, ptree& pt) {
        ptree& root = pt.add_child("Txn", ptree{});
        root.put("<xmlattr>.ver", v.version);

        ptree& opts_node = root.add_child("TOpts", ptree{});
        serialize(v.opts, opts_node);

        ptree& data_node = root.add_child("TData", ptree{});
        serialize(v.data, data_node);

        ptree& custom_node = root.add_child("TCustOpts", ptree{});
        serialize(v.custom_opts, custom_node);
    }
}

完整演示

此演示显示了您的原始 XML 解析和序列化:

Live On Coliru

#include <boost/property_tree/xml_parser.hpp>
#include <iostream>
#include <map>

using boost::property_tree::ptree;

// Plain data model for one <Txn> document.
// Numeric fields carry default member initializers so that a default-constructed
// object is safe to read even when the corresponding XML element was absent and
// therefore never parsed.
namespace Domain {
    // Values taken from the attributes of <TOpts>.
    struct TOpts {
        size_t count = 0;    // tCount
        std::string format;  // tformat
        size_t timeout = 0;  // ttimeout
    };

    // Values taken from the attributes of <TData>/<Tvalue>.
    struct TData {
        std::string date; // YYMMDD
        std::string time; // HHMM
        size_t ref = 0;   // Ref
    };

    // name -> value pairs collected from the <Param> children of <TCustOpts>.
    // A multimap keeps every entry even when a name occurs more than once.
    struct TCustOpts {
        std::multimap<std::string, std::string> params;
    };

    // The whole transaction: root "ver" attribute plus the three sub-sections.
    struct Txn {
        std::string version;
        TOpts opts;
        TData data;
        TCustOpts custom_opts;
    };
}

// Converts property-tree nodes into the Domain structs. Each overload receives
// the node of "its" element; absent attributes fall back to the defaults given
// in the individual get() calls.
namespace Parsing {
    // concrete parse functions

    // Reads the attributes of a <TOpts> node into `v`.
    void parse(Domain::TOpts& v, ptree const& pt) {
        // Ask for size_t explicitly: a bare `0` default makes get() parse as int,
        // so values above INT_MAX would silently turn into the default.
        v.count   = pt.get<size_t>("<xmlattr>.tCount", 0);
        v.format  = pt.get("<xmlattr>.tformat", "0");
        v.timeout = pt.get<size_t>("<xmlattr>.ttimeout", 0);
    }

    // Reads the attributes of the <Tvalue> child of a <TData> node into `v`.
    void parse(Domain::TData& v, ptree const& pt) {
        v.date = pt.get("Tvalue.<xmlattr>.date", "YYMMDD");
        v.time = pt.get("Tvalue.<xmlattr>.time", "HHMM");
        v.ref  = pt.get<size_t>("Tvalue.<xmlattr>.Ref", 0);
    }

    // Collects every <Param name=... value=...> child of a <TCustOpts> node.
    void parse(Domain::TCustOpts& v, ptree const& pt) {
        for (auto const& param : pt) {
            // Children with any other key are ignored.
            if (param.first != "Param")
                continue;

            v.params.emplace(
                param.second.get("<xmlattr>.name", "(anon)"),
                param.second.get("<xmlattr>.value", ""));
        }
    }

    // make any parse helper available optionally:
    // forwards to parse(v, *pt) when the subtree exists, otherwise leaves `v` untouched.
    template <typename T>
    void parse_optional(T& v, boost::optional<ptree const&> pt) {
        if (pt) parse(v, *pt);
    }

    // Reads a whole <Txn> node: the "ver" attribute plus each optional section.
    void parse(Domain::Txn& v, ptree const& pt) {
        v.version = pt.get("<xmlattr>.ver", "0.0");
        parse_optional(v.opts,        pt.get_child_optional("TOpts"));
        parse_optional(v.data,        pt.get_child_optional("TData"));
        parse_optional(v.custom_opts, pt.get_child_optional("TCustOpts"));
    }
}

// Mirror image of the parsing code: writes the Domain structs back into a
// property tree using the same element and attribute paths.
namespace Writing { // for DEBUG/demo only
    // Stores the TOpts fields as attributes of the node `pt`.
    void serialize(Domain::TOpts const& v, ptree& pt) {
        pt.put("<xmlattr>.tCount",   v.count);
        pt.put("<xmlattr>.tformat",  v.format);
        pt.put("<xmlattr>.ttimeout", v.timeout);
    }

    // Stores the TData fields as attributes of a "Tvalue" child of `pt`.
    void serialize(Domain::TData const& v, ptree& pt) {
        pt.put("Tvalue.<xmlattr>.date", v.date);
        pt.put("Tvalue.<xmlattr>.time", v.time);
        pt.put("Tvalue.<xmlattr>.Ref",  v.ref);
    }

    // Appends one "Param" child with name/value attributes per stored entry.
    void serialize(Domain::TCustOpts const& v, ptree& pt) {
        for (auto const& entry : v.params) {
            ptree& node = pt.add_child("Param", ptree{});
            node.put("<xmlattr>.name",  entry.first);
            node.put("<xmlattr>.value", entry.second);
        }
    }

    // Appends a complete "Txn" element (version attribute + three sections) to `pt`.
    void serialize(Domain::Txn const& v, ptree& pt) {
        ptree& root = pt.add_child("Txn", ptree{});
        root.put("<xmlattr>.ver", v.version);

        ptree& opts_node = root.add_child("TOpts", ptree{});
        serialize(v.opts, opts_node);

        ptree& data_node = root.add_child("TData", ptree{});
        serialize(v.data, data_node);

        ptree& custom_node = root.add_child("TCustOpts", ptree{});
        serialize(v.custom_opts, custom_node);
    }
}

namespace { // for debug/demo only
    std::ostream& operator<<(std::ostream& os, Domain::Txn const& v) {
        ptree tmp;
        Writing::serialize(v, tmp);
        write_xml(os, tmp, boost::property_tree::xml_writer_make_settings<std::string>(' ', 4));
        return os;
    }
}

int main() {
    boost::property_tree::ptree pt;
    {
        extern char const* xml;
        std::stringstream ss(xml);
        read_xml(ss, pt);
    }

    Domain::Txn transaction;
    Parsing::parse(transaction, pt.get_child("Txn"));

    std::cout << transaction; // complete roundtrip
}

// Sample input document for the demo; main() refers to it through its
// `extern char const* xml;` declaration. Kept as a raw string literal so the
// XML needs no escaping.
char const* xml = R"(<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Txn ver="1.0">
    <TOpts tCount="1"  tformat="0"  ttimeout="10" />
    <TData>
        <Tvalue date="YYMMDD" time="HHMM" Ref="100"/>
    </TData>
    <TCustOpts>
        <Param name="SALE" value="xyz" />
    </TCustOpts>
</Txn>
)";

打印:

<?xml version="1.0" encoding="utf-8"?>
<Txn ver="1.0">
    <TOpts tCount="1" tformat="0" ttimeout="10"/>
    <TData>
        <Tvalue date="YYMMDD" time="HHMM" Ref="100"/>
    </TData>
    <TCustOpts>
        <Param name="SALE" value="xyz"/>
    </TCustOpts>
</Txn>

关于c++ - boost::property_tree : 解析复杂的xml结构,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/47013164/

相关文章:

java - 无法将名称解析为 (n) 'type definition' 组件

Java XML : parsing nested XML file with identical tags

c++ - 有效删除 C++ STL vector 中的双条目?

C++ 与 Dll 通信出错

c++ - 使用带有基类的模板作为参数

c++ - 给定一个大小为 N*N 的矩阵。我们需要找出特定字符串的位置数

c++ - 控制 map 中元素的顺序

c# - 在 C# 中将大型 XML 文件切割成较小的部分

c++ - 禁用 boost 的 `progress_display` 输出

c++ - 从标准 C++ 代码访问 iPhone 的沙箱文档文件夹