boost - 为什么在 boost Spirit 中使用流会对性能造成如此大的影响?

标签 boost boost-spirit

我准备了一个小型基准程序来测量不同的解析方式。问题在于,当使用流和自定义函数将日期存储为 time_t + double 时,性能会大幅下降。

std::string 奇怪的 boost spirit 特征是因为查找回溯会用非匹配行的所有公共(public)部分填充变量字符串,直到找到匹配的行。

对源代码质量感到抱歉(复制/粘贴、错误的变量名称、弱缩进......)。我知道这个基准代码不会包含在《Clean Code》书中,所以请忽略这个事实,让我们专注于这个主题。

我知道最快的方法是使用字符串而不回溯,但是流的时间增量确实很奇怪。有人可以解释一下发生了什么事吗?

#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/repository/include/qi_seek.hpp>
#include <boost/chrono/chrono.hpp>
#include <iomanip>
#include <ctime>

typedef std::string::const_iterator It;

namespace structs {
    /// Timestamp split into a calendar time (`date`) and the floating-point
    /// value that follows the seconds field in the input (`ms`).
    struct Timestamp {
        std::time_t date;
        double ms;

        /// Extracts "%Y-%b-%d %H:%M:%S" followed by a floating-point number.
        /// `time.date` is only assigned when both extractions succeed; on
        /// failure the stream is returned in its failed state.
        friend std::istream& operator>> (std::istream& stream, Timestamp& time)
        {
            // Start from a fully defined value: std::get_time is not
            // guaranteed to set fields the format does not mention (such as
            // tm_isdst), yet std::mktime reads every field.
            std::tm tm{};
            tm.tm_isdst = -1;  // let mktime determine whether DST applies

            if (stream >> std::get_time(&tm, "%Y-%b-%d %H:%M:%S") >> time.ms)
                time.date = std::mktime(&tm);

            return stream;
        }
    };

    /// Record whose bracketed date is kept as raw text.
    struct Record1 {
        std::string date;
        double time;
        std::string str;
    };

    /// Record whose bracketed date is converted to a Timestamp.
    struct Record2 {
        Timestamp date;
        double time;
        std::string str;
    };

    typedef std::vector<Record1> Records1;
    typedef std::vector<Record2> Records2;
}

// Adapt Record1 as a Boost.Fusion sequence listing its members in declaration
// order (date, time, str); the qi rules below use Record1 as attribute type.
BOOST_FUSION_ADAPT_STRUCT(structs::Record1,
        (std::string, date)
        (double, time)
        (std::string, str))

// Same adaptation for Record2, whose first member is a structs::Timestamp.
BOOST_FUSION_ADAPT_STRUCT(structs::Record2,
        (structs::Timestamp, date)
        (double, time)
        (std::string, str))

namespace boost { namespace spirit { namespace traits {
    /// Customisation applied when an iterator range (e.g. the result of
    /// qi::raw[]) is stored into a std::string attribute: the string is
    /// replaced by the characters in [f, l) rather than appended to.
    template <typename It>
    struct assign_to_attribute_from_iterators<std::string, It, void> {
        static inline void call(It f, It l, std::string& attr) {
            // assign(f, l) never dereferences f, so an empty range (f == l),
            // which raw[*~char_(']')] yields for "[]", is handled safely.
            attr.assign(f, l);
        }
    };
} } }

namespace qi = boost::spirit::qi;

// Four grammars over the same line format:
//   [<date>] - <double> s => String: <text>
// Parser1/Parser2 match exactly one line (including its end-of-line);
// Parser3/Parser4 consume a whole eol-separated input and skip lines that do
// not match.
namespace QiParsers {
    // Single line -> Record1; the bracket content is captured as raw text.
    template <typename It>
    struct Parser1 : qi::grammar<It, structs::Record1()>
    {
        Parser1() : Parser1::base_type(start) {
            using namespace qi;

            // raw[*~char_(']')] exposes everything up to the closing bracket
            // as an iterator range; raw[+graph] does the same for the text.
            start = '[' >> raw[*~char_(']')] >> ']'
                >> " - " >> double_ >> " s"
                >> " => String: "  >> raw[+graph]
                >> eol;
        }

    private:
        qi::rule<It, structs::Record1()> start;
    };

    // Single line -> Record2; the bracket content is extracted with
    // qi::stream, i.e. through Timestamp's operator>>.
    template <typename It>
    struct Parser2 : qi::grammar<It, structs::Record2()>
    {
        Parser2() : Parser2::base_type(start) {
            using namespace qi;

            start = '[' >> stream >> ']'
                >> " - " >> double_ >> " s"
                >> " => String: "  >> raw[+graph]
                >> eol;
        }

    private:
        qi::rule<It, structs::Record2()> start;
    };

    // Whole input -> Records1; raw-text date, non-matching lines ignored.
    template <typename It>
    struct Parser3 : qi::grammar<It, structs::Records1()>
    {
        Parser3() : Parser3::base_type(start) {
            using namespace qi;
            using boost::phoenix::push_back;

            line = '[' >> raw[*~char_(']')] >> ']'
                >> " - " >> double_ >> " s"
                >> " => String: "  >> raw[+graph];

            // Fallback for a line that `line` rejects: consume up to CR/LF.
            ignore = *~char_("\r\n");

            // eol-separated list; only successfully parsed lines are appended
            // to the result vector.
            start = (line[push_back(_val, _1)] | ignore) % eol;
        }

    private:
        qi::rule<It> ignore;
        qi::rule<It, structs::Record1()> line;
        qi::rule<It, structs::Records1()> start;
    };

    // Whole input -> Records2; same as Parser3 but the date goes through
    // qi::stream.
    template <typename It>
    struct Parser4 : qi::grammar<It, structs::Records2()>
    {
        Parser4() : Parser4::base_type(start) {
            using namespace qi;
            using boost::phoenix::push_back;

            line = '[' >> stream >> ']'
                >> " - " >> double_ >> " s"
                >> " => String: "  >> raw[+graph];

            // Fallback for a line that `line` rejects: consume up to CR/LF.
            ignore = *~char_("\r\n");

            start = (line[push_back(_val, _1)] | ignore) % eol;
        }

    private:
        qi::rule<It> ignore;
        qi::rule<It, structs::Record2()> line;
        qi::rule<It, structs::Records2()> start;
    };
}

/// Runs `*seek[Parser]` over [b, e), prints the elapsed wall time in
/// milliseconds tagged with `message`, and returns the collected records.
template<typename Parser, typename Container>
Container parse_seek(It b, It e, const std::string& message)
{
    namespace chrono = boost::chrono;
    namespace repo = boost::spirit::repository::qi;
    typedef chrono::high_resolution_clock clock_type;

    // Constructed on the first call, i.e. before the timed region starts.
    static const Parser grammar;

    Container collected;

    const clock_type::time_point started = clock_type::now();
    parse(b, e, *repo::seek[grammar], collected);
    const clock_type::time_point finished = clock_type::now();

    std::cout << "Elapsed time: "
              << chrono::duration_cast<chrono::milliseconds>(finished - started).count()
              << " ms (" << message << ")\n";

    return collected;
}

/// Runs `Parser` once over [b, e) (the grammar itself skips non-matching
/// lines), prints the elapsed wall time in milliseconds tagged with
/// `message`, and returns the collected records.
template<typename Parser, typename Container>
Container parse_ignoring(It b, It e, const std::string& message)
{
    namespace chrono = boost::chrono;
    typedef chrono::high_resolution_clock clock_type;

    // Constructed on the first call, i.e. before the timed region starts.
    static const Parser grammar;

    Container collected;

    const clock_type::time_point started = clock_type::now();
    parse(b, e, grammar, collected);
    const clock_type::time_point finished = clock_type::now();

    std::cout << "Elapsed time: "
              << chrono::duration_cast<chrono::milliseconds>(finished - started).count()
              << " ms (" << message << ")\n";

    return collected;
}

// Sample line containing the " => String: " marker the grammars look for.
static const std::string input1("[2018-Mar-01 00:00:00.000000] - 1.000 s => String: Valid_string\n");
// Sample line without that marker.
static const std::string input2("[2018-Mar-02 00:00:00.000000] - 2.000 s => I dont care\n");
// Benchmark buffer; starts empty and is filled in main().
static std::string input;

// Builds the benchmark input (each input1 line followed by a run of input2
// lines) and times the four parser configurations on it.
int main() {
    const int blockCount = 10;          // how many input1 lines are emitted
    const int fillerPerBlock = 100000;  // input2 lines following each input1

    input.reserve(blockCount * (input1.size() + fillerPerBlock*input2.size()));

    for (int block = 0; block < blockCount; ++block)
    {
        input.append(input1);

        for (int filler = 0; filler < fillerPerBlock; ++filler)
            input.append(input2);
    }

    // Results are kept only so that the calls are not discarded.
    const auto records1 = parse_seek<QiParsers::Parser1<It>, structs::Records1>(
            input.begin(), input.end(), "std::string + seek");
    const auto records2 = parse_seek<QiParsers::Parser2<It>, structs::Records2>(
            input.begin(), input.end(), "stream + seek");

    const auto records3 = parse_ignoring<QiParsers::Parser3<It>, structs::Records1>(
            input.begin(), input.end(), "std::string + ignoring");
    const auto records4 = parse_ignoring<QiParsers::Parser4<It>, structs::Records2>(
            input.begin(), input.end(), "stream + ignoring");

    return 0;
}

控制台中的结果是:

Elapsed time: 1445 ms (std::string + seek)
Elapsed time: 21519 ms (stream + seek)
Elapsed time: 860 ms (std::string + ignoring)
Elapsed time: 19046 ms (stream + ignoring)

最佳答案

好的,在发布的代码中,70%1 的时间花费在流的下溢操作上。

没有研究/为什么/,而是²写了一些简单的实现来看看我是否可以做得更好。第一步:

² Update I've since analyzed it and provided a PR.

The improvement created by that PR does not affect the bottom line in this particular case (see SUMMARY)

  • 下降operator>>对于 Timestamp (我们不会使用它)
  • 替换 '[' >> stream >> ']' 的所有实例与替代方案'[' >> raw[*~char_(']')] >> ']'这样我们将始终使用该特征将迭代器范围转换为属性类型( std::stringTimestamp )

现在,我们实现 assign_to_attribute_from_iterators<structs::Timestamp, It>特质:

变体 1:数组源

/// Variant 1: converts the raw range [f, l) into a Timestamp by reading it
/// through a temporary Boost.Iostreams array_source stream.
/// Throws the string literal "Parse failure" when extraction fails.
// NOTE(review): array_source is constructed from (f, l); presumably this
// needs pointer-like iterators — confirm against the iterator type in use.
template <typename It>
struct assign_to_attribute_from_iterators<structs::Timestamp, It, void> {
    static inline void call(It f, It l, structs::Timestamp& time) {
        boost::iostreams::stream<boost::iostreams::array_source> stream(f, l);

        // Start from a fully defined value: std::get_time is not guaranteed
        // to set fields the format does not mention (such as tm_isdst), yet
        // std::mktime reads every field.
        std::tm tm{};
        tm.tm_isdst = -1;  // let mktime determine whether DST applies

        if (stream >> std::get_time(&tm, "%Y-%b-%d %H:%M:%S") >> time.ms)
            time.date = std::mktime(&tm);
        else throw "Parse failure";
    }
};

使用 callgrind 进行分析: (点击放大)

它确实有相当大的改进,可能是因为我们假设底层的字符缓冲区是连续的,而 Spirit 实现无法做出该假设。我们大约 42% 的时间都花在 time_get 上。 .

粗略地说,25% 的时间花在了语言环境上,其中令人担忧的 ~20% 花在了动态转换上:(

变体 2:可重用的数组源

相同,但重用静态流实例来查看是否有显着差异:

// Single stream object that every conversion re-opens on the new range.
static boost::iostreams::stream<boost::iostreams::array_source> s_stream;

/// Variant 2: same conversion as variant 1, but re-using `s_stream` instead
/// of constructing a stream per call.
/// Throws the string literal "Parse failure" when extraction fails.
template <typename It>
struct assign_to_attribute_from_iterators<structs::Timestamp, It, void> {
    static inline void call(It f, It l, structs::Timestamp& time) {
        // Start from a fully defined value: std::get_time is not guaranteed
        // to set fields the format does not mention (such as tm_isdst), yet
        // std::mktime reads every field.
        std::tm tm{};
        tm.tm_isdst = -1;  // let mktime determine whether DST applies

        // Detach from the previous range and drop any error/eof flags left
        // over from the previous call before opening the new source.
        if (s_stream.is_open()) s_stream.close();
        s_stream.clear();
        boost::iostreams::array_source as(f, l);
        s_stream.open(as);

        if (s_stream >> std::get_time(&tm, "%Y-%b-%d %H:%M:%S") >> time.ms)
            time.date = std::mktime(&tm);
        else throw "Parse failure";
    }
};

分析显示没有显着差异)。

变体 3:strptimestrtod/from_chars

让我们看看降到 C 级是否会减少语言环境的伤害:

/// Variant 3: converts the raw range [f, l) with the C-level API: strptime
/// for the date/time part, then from_chars (when available) or strtod for
/// the number that follows it.
/// Throws the string literal "Parse failure" when the range is empty, the
/// date/time does not match, or (from_chars path) no number follows.
template <typename It>
struct assign_to_attribute_from_iterators<structs::Timestamp, It, void> {
    static inline void call(It f, It l, structs::Timestamp& time) {
        // &*f below must not be evaluated on an empty range.
        if (f == l)
            throw "Parse failure";

        // Fully defined start value; tm_isdst = -1 lets mktime decide on DST
        // instead of reading whatever strptime left unset.
        std::tm tm{};
        tm.tm_isdst = -1;

        // strptime has no notion of the range end: the underlying buffer is
        // assumed to be NUL-terminated somewhere at or after l.
        const char* const remain = strptime(&*f, "%Y-%b-%d %H:%M:%S", &tm);
        if (!remain)
            throw "Parse failure";  // date/time part did not match the format
        time.date = std::mktime(&tm);

        // The feature-test macro (defined by <charconv> when present) is
        // checked rather than mere header availability, because the header
        // can exist without the floating-point overloads.
    #if defined(__cpp_lib_to_chars) && __cpp_lib_to_chars >= 201611L
        // Continue where strptime stopped, bounded by the end of the range;
        // the end pointer is computed without dereferencing l.
        const char* const last = &*f + std::distance(f, l);
        const auto result = std::from_chars(remain, last, time.ms); // using <charconv> from c++17
        if (result.ec != std::errc())
            throw "Parse failure";
    #else
        char* end = nullptr;
        time.ms = std::strtod(remain, &end);

        assert(end > remain);
    #endif
    }
};

As you can see, using strtod is a bit suboptimal here. The input range is bounded, but there's no way to tell strtod about that. I have not been able to profile the from_chars approach, which is strictly safer because it doesn't have this issue.

In practice for your sample code it is safe to use strtod because we know the input buffer is NUL-terminated.

在这里您可以看到解析日期时间仍然是一个值得关注的因素:

  • mktime 15.58 %
  • strptime 40.54 %
  • strtod 5.88%

但总而言之,现在差异不再那么严重了:

  • 解析器1:14.17 %
  • 解析器2:43.44 %
  • 解析器3:5.69%
  • 解析器4:35.49%

变体 4:再次 boost 日期时间

有趣的是,“低级”C-API 的性能与使用更高级的 Boost posix_time::ptime 相差不远。功能:

template <typename It>
struct assign_to_attribute_from_iterators<structs::Timestamp, It, void> {
    static inline void call(It f, It l, structs::Timestamp& time) {
        time.date = to_time_t(boost::posix_time::time_from_string(std::string(f,l)));
    }
};

This might sacrifice some precision, according to the docs:

enter image description here

这里,解析日期和时间所花费的总时间是 68%。解析器的相对速度接近最后一个:

  • 解析器1:12.33 %
  • 解析器2:43.86%
  • 解析器3:5.22%
  • 解析器4:37.43%

摘要

总而言之,即使您有分配更多字符串的风险,存储字符串似乎也更快。我做了一个非常简单的检查,看看这是否可以下降到 SSO通过增加子字符串的长度:

static const std::string input1 = "[2018-Mar-01 00:01:02.012345 THWARTING THE SMALL STRING OPTIMIZATION HERE THIS WON'T FIT, NO DOUBT] - 1.000 s => String: Valid_string\n";
static const std::string input2 = "[2018-Mar-02 00:01:02.012345 THWARTING THE SMALL STRING OPTIMIZATION HERE THIS WON'T FIT, NO DOUBT] - 2.000 s => I dont care\n";

没有重大影响,因此剩下解析本身。

很明显,要么您想要延迟解析时间( Parser3 是迄今为止最快的),要么应该使用耗时考验的 Boost posix_time功能。

列表

这是我使用的组合基准代码。有一些事情发生了变化:

  • 添加了一些健全性检查输出(以避免测试无意义的代码)
  • 使迭代器变得通用(更改为 char* 对优化构建中的性能没有显着影响)
  • 以上变体都可以通过更改 #if 1 在代码中手动切换至#if 0在正确的地方
  • 为了方便起见,减少了 N1/N2

我大量使用了 C++14,因为代码的目的是寻找瓶颈。在分析之后,获得的任何智慧都可以相对轻松地向后移植。

Live On Coliru

#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/repository/include/qi_seek.hpp>
#include <boost/date_time/posix_time/posix_time.hpp>
#include <boost/chrono/chrono.hpp>
#include <iomanip>
#include <ctime>
#if __has_include(<charconv>) || __cpp_lib_to_chars >= 201611
#    include <charconv> // not supported yet until GCC 8
#endif

namespace structs {
    // Calendar time plus a separate floating-point component.
    struct Timestamp {
        std::time_t date;
        double ms;
    };

    // Parsed line whose bracketed date is kept as text.
    struct Record1 {
        std::string date;
        double time;
        std::string str;
    };

    // Parsed line whose bracketed date is stored as a Timestamp.
    struct Record2 {
        Timestamp date;
        double time;
        std::string str;
    };

    // Result containers of the multi-record parsers.
    using Records1 = std::vector<Record1>;
    using Records2 = std::vector<Record2>;
}

// Adapt Record1 as a Boost.Fusion sequence listing its members in declaration
// order (date, time, str); the qi rules below use Record1 as attribute type.
BOOST_FUSION_ADAPT_STRUCT(structs::Record1,
        (std::string, date)
        (double, time)
        (std::string, str))

// Same adaptation for Record2, whose first member is a structs::Timestamp.
BOOST_FUSION_ADAPT_STRUCT(structs::Record2,
        (structs::Timestamp, date)
        (double, time)
        (std::string, str))

namespace boost { namespace spirit { namespace traits {
    /// Iterator range (e.g. from qi::raw[]) -> std::string: the attribute is
    /// replaced by the characters in [f, l).
    template <typename It>
    struct assign_to_attribute_from_iterators<std::string, It, void> {
        static inline void call(It f, It l, std::string& attr) {
            // assign(f, l) never dereferences f, so an empty range (f == l),
            // which raw[*~char_(']')] yields for "[]", is handled safely.
            attr.assign(f, l);
        }
    };

    // Stream object shared by the "reusable array source" variant below.
    // NOTE(review): boost::iostreams::stream / array_source are declared in
    // <boost/iostreams/stream.hpp> and <boost/iostreams/device/array.hpp>;
    // confirm both headers are included in this translation unit.
    static boost::iostreams::stream<boost::iostreams::array_source> s_stream;

    /// Iterator range -> structs::Timestamp. Exactly one of the variants
    /// below is compiled; switch by changing the first `#if 1` that should be
    /// skipped to `#if 0` / `#elif 0`.
    template <typename It>
    struct assign_to_attribute_from_iterators<structs::Timestamp, It, void> {
        static inline void call(It f, It l, structs::Timestamp& time) {
#if 1
            // Boost.DateTime variant. Only time.date is assigned here;
            // time.ms is not touched.
            time.date = to_time_t(boost::posix_time::time_from_string(std::string(f,l)));
#elif 1
            // Array-source variant: a fresh stream per call.
            // tm starts fully defined because std::get_time is not guaranteed
            // to set tm_isdst, which std::mktime reads.
            std::tm tm{};
            tm.tm_isdst = -1;
            boost::iostreams::stream<boost::iostreams::array_source> stream(f, l);

            if (stream >> std::get_time(&tm, "%Y-%b-%d %H:%M:%S") >> time.ms)
                time.date = std::mktime(&tm);
            else
                throw "Parse failure";
#elif 1
            // Reusable array-source variant: re-open s_stream on the new range.
            std::tm tm{};
            tm.tm_isdst = -1;
            if (s_stream.is_open()) s_stream.close();
            s_stream.clear();
            boost::iostreams::array_source as(f, l);
            s_stream.open(as);

            if (s_stream >> std::get_time(&tm, "%Y-%b-%d %H:%M:%S") >> time.ms)
                time.date = std::mktime(&tm);
            else
                throw "Parse failure";
#else
            // C-level variant: strptime + from_chars/strtod.
            // &*f below must not be evaluated on an empty range.
            if (f == l)
                throw "Parse failure";

            std::tm tm{};
            tm.tm_isdst = -1;

            // strptime has no notion of the range end: the underlying buffer
            // is assumed to be NUL-terminated somewhere at or after l.
            const char* const remain = strptime(&*f, "%Y-%b-%d %H:%M:%S", &tm);
            if (!remain)
                throw "Parse failure";  // date/time part did not match
            time.date = std::mktime(&tm);

            // The feature-test macro is checked rather than mere header
            // availability, because <charconv> can exist without the
            // floating-point overloads.
        #if defined(__cpp_lib_to_chars) && __cpp_lib_to_chars >= 201611L
            // Continue where strptime stopped, bounded by the range end; the
            // end pointer is computed without dereferencing l.
            const char* const last = &*f + std::distance(f, l);
            const auto result = std::from_chars(remain, last, time.ms); // using <charconv> from c++17
            if (result.ec != std::errc())
                throw "Parse failure";
        #else
            char* end = nullptr;
            time.ms = std::strtod(remain, &end);

            assert(end > remain);
        #endif
#endif
        }
    };
} } }

namespace qi = boost::spirit::qi;

// Four grammars over the same line format:
//   [<date>] - <double> s => String: <text>
// All of them capture the bracket content with raw[]; whether it ends up as
// text or as a Timestamp depends only on the attribute type (Record1 vs
// Record2). Parser1/Parser2 match exactly one line (including its
// end-of-line); Parser3/Parser4 consume a whole eol-separated input and skip
// lines that do not match.
namespace QiParsers {
    // Single line -> Record1 (date kept as std::string).
    template <typename It>
    struct Parser1 : qi::grammar<It, structs::Record1()>
    {
        Parser1() : Parser1::base_type(start) {
            using namespace qi;

            // raw[*~char_(']')] exposes everything up to the closing bracket
            // as an iterator range; raw[+graph] does the same for the text.
            start = '[' >> raw[*~char_(']')] >> ']'
                >> " - " >> double_ >> " s"
                >> " => String: "  >> raw[+graph]
                >> eol;
        }

    private:
        qi::rule<It, structs::Record1()> start;
    };

    // Single line -> Record2: same grammar text as Parser1, but the raw date
    // range is assigned to a structs::Timestamp attribute.
    template <typename It>
    struct Parser2 : qi::grammar<It, structs::Record2()>
    {
        Parser2() : Parser2::base_type(start) {
            using namespace qi;

            start = '[' >> raw[*~char_(']')] >> ']'
                >> " - " >> double_ >> " s"
                >> " => String: "  >> raw[+graph]
                >> eol;
        }

    private:
        qi::rule<It, structs::Record2()> start;
    };

    // Whole input -> Records1; non-matching lines are ignored.
    template <typename It>
    struct Parser3 : qi::grammar<It, structs::Records1()>
    {
        Parser3() : Parser3::base_type(start) {
            using namespace qi;
            using boost::phoenix::push_back;

            line = '[' >> raw[*~char_(']')] >> ']'
                >> " - " >> double_ >> " s"
                >> " => String: "  >> raw[+graph];

            // Fallback for a line that `line` rejects: consume up to CR/LF.
            ignore = *~char_("\r\n");

            // eol-separated list; only successfully parsed lines are appended
            // to the result vector.
            start = (line[push_back(_val, _1)] | ignore) % eol;
        }

    private:
        qi::rule<It> ignore;
        qi::rule<It, structs::Record1()> line;
        qi::rule<It, structs::Records1()> start;
    };

    // Whole input -> Records2; same as Parser3 with a Timestamp date.
    template <typename It>
    struct Parser4 : qi::grammar<It, structs::Records2()>
    {
        Parser4() : Parser4::base_type(start) {
            using namespace qi;
            using boost::phoenix::push_back;

            line = '[' >> raw[*~char_(']')] >> ']'
                >> " - " >> double_ >> " s"
                >> " => String: "  >> raw[+graph];

            // Fallback for a line that `line` rejects: consume up to CR/LF.
            ignore = *~char_("\r\n");

            start = (line[push_back(_val, _1)] | ignore) % eol;
        }

    private:
        qi::rule<It> ignore;
        qi::rule<It, structs::Record2()> line;
        qi::rule<It, structs::Records2()> start;
    };
}

// One value-initialised, immutable instance per type T (variable template).
template <typename T> static const T s_instance{};

template<template <typename> class Parser, typename Container, typename It>
Container parse_seek(It b, It e, const std::string& message)
{
    Container records;

    auto const t0 = boost::chrono::high_resolution_clock::now();
    parse(b, e, *boost::spirit::repository::qi::seek[s_instance<Parser<It> >], records);
    auto const t1 = boost::chrono::high_resolution_clock::now();

    auto elapsed = boost::chrono::duration_cast<boost::chrono::milliseconds>(t1 - t0);
    std::cout << "Elapsed time: " << elapsed.count() << " ms (" << message << ")\n";

    return records;
}

template<template <typename> class Parser, typename Container, typename It>
Container parse_ignoring(It b, It e, const std::string& message)
{
    Container records;

    auto const t0 = boost::chrono::high_resolution_clock::now();
    parse(b, e, s_instance<Parser<It> >, records);
    auto const t1 = boost::chrono::high_resolution_clock::now();

    auto elapsed = boost::chrono::duration_cast<boost::chrono::milliseconds>(t1 - t0);
    std::cout << "Elapsed time: " << elapsed.count() << " ms (" << message << ")\n";

    return records;
}

// Sample line containing the " => String: " marker the grammars look for.
static const std::string input1("[2018-Mar-01 00:01:02.012345] - 1.000 s => String: Valid_string\n");
// Sample line without that marker.
static const std::string input2("[2018-Mar-02 00:01:02.012345] - 2.000 s => I dont care\n");

/// Builds the benchmark text: 10 blocks, each one `input1` line followed by
/// 1000 `input2` lines.
std::string prepare_input() {
    constexpr int blockCount = 10;
    constexpr int fillerPerBlock = 1000;

    std::string buffer;
    // Reserve the exact final size so the appends below never reallocate.
    buffer.reserve(blockCount * (input1.size() + fillerPerBlock*input2.size()));

    for (int block = 0; block != blockCount; ++block) {
        buffer.append(input1);
        for (int filler = 0; filler != fillerPerBlock; ++filler)
            buffer.append(input2);
    }

    return buffer;
}

int main() {
    auto const input = prepare_input();

    auto f = input.data(), l = f + input.length();

    for (auto& r: parse_seek<QiParsers::Parser1, structs::Records1>(f, l, "std::string + seek")) {
        std::cout << r.date << "\n";
        break;
    }
    for (auto& r: parse_seek<QiParsers::Parser2, structs::Records2>(f, l, "stream + seek")) {
        auto tm = *std::localtime(&r.date.date);
        std::cout << std::put_time(&tm, "%Y-%b-%d %H:%M:%S") << "\n";
        break;
    }
    for (auto& r: parse_ignoring<QiParsers::Parser3, structs::Records1>(f, l, "std::string + ignoring")) {
        std::cout << r.date << "\n";
        break;
    }
    for (auto& r: parse_ignoring<QiParsers::Parser4, structs::Records2>(f, l, "stream + ignoring")) {
        auto tm = *std::localtime(&r.date.date);
        std::cout << std::put_time(&tm, "%Y-%b-%d %H:%M:%S") << "\n";
        break;
    }
}

打印类似的内容

Elapsed time: 14 ms (std::string + seek)
2018-Mar-01 00:01:02.012345
Elapsed time: 29 ms (stream + seek)
2018-Mar-01 00:01:02
Elapsed time: 2 ms (std::string + ignoring)
2018-Mar-01 00:01:02.012345
Elapsed time: 22 ms (stream + ignoring)
2018-Mar-01 00:01:02

¹ 所有百分比均与计划成本相关。这确实扭曲了百分比(如果不考虑非流解析器测试,提到的 70% 会更糟),但这些数字足以作为相对比较的指南在测试运行中。

关于boost - 为什么在 boost Spirit 中使用流会对性能造成如此大的影响?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/49693376/

相关文章:

c++ - 使用 Boost 构建单元测试时出现链接器错误

gcc - boost 文件系统的 Boost 链接错误,为什么?

c++ - 语法分解问题

c++ - 为什么此 Boost.Spirit x3 规则使用尖括号正确解析,但使用引号解析错误?

c++ - 如何从 Boost Spirit X3 词素解析器中获取字符串?

c++ - 使用 boost 检索当前世纪

C++ 使用 boost::ptr_vector 泄漏内存

C++ is_str_empty 谓词

c++ - 我怎样才能让这个想法以 pre 2.x boost spirit 编译?

C++解析比特流