c++ - Boost.Spirit HTTP header 解析器、跟踪

标签 c++ parsing boost boost-spirit

我已经使用 Boost.Spirit 实现了 HTTP header 解析器:

#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/classic.hpp>
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/std_pair.hpp>
#include <boost/fusion/include/map.hpp>
#include <boost/fusion/adapted/struct.hpp>
#include <iostream>
#include <string>

// Pulls namespace boost into scope; the code below relies on this to spell
// Spirit names as spirit::qi::..., spirit::ascii::... and spirit::lexeme.
using namespace boost;

/// Header fields of a request, stored as field-name -> field-value.
using header_fields_t = std::map<std::string, std::string>;

/// Result of parsing an HTTP request header.
/// Member order matters: the Fusion adaptation and the grammar's attribute
/// sequence both follow it.
struct HttpRequestHeader
{
    std::string     _method;         ///< request method token of the request line
    std::string     _uri;            ///< request target of the request line
    std::string     _http_version;   ///< version text parsed from the request line
    header_fields_t _header_fields;  ///< header fields collected after the request line
};

// Adapts HttpRequestHeader as a Boost.Fusion sequence so that a Qi rule with
// attribute HttpRequestHeader() can fill its members in the order listed here.
BOOST_FUSION_ADAPT_STRUCT
(
    HttpRequestHeader,
    (std::string, _method)
    (std::string, _uri)
    (std::string, _http_version)
    (header_fields_t, _header_fields)
)

// Qi grammar for an HTTP request header: a request line (method, URI,
// "HTTP/<version>") followed by zero or more "key: value" fields, each line
// ending in "\r\n". The synthesized attribute is HttpRequestHeader and the
// skipper is ascii::space.
//
// NOTE(review): this is the version from the question. The accepted answer
// below lists its problems (Spirit expressions stored in `auto`, a skipper
// that also skips line endings, redundant lexeme[] wrappers); the code is
// kept unchanged here on purpose.
template <typename Iterator>
struct HttpHeaderGrammar: spirit::qi::grammar < Iterator,      
HttpRequestHeader(), spirit::ascii::space_type >
{
    // Defines all rules and registers http_header as the start rule.
    HttpHeaderGrammar() : HttpHeaderGrammar::base_type(http_header,     "HttpHeaderGrammar Grammar")
    {
        // Line terminator: '\r' followed by '\n', each matched without pre-skipping.
        // NOTE(review): holding a Spirit expression in an `auto` variable is the
        // first defect named by the answer.
        auto eol_rule = spirit::qi::no_skip[spirit::qi::lit('\r')] >> spirit::qi::no_skip[spirit::qi::lit('\n')];
        // Request method: one or more alphabetic characters.
        method = spirit::qi::lexeme[+spirit::qi::alpha];
        // Request URI: one or more characters that are not whitespace.
        uri = spirit::qi::lexeme[+(spirit::qi::char_ - spirit::qi::space)];
        // Protocol version: literal "HTTP/" followed by digits and dots.
        http_ver = spirit::lexeme[spirit::qi::lit("HTTP/") >> +(spirit::qi::char_("0-9."))];

        // Field name: letters, digits and '-'.
        auto field_key = spirit::lexeme[+(spirit::qi::char_("a-zA-Z-") | spirit::qi::digit)];
        // Field value: every character up to (not including) '\r' or '\n'.
        auto field_value = spirit::lexeme[+(spirit::qi::char_ - '\r' - '\n')];
        // Zero or more "key ':' value eol" lines.
        fields = *(field_key >> ':' >> field_value >> eol_rule);
        http_header =
            method >> uri >> http_ver >> eol_rule
            >> fields
            ;
        // Enables debug tracing for these rules; http_header is not in the list.
        BOOST_SPIRIT_DEBUG_NODES((method)(uri)(http_ver)(fields))
    }

    // All rules use ascii::space as their skipper.
    spirit::qi::rule<Iterator, std::string(), spirit::ascii::space_type> method;
    spirit::qi::rule<Iterator, std::string(), spirit::ascii::space_type> uri;
    spirit::qi::rule<Iterator, std::string(), spirit::ascii::space_type> http_ver;
    spirit::qi::rule<Iterator, std::map<std::string, std::string>(), spirit::ascii::space_type> fields;
    // Start rule; its attribute is the Fusion-adapted HttpRequestHeader.
    spirit::qi::rule<Iterator, HttpRequestHeader(),     spirit::ascii::space_type> http_header;
};

/**
 * Demo driver: parses a hard-coded HTTP request with HttpHeaderGrammar and
 * reports on stdout whether the whole input was consumed.
 *
 * @param argc unused
 * @param argv unused
 * @return 0 in all cases; a parse failure is only reported on stdout.
 */
int main(int argc, char* argv[])
{
    (void)argc;  // command-line arguments are not used
    (void)argv;

    std::cout << "/////////////////////////////////////////////////////////\n\n";
    std::cout << "Expression parser...\n\n";
    std::cout << "/////////////////////////////////////////////////////////\n\n";

    typedef std::string::const_iterator iterator_type;
    HttpHeaderGrammar<iterator_type> httpGrammar;
    BOOST_SPIRIT_DEBUG_NODE(httpGrammar);

    HttpRequestHeader httpHeader;

    const std::string str(
        "CONNECT www.tutorialspoint.com HTTP/1.1\r\n"
        "User-Agent: Mozilla/4.0 (compatible; MSIE5.01; Windows NT)\r\n");

    // phrase_parse advances `iter`; the parse only counts as successful when
    // it both matched and consumed the input up to `end`.
    iterator_type iter = str.begin();
    const iterator_type end = str.end();
    const bool r = spirit::qi::phrase_parse(iter, end, httpGrammar, spirit::ascii::space, httpHeader);

    if (r && iter == end)
    {
        std::cout << "-------------------------\n";
        std::cout << "Parsing succeeded\n";
        std::cout << "-------------------------\n";
    }
    else
    {
        // Show the unparsed remainder, wrapped in double quotes.
        const std::string rest(iter, end);
        std::cout << "-------------------------\n";
        std::cout << "Parsing failed\n";
        std::cout << "stopped at: \"" << rest << "\"\n";
        std::cout << "-------------------------\n";
    }

    std::cout << "Bye... :-) \n\n";
    return 0;
}

最后,我遇到了两个问题:

1) 在 Windows 上可以正常运行并且解析成功;但在 Linux 上运行时解析失败。调试时我注意到 HTTP 请求文本中保留着 “...\\r\\n...”,看起来 Boost.Spirit 没有正确处理这些字符。我完全不明白哪里出了问题,也不知道该如何修复。

2) 我没有看到任何跟踪输出。要让 Boost.Spirit 打印跟踪输出,我哪里做错了?

-谢谢

最佳答案

这里确实存在好几个问题:

  • 不能把 Spirit 表达式赋值给 auto 变量(请参阅 https://stackoverflow.com/a/31294109/2417774)
  • 如果 skipper 会跳过所有空白(包括换行符),就无法再匹配 eol(请改用 blank 作为 skipper)
  • 无需到处添加 lexeme,只需去掉这些规则的 skipper 即可;参见 “Boost spirit skipper issues”
  • graph 本身就已经匹配除空白之外的所有字符
  • 考虑让语法更加具体(用 +alpha 来匹配请求方法(verb)是在“自找麻烦”)
  • 不要再包含已经过时 10 年的 “classic” 头文件。

给你: Live On Coliru

#define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/fusion/include/std_pair.hpp>
#include <map>
#include <iostream>

/// Header fields of a request, stored as field-name -> field-value.
using header_fields_t = std::map<std::string, std::string>;

/// Attribute produced by the HTTP header grammar.
/// Member order matters: the Fusion adaptation and the grammar's attribute
/// sequence both follow it.
struct HttpRequestHeader
{
    std::string     _method;         ///< request method, e.g. "CONNECT"
    std::string     _uri;            ///< request target from the request line
    std::string     _http_version;   ///< version digits after "HTTP/", e.g. "1.1"
    header_fields_t _header_fields;  ///< header fields following the request line
};

// Adapts HttpRequestHeader as a Boost.Fusion sequence (member types are
// deduced) so a Qi rule with attribute HttpRequestHeader() can fill its
// members in the order listed here.
BOOST_FUSION_ADAPT_STRUCT(HttpRequestHeader, _method, _uri, _http_version, _header_fields)

// Short alias used by the grammar and main() below.
namespace qi = boost::spirit::qi;

/**
 * Qi grammar for an HTTP request header.
 *
 * Input shape: a request line `<method> <uri> HTTP/<version>` terminated by
 * "\r\n", followed by zero or more `<key>: <value>` lines, each terminated by
 * "\r\n". The synthesized attribute is an HttpRequestHeader.
 *
 * @tparam Iterator iterator type of the input range
 * @tparam Skipper  skipper type; defaults to ascii blank so that line endings
 *                  are never skipped
 */
template <typename Iterator, typename Skipper = qi::ascii::blank_type>
struct HttpHeaderGrammar : qi::grammar<Iterator, HttpRequestHeader(), Skipper>
{
    HttpHeaderGrammar()
        : HttpHeaderGrammar::base_type(http_header, "HttpHeaderGrammar Grammar")
    {
        // --- token-level rules (declared without a skipper, see below) ---
        method      = +qi::alpha;
        uri         = +qi::graph;
        // The "HTTP/" literal exposes no attribute, so only the digits and
        // dots end up in the string.
        http_ver    = qi::lit("HTTP/") >> +qi::char_("0-9.");
        field_key   = +qi::char_("0-9a-zA-Z-");
        field_value = +~qi::char_("\r\n");

        // --- phrase-level rules (skipper active between elements) ---
        fields =
            *(field_key >> ':' >> field_value >> qi::lexeme["\r\n"]);

        http_header =
            method >> uri >> http_ver >> qi::lexeme["\r\n"] >> fields;

        BOOST_SPIRIT_DEBUG_NODES((method)(uri)(http_ver)(fields)(http_header))
    }

  private:
    // Token rules: no skipper in the rule type, so nothing is skipped inside them.
    qi::rule<Iterator, std::string()> method;
    qi::rule<Iterator, std::string()> uri;
    qi::rule<Iterator, std::string()> http_ver;
    qi::rule<Iterator, std::string()> field_key;
    qi::rule<Iterator, std::string()> field_value;

    // Phrase rules: use the grammar's skipper.
    qi::rule<Iterator, std::map<std::string, std::string>(), Skipper> fields;
    qi::rule<Iterator, HttpRequestHeader(), Skipper> http_header;
};

int main()
{
    typedef std::string::const_iterator It;
    HttpHeaderGrammar<It> httpGrammar;

    HttpRequestHeader httpHeader;

    std::string str(
        "CONNECT www.tutorialspoint.com HTTP/1.1\r\n"
        "User-Agent: Mozilla/4.0 (compatible; MSIE5.01; Windows NT)\r\n");

    It iter = str.begin(), end = str.end();
    bool r = phrase_parse(iter, end, httpGrammar, qi::ascii::blank, httpHeader);

    if (r && iter == end) {
        std::cout << "Parsing succeeded\n";
    } else {
        std::cout << "Parsing failed\n";
        std::cout << "stopped at: \"" << std::string(iter, end) << "\"\n";
    }

    std::cout << "Bye... :-) \n\n";
}

输出:

Parsing succeeded
Bye... :-) 

调试信息:

<http_header>
  <try>CONNECT www.tutorial</try>
  <method>
    <try>CONNECT www.tutorial</try>
    <success> www.tutorialspoint.</success>
    <attributes>[[C, O, N, N, E, C, T]]</attributes>
  </method>
  <uri>
    <try>www.tutorialspoint.c</try>
    <success> HTTP/1.1\r\nUser-Agen</success>
    <attributes>[[w, w, w, ., t, u, t, o, r, i, a, l, s, p, o, i, n, t, ., c, o, m]]</attributes>
  </uri>
  <http_ver>
    <try>HTTP/1.1\r\nUser-Agent</try>
    <success>\r\nUser-Agent: Mozill</success>
    <attributes>[[1, ., 1]]</attributes>
  </http_ver>
  <fields>
    <try>User-Agent: Mozilla/</try>
    <success></success>
    <attributes>[[[[U, s, e, r, -, A, g, e, n, t], [M, o, z, i, l, l, a, /, 4, ., 0,  , (, c, o, m, p, a, t, i, b, l, e, ;,  , M, S, I, E, 5, ., 0, 1, ;,  , W, i, n, d, o, w, s,  , N, T, )]]]]</attributes>
  </fields>
  <success></success>
  <attributes>[[[C, O, N, N, E, C, T], [w, w, w, ., t, u, t, o, r, i, a, l, s, p, o, i, n, t, ., c, o, m], [1, ., 1], [[[U, s, e, r, -, A, g, e, n, t], [M, o, z, i, l, l, a, /, 4, ., 0,  , (, c, o, m, p, a, t, i, b, l, e, ;,  , M, S, I, E, 5, ., 0, 1, ;,  , W, i, n, d, o, w, s,  , N, T, )]]]]]</attributes>
</http_header>

关于c++ - Boost.Spirit HTTP header 解析器、跟踪,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/36777663/

相关文章:

c++ - Execvp 不执行带参数的 ping 命令

c++ - 将指向类对象的智能指针与类对象混合

使用不同的输入(数组)调用 C++ 相同的函数,如何缩小我的语句?

parsing - 使用 yacc 时,如何告诉 yyparse() 要停止解析?

json - 访问 Express Web 服务器中的 JSON 值

c++ - 有没有办法在使用宏定义的 Boost 测试框架中忽略 SIGCHLD?

c++ - 从 C++ 调用 R 函数

python - 如何使用 python 使 html 解析更具性能

c++ - 使用 boost 检索当前世纪

c++ - boost::multiprecision 比较两个 cpp_int 值