c++ - boost-spirit 解析器 lex->qi : Getting the "undocumented" on_success mechanism to work

标签 c++ parsing boost-spirit lexer

编辑:我删除了词法分析器,因为它没有与 Qi 完全集成,只会让语法变得难以理解(参见此处)。


on_success 的文档很少,我正在尝试把它接入我的解析器。现有的 on_success 示例针对的都是仅基于 qi 构建的解析器,即没有使用 lex。

这就是我尝试介绍结构的方式:

// Attempt to attach a success handler to the event_entry_ rule.
// As written, this is the statement the author reports does not compile.
using namespace qi::labels;
qi::on_success(event_entry_,std::cout << _val << _1);

但是它无法编译。我担心这是 lex 带来的问题。有人能告诉我哪里做错了吗?另外,能否告诉我所有可用的占位符有哪些、它们的类型以及各自代表什么(因为它们没有文档)。

完整文件如下:

#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/home/phoenix/bind/bind_member_variable.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/none.hpp>
#include <boost/cstdint.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <string>
#include <exception>
#include <vector>

namespace lex = boost::spirit::lex;
namespace px = boost::phoenix;
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;


// Lexer definition for the event-description language.
// Declares one token definition per punctuation mark, keyword, ordinal and
// identifier, registers them in the default lexer state, and places
// whitespace in a separate lexer state named "WS".
template <typename Lexer>
struct tokens : lex::lexer<Lexer>
{
    tokens()
        // Every token definition is initialised with its regex pattern; the
        // keyword patterns use (?i:...) so that e.g. "EVENT" in the sample
        // input matches the `event` token.
        : left_curly("\"{\""),
        right_curly("\"}\""),
        left_paren("\"(\""),
        right_paren("\")\""),
        colon(":"),
        scolon(";"),
        namespace_("(?i:namespace)"),
        event("(?i:event)"),
        optional("(?i:optional)"),
        required("(?i:required)"),
        repeated("(?i:repeated)"),
        t_int_4("(?i:int4)"),
        t_int_8("(?i:int8)"),
        t_string("(?i:string)"),
        ordinal("\\d+"),
        identifier("\\w+")

    {
        using boost::spirit::lex::_val;

        // Default-state tokens. The bracketed Phoenix expressions are lexer
        // semantic actions that print a trace line to std::cout; several of
        // them also stream lex::_val.
        this->self
            = 
              left_curly    [ std::cout << px::val("lpar") << std::endl]
            | right_curly   [ std::cout << px::val("rpar") << std::endl]
            | left_paren
            | right_paren
            | colon               [ std::cout << px::val("colon") << std::endl]
            | scolon
            | namespace_          [ std::cout << px::val("kw namesapce") << std::endl]
            | event               [ std::cout << px::val("kw event") << std::endl]
            | optional            [ std::cout << px::val("optional ")  << "-->" << _val << "<--" << std::endl]
            | required            [ std::cout << px::val("required") << std::endl]
            | repeated
            | t_int_4
            | t_int_8
            | t_string
            | ordinal             [ std::cout << px::val("val ordinal (") << _val << ")" << std::endl]
            | identifier          [std::cout << px::val("val identifier(") << _val << ")" << std::endl];


        // Whitespace is defined in its own "WS" state; main() passes
        // qi::in_state("WS")[...] as the skipper for phrase_parse.
        this->self("WS") =   lex::token_def<>("[ \\t\\n]+");
    }


    // Punctuation and keyword tokens are declared with lex::omit as their
    // attribute type.
    lex::token_def<lex::omit> left_curly, right_curly, colon, scolon,repeated, left_paren, right_paren;
    lex::token_def<lex::omit> namespace_, event, optional, required,t_int_4, t_int_8, t_string;
    // Ordinals are declared with a 32-bit unsigned attribute type.
    lex::token_def<boost::uint32_t> ordinal;
    // Identifiers use the default token_def attribute type.
    lex::token_def<> identifier;
};

// Qualifier of an event entry; each value corresponds to one of the
// optional / required / repeated keyword tokens.
enum event_entry_qualifier
{
    ENTRY_OPTIONAL,
    ENTRY_REQUIRED,
    ENTRY_REPEATED
};

// Type of an event entry: one of the atomic types (int4, int8, string)
// or a reference to another event (RBL_EVENT).
enum entry_type
{
    RBL_INT4,
    RBL_INT8,
    RBL_STRING,
    RBL_EVENT
};

// Ordinal/name pair; the grammar's oid_ rule fills it from
// `<ordinal> ':' <identifier>`.
struct oid
{
    boost::uint32_t   ordinal;
    std::string       name;
};

// Adapt oid as a Fusion sequence (members listed in declaration order).
BOOST_FUSION_ADAPT_STRUCT
(
  oid,
  (boost::uint32_t, ordinal)
  (std::string, name)
)

// Describes the type of an event entry. For atomic types the grammar
// supplies an empty referenced_event; for event types it holds the
// identifier found between the parentheses.
struct type_descriptor
{
    entry_type  type_id;
    std::string referenced_event;
};

// Adapt type_descriptor as a Fusion sequence (members in declaration order).
BOOST_FUSION_ADAPT_STRUCT
(
  type_descriptor,
  (entry_type, type_id)
  (std::string, referenced_event)
)

// One entry inside an event body: qualifier, ordinal/name and type.
struct event_entry
{
    event_entry_qualifier  qualifier;
    oid                   identifier;
    type_descriptor       descriptor;
};

// Adapt event_entry as a Fusion sequence (members in declaration order).
BOOST_FUSION_ADAPT_STRUCT
(
  event_entry,
  (event_entry_qualifier, qualifier)
  (oid, identifier)
  (type_descriptor, descriptor)
)

// A complete event: its ordinal/name plus the list of entries in its body.
// This is the attribute type of the grammar's start rule.
struct event_descriptor
{
    oid                       identifier;
    std::vector<event_entry>  event_entries;
};

// Adapt event_descriptor as a Fusion sequence (members in declaration order).
BOOST_FUSION_ADAPT_STRUCT
(
    event_descriptor,
    (oid, identifier)
    (std::vector<event_entry>, event_entries)
)

// Qi grammar over the token stream produced by `tokens`. It parses one
// event description into an event_descriptor and uses an in-state skipper
// built from the lexer type.
template <typename Iterator, typename Lexer>
struct grammar : qi::grammar<Iterator,event_descriptor(), qi::in_state_skipper<Lexer> >
{
    // `tok` is the token-definition object whose members (tok.event,
    // tok.ordinal, ...) are used as terminals in the rules below.
    template <typename TokenDef>
    grammar(TokenDef const& tok)
      : grammar::base_type(event_descriptor_)
    {
      using qi::_val;
      //start = event;
      // event <oid> '{' <entry>* '}'
      event_descriptor_ = tok.event >> oid_ >> tok.left_curly >> *(event_entry_) >> tok.right_curly;

      // <qualifier> <oid> <type> ';'
      event_entry_ = event_qualifier >> oid_ >> type_descriptor_ >> tok.scolon;

      // Map each qualifier keyword onto its enum value.
      event_qualifier = tok.optional [ _val = ENTRY_OPTIONAL]  
                      | tok.required [ _val = ENTRY_REQUIRED]
                      | tok.repeated [ _val = ENTRY_REPEATED];

      // <ordinal> ':' <identifier>
      oid_  = tok.ordinal 
            >> tok.colon 
            >> tok.identifier;

      // Either an atomic type (qi::attr("") supplies an empty
      // referenced_event) or `event ( <identifier> )`.
      type_descriptor_ 
          = (( atomic_type >> qi::attr("")) 
          | ( event_type >> tok.left_paren >> tok.identifier >> tok.right_paren));

      atomic_type = tok.t_int_4         [ _val = RBL_INT4]
              | tok.t_int_8             [ _val = RBL_INT8]
              | tok.t_string            [ _val = RBL_STRING];

      event_type = tok.event            [_val = RBL_EVENT];

      // Success handler for event_entry_. As written, this is the statement
      // the author reports does not compile.
      using namespace qi::labels;
      qi::on_success(event_entry_,std::cout << _val << _1);
    }

    // `start` is declared but never assigned (see the commented-out line in
    // the constructor); event_descriptor_ is the actual start rule.
    qi::rule<Iterator> start;
    qi::rule<Iterator, event_descriptor(), qi::in_state_skipper<Lexer> > event_descriptor_; 
    qi::rule<Iterator, event_entry(), qi::in_state_skipper<Lexer> > event_entry_;
    qi::rule<Iterator, event_entry_qualifier()> event_qualifier;
    qi::rule<Iterator, entry_type()> atomic_type;
    qi::rule<Iterator, entry_type()> event_type;
    qi::rule<Iterator, type_descriptor(),qi::in_state_skipper<Lexer> > type_descriptor_;
    qi::rule<Iterator, oid()> oid_;


};

// Sample input: one event containing two optional int4 entries.
std::string test = " EVENT 1:sihan { OPTIONAL 123:hassan int4; OPTIONAL 123:hassan int4; } ";

// Tokenizes `test` and parses the token stream with `grammar`, skipping
// tokens from the lexer's "WS" state. Prints "parsing failed" when
// phrase_parse returns false; nothing is printed here on success.
int main()
{
    typedef lex::lexertl::token<std::string::iterator, boost::mpl::vector<boost::uint32_t, std::string> > token_type;
    typedef lex::lexertl::actor_lexer<token_type> lexer_type;
    typedef tokens<lexer_type>::iterator_type iterator_type;

    tokens<lexer_type> token_lexer;
    grammar<iterator_type,tokens<lexer_type>::lexer_def> grammar(token_lexer);

    // token_lexer.begin takes the start iterator as a named variable.
    std::string::iterator it = test.begin();
    iterator_type first = token_lexer.begin(it, test.end());
    iterator_type last = token_lexer.end();

    bool r; 

    r = qi::phrase_parse(first, last, grammar, qi::in_state("WS")[token_lexer.self]);

    // NOTE(review): only the return value is checked; whether `first`
    // reached `last` is not verified.
    if(r)
        ;
    else
    {
        std::cout << "parsing failed" << std::endl;
    }
}

最佳答案

看头文件我觉得占位符的意思是:

_1 = Iterator position when the rule was tried.
_2 = Iterator to the end of the input.
_3 = Iterator position right after the rule has been successfully matched.

(因为我不确定上面的行是否可以理解,这里有一个你输入的小例子)

                                 rule being tried
                        _________________________________
                       ´                                 `
[EVENT][1][:][sihan][{][OPTIONAL][123][:][hassan][int4][;][OPTIONAL][321][:][hassan2][int4][;][}]
                           _1                                 _3                                 _2

正如 GManNickG 在评论中提到的,这些是词法分析器迭代器,您无法通过它们轻松访问原始字符串。conjure2 示例结合使用了词法分析器和 on_error/on_success。为此,它使用了一种特殊的 token:position_token。这种 token 始终可以访问与其自身关联的、指向原始字符串的一对迭代器(当您使用 lex::omit 时,普通 token 会丢失此信息)。position_token 有几个有用的方法:matched() 返回 iterator_range<OriginalIterator>,begin() 和 end() 返回相应的迭代器。

在下面的代码中,我选择创建一个 phoenix::function,它接受两个词法分析器迭代器(以 _1 和 _3 调用),并返回覆盖两者之间范围的字符串(使用 std::string(begin_iter->begin(), end_iter->begin()))。

我发现的一个问题是:空白位于另一个词法分析器状态,这会导致 position_token 返回的迭代器无效。我的解决办法是把所有内容都放在同一个状态中,然后对空白简单地使用 lex::_pass = lex::pass_flags::pass_ignore。

最后一个(小)问题是,如果你想使用 std::cout << _val,就需要为你感兴趣的类型定义 operator<<。

PS:我总是使用 BOOST_SPIRIT_USE_PHOENIX_V3,这要求所有 Spirit/Phoenix 头文件都从 boost/spirit/include/... 包含。如果出于某种原因您需要或想要使用 V2,则需要修改 phoenix::function。另外,我没有使用旧式的 for 循环,所以如果您不能使用 C++11,就必须修改 event_descriptor 的 operator<< 的定义。


#define BOOST_SPIRIT_USE_PHOENIX_V3
// #define BOOST_SPIRIT_DEBUG
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/include/phoenix_bind.hpp> //CHANGED
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/lex_lexertl_position_token.hpp> //ADDED
#include <boost/none.hpp>
#include <boost/cstdint.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <string>
#include <exception>
#include <vector>

namespace lex = boost::spirit::lex;
namespace px = boost::phoenix;
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;


// Lexer definition for the event-description language (reworked version).
// All tokens, including whitespace, live in the default lexer state;
// whitespace is matched and then dropped via pass_ignore.
template <typename Lexer>
struct tokens : lex::lexer<Lexer>
{
    tokens()
        // Every token definition is initialised with its regex pattern; the
        // keyword patterns use (?i:...) so that e.g. "EVENT" in the sample
        // input matches the `event` token.
        : left_curly("\"{\""),
        right_curly("\"}\""),
        left_paren("\"(\""),
        right_paren("\")\""),
        colon(":"),
        scolon(";"),
        namespace_("(?i:namespace)"),
        event("(?i:event)"),
        optional("(?i:optional)"),
        required("(?i:required)"),
        repeated("(?i:repeated)"),
        t_int_4("(?i:int4)"),
        t_int_8("(?i:int8)"),
        t_string("(?i:string)"),
        ordinal("\\d+"),
        identifier("\\w+")

    {
        using boost::spirit::lex::_val;

        // The trailing comments are the disabled tracing actions of the
        // earlier version, kept for debugging.
        this->self
            = 
              left_curly    //[ std::cout << px::val("lpar") << std::endl]
            | right_curly   //[ std::cout << px::val("rpar") << std::endl]
            | left_paren
            | right_paren
            | colon               //[ std::cout << px::val("colon") << std::endl]
            | scolon
            | namespace_         // [ std::cout << px::val("kw namesapce") << std::endl]
            | event              // [ std::cout << px::val("kw event") << std::endl]
            | optional            //[ std::cout << px::val("optional ")  << "-->" << _val << "<--" << std::endl]
            | required            //[ std::cout << px::val("required") << std::endl]
            | repeated
            | t_int_4
            | t_int_8
            | t_string
            | ordinal             //[ std::cout << px::val("val ordinal (") << _val << ")" << std::endl]
            | identifier          //[std::cout << px::val("val identifier(") << _val << ")" << std::endl]
            // CHANGED: whitespace is part of the same state and is flagged
            // pass_ignore instead of being defined in a separate "WS" state.
            | lex::token_def<>("[ \\t\\n]+")   [lex::_pass = lex::pass_flags::pass_ignore] //CHANGED
            ;
    }


    // Punctuation and keyword tokens are declared with lex::omit as their
    // attribute type; members are declared in the same order as they are
    // initialised in the constructor.
    lex::token_def<lex::omit> left_curly, right_curly, left_paren, right_paren, colon, scolon;
    lex::token_def<lex::omit> namespace_, event, optional, required, repeated, t_int_4, t_int_8, t_string;
    // Ordinals are declared with a 32-bit unsigned attribute type.
    lex::token_def<boost::uint32_t> ordinal;
    // Identifiers use the default token_def attribute type.
    lex::token_def<> identifier;
};

// Qualifier of an event entry; each value corresponds to one of the
// optional / required / repeated keyword tokens.
enum event_entry_qualifier
{
    ENTRY_OPTIONAL,
    ENTRY_REQUIRED,
    ENTRY_REPEATED
};

// Type of an event entry: one of the atomic types (int4, int8, string)
// or a reference to another event (RBL_EVENT).
enum entry_type
{
    RBL_INT4,
    RBL_INT8,
    RBL_STRING,
    RBL_EVENT
};

// Ordinal/name pair; the grammar's oid_ rule fills it from
// `<ordinal> ':' <identifier>`.
struct oid
{
    boost::uint32_t   ordinal;
    std::string       name;
};

// Adapt oid as a Fusion sequence (members listed in declaration order).
BOOST_FUSION_ADAPT_STRUCT
(
  oid,
  (boost::uint32_t, ordinal)
  (std::string, name)
)

// Writes an oid to `os` as "<ordinal>-<name>" and returns the stream.
std::ostream& operator<<(std::ostream& os, const oid& val) //ADDED
{
    os << val.ordinal;
    os << "-";
    os << val.name;
    return os;
}

// Describes the type of an event entry. For atomic types the grammar
// supplies an empty referenced_event; for event types it holds the
// identifier found between the parentheses.
struct type_descriptor
{
    entry_type  type_id;
    std::string referenced_event;
};

// Adapt type_descriptor as a Fusion sequence (members in declaration order).
BOOST_FUSION_ADAPT_STRUCT
(
  type_descriptor,
  (entry_type, type_id)
  (std::string, referenced_event)
)

// Writes a type_descriptor to `os` as "<type_id>-<referenced_event>" and
// returns the stream. No operator<< for entry_type is visible in this file,
// so type_id is presumably printed as its integer value.
std::ostream& operator<<(std::ostream& os, const type_descriptor& val)  //ADDED
{
    os << val.type_id;
    os << "-";
    os << val.referenced_event;
    return os;
}

// One entry inside an event body: qualifier, ordinal/name and type.
struct event_entry
{
    event_entry_qualifier  qualifier;
    oid                   identifier;
    type_descriptor       descriptor;
};


// Adapt event_entry as a Fusion sequence (members in declaration order).
BOOST_FUSION_ADAPT_STRUCT
(
  event_entry,
  (event_entry_qualifier, qualifier)
  (oid, identifier)
  (type_descriptor, descriptor)
)

// Writes an event_entry to `os` as "<qualifier>-<identifier>-<descriptor>"
// and returns the stream. No operator<< for event_entry_qualifier is visible
// in this file, so the qualifier is presumably printed as its integer value.
std::ostream& operator<<(std::ostream& os, const event_entry& val)  //ADDED
{
    os << val.qualifier;
    os << "-";
    os << val.identifier;
    os << "-";
    os << val.descriptor;
    return os;
}

// A complete event: its ordinal/name plus the list of entries in its body.
// This is the attribute type of the grammar's start rule.
struct event_descriptor
{
    oid                       identifier;
    std::vector<event_entry>  event_entries;
};



// Adapt event_descriptor as a Fusion sequence (members in declaration order).
BOOST_FUSION_ADAPT_STRUCT
(
    event_descriptor,
    (oid, identifier)
    (std::vector<event_entry>, event_entries)
)

std::ostream& operator<<(std::ostream& os, const event_descriptor& val)  //ADDED
{
    os << val.identifier << "[";
    for(const auto& entry: val.event_entries)   //C++11
        os << entry;
    os << "]";
    return os;
}

// Function object (wrapped in px::function elsewhere in this file) that
// builds a std::string from two token iterators: the text runs from the
// start of the token at `begin` up to the start of the token at `end`.
struct build_string_impl        //ADDED
{
    // Result-type protocol: any two-argument call yields std::string.
    template <typename Signature>
    struct result;

    template <typename Self, typename First, typename Second>
    struct result<Self(First, Second)>
    {
        using type = std::string;
    };

    // Both arguments are dereferenced with `->begin()`.
    // NOTE(review): this assumes `end` refers to a real token; confirm it is
    // never the end-of-input iterator.
    template <typename Iter1, typename Iter2>
    std::string operator()(Iter1 begin, Iter2 end) const
    {
        std::string text(begin->begin(), end->begin());
        return text;
    }
};

// Phoenix function wrapper around build_string_impl, so it can be used inside
// Phoenix expressions such as the on_success handler in the grammar.
px::function<build_string_impl> build_string;

// Qi grammar over the token stream produced by `tokens`. It parses one
// event description into an event_descriptor. This version declares no
// skipper, because whitespace is already dropped by the lexer.
template <typename Iterator, typename Lexer>
struct grammar : qi::grammar<Iterator,event_descriptor() >
{
    // `tok` is the token-definition object whose members (tok.event,
    // tok.ordinal, ...) are used as terminals in the rules below.
    template <typename TokenDef>
    grammar(TokenDef const& tok)
      : grammar::base_type(event_descriptor_)
    {
      using qi::_val;
      //start = event;
      // event <oid> '{' <entry>* '}'
      event_descriptor_ = tok.event >> oid_ >> tok.left_curly >> *(event_entry_) >> tok.right_curly;

      // <qualifier> <oid> <type> ';'
      event_entry_ = event_qualifier >> oid_ >> type_descriptor_ >> tok.scolon;

      // Map each qualifier keyword onto its enum value.
      event_qualifier = tok.optional [ _val = ENTRY_OPTIONAL]  
                      | tok.required [ _val = ENTRY_REQUIRED]
                      | tok.repeated [ _val = ENTRY_REPEATED];

      // <ordinal> ':' <identifier>
      oid_  = tok.ordinal 
            >> tok.colon 
            >> tok.identifier;

      // Either an atomic type (qi::attr("") supplies an empty
      // referenced_event) or `event ( <identifier> )`.
      type_descriptor_ 
          = (( atomic_type >> qi::attr("")) 
          | ( event_type >> tok.left_paren >> tok.identifier >> tok.right_paren));

      atomic_type = tok.t_int_4         [ _val = RBL_INT4]
              | tok.t_int_8             [ _val = RBL_INT8]
              | tok.t_string            [ _val = RBL_STRING];

      event_type = tok.event            [_val = RBL_EVENT];

      // After each successful event_entry_ match, print the parsed entry and
      // the text built by build_string(_1, _3). According to the accompanying
      // explanation, _1 is presumably the position where the rule was tried
      // and _3 the position right after the match.
      using namespace qi::labels;
      qi::on_success(event_entry_,std::cout << _val << " " << build_string(_1,_3) << std::endl); //CHANGED
      // BOOST_SPIRIT_DEBUG_NODES( (event_descriptor_)(event_entry_)(event_qualifier)(oid_)(type_descriptor_)(atomic_type)(event_type) );

    }

    // `start` is declared but never assigned (see the commented-out line in
    // the constructor); event_descriptor_ is the actual start rule.
    qi::rule<Iterator> start;
    qi::rule<Iterator, event_descriptor()> event_descriptor_; 
    qi::rule<Iterator, event_entry()> event_entry_;
    qi::rule<Iterator, event_entry_qualifier()> event_qualifier;
    qi::rule<Iterator, entry_type()> atomic_type;
    qi::rule<Iterator, entry_type()> event_type;
    qi::rule<Iterator, type_descriptor()> type_descriptor_;
    qi::rule<Iterator, oid()> oid_;


};

std::string test = " EVENT 1:sihan { OPTIONAL 123:hassan int4; OPTIONAL 321:hassan2 int4; } ";

int main()
{
    typedef lex::lexertl::position_token<std::string::iterator, boost::mpl::vector<boost::uint32_t, std::string> > token_type; //CHANGED
    typedef lex::lexertl::actor_lexer<token_type> lexer_type;
    typedef tokens<lexer_type>::iterator_type iterator_type;

    tokens<lexer_type> token_lexer;
    grammar<iterator_type,tokens<lexer_type>::lexer_def> grammar(token_lexer);

    std::string::iterator it = test.begin();
    iterator_type first = token_lexer.begin(it, test.end());
    iterator_type last = token_lexer.end();

    bool r; 

    r = qi::parse(first, last, grammar); //CHANGED

    if(r)
        ;
    else
    {
        std::cout << "parsing failed" << std::endl;
    }
}

关于c++ - boost-spirit 解析器 lex->qi : Getting the "undocumented" on_success mechanism to work,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/19203490/

相关文章:

c++ - 如何搜索和删除字符串中的字符

c++ - 带有定时器 C++ 成员函数的 std::function

ruby-on-rails - 在 Ruby 中指定和执行规则

parsing - 使用解析器组合器的原因是什么?

c++ - 在语法构造函数中评估语义 Action (或不?)

parsing - Boost Spirit是否可以处理类似Postscript/PDF的语言?

c++ - C++中的依赖倒置(根据S.O.L.I.D原则)

c++ - 用于获取 C++ 中所有类型和集合的调试字符串的通用函数

java - 使用 com.fasterxml.jackson 而不是 org.json 解析 Json

c++ - 如何在现代 C++ 中解析文本数据文件?