c++ - 扩展现有的 Spirit 语法(AST 和 skipper 的问题)

标签 c++ boost-spirit

我正在尝试使用 boost-spirit 解析以下文本,但到目前为止没有成功,请参阅下面的评论

Output_A :=
    LE600ms.{
    (LE1s.{FE1s.{Signal1}} AND
    LE1s.{FE1s.{Signal2}})
    OR
    (LE3ms.{FE2ms.{NOT.Signal3}} AND
    LE3ms.{FE2ms.{Signal4}})};
  • LE[xy]s = 持续 x y秒的前沿(leading edge),其中 y = m、n(毫、纳)
  • FE[xy]s = 持续 x y秒的下降沿(falling edge),其中 y = m、n(毫、纳)
  • Output_A,Signal1/2/3/4 - 标识符

我尝试了以下规则(有关声明,请参阅随附的源代码)

start   =  (ident_ >> ":=" >  expr_)  
           [ _val = phx::construct<binop<op_equ> >(_1, _2) ] 
          ;
expr_   = tok_.alias();
tok_    = ( not_   >> binop_ >> tok_ ) 
          [ _val = phx::bind(make_binop, qi::_2, qi::_1, qi::_3) ] 
          | not_   [ _val = _1 ] ;

not_    = "NOT."  >> simple [ _val = phx::construct<unop <op_not> >(_1) ]        
          | simple [ _val = _1 ] ;


// Not sure how to use something like following rule for 
// final expression
// Here I need the edge type LE or FE and duration, 
// so something like binary operator
tdelay_ = lexeme[ ( lit("LE") | lit("FE") ) 
                 > double_ > *char_("mn") > "s." ] ; // ???

simple   =   ('('  > expr_ > ')') 
           | ('['  > expr_ > ']') 
           | ('{'  > expr_ > '}') 
           | ident_ 
           | lexeme[double_] ;

ident_   =  lexeme[ char_("a-zA-Z_") >> *char_("a-zA-Z_0-9")];

此外,

是否可以在 lexeme 中使用规则?例如,我如何使用 ident_ 来检查它的属性,比如:

Output_A.status := Signal_4;

所以这样的规则:

lexeme[ident_ >> '.' "status" ] > ":=" ident_ ; 

有可能吗?

  • 我的 source .

  • 使用 Boost-1.55.0

最佳答案

对于各种“其他问题”:

  1. 我认为您错过了 not 规则中括号的平衡:

    not_    =  ("NOT."  >> simple [ _val = phx::construct<unop <op_not> >(_1) ]
               | simple [ _val = _1 ]  )
               | (tdelay_  >> simple [ _val = phx::construct<unop <op_not> >(_1) ]
               | simple [ _val = _1 ]  )
               ;
    

    归结为

    not_    = ("NOT."  >> simple | simple)
            | (tdelay_ >> simple | simple)
            ;
    

    这样写好像比较乱

    not_    = "NOT."   >> simple
            | simple
            | tdelay_  >> simple
            ;
    

    现在您可能会发现问题:simple 可能是 ident_,而 ident_ 也能匹配看起来像 tdelay_ 的内容(直到结尾的 . 之前)。因此我们得到的是一个成功解析的 ident_(例如 LE600ms),但随后……遇到了意料之外的 .,这正是所报告的错误:

    Expectation Failure at '.{ ....'
    

    所以,有两种方法可以解决问题:

    • 要么让 ident_ 忽略可能被解释为 tdelay_ 的表达式:

       simple   = ('(' > expr_ > ')')
                | ('[' > expr_ > ']')
                | ('{' > expr_ > '}')
                | lexeme[double_]
                | (!tdelay_ >> ident_) // HERE
                ;
      

      虽然这可能会导致相当大的回溯,所以

    • 或者,您可以在 not_ 中“修复”解析器分支的首选顺序:

       not_    = ("NOT."  >> simple [ _val = phx::construct<unop <op_not> >(_1) ])
               | (tdelay_ >> simple [ _val = phx::construct<unop <op_not> >(_1) ])
               | simple [ _val = _1 ]
               ;
      

      这是我的建议

  2. 您对 prop_ident_ 的“整合”是……笨拙。而不是

    start   =  (ident_ >> ":=" >  expr_)  [ _val = phx::construct<binop<op_equ> >(_1, _2) ]
              | prop_ident_ // [ _val = _2 ]
              ;
    // ~20 lines of grammar skipped
    prop_ident_ =  lexeme[ char_("a-zA-Z_") >> *char_("a-zA-Z_0-9")
                                           >>  prop_
                                           > ":="
                                           >  char_("a-zA-Z_") >> *char_("a-zA-Z_0-9")
                             ]
                             //[ _val = _2 ]
                             ;
    

    您应该按照您描述的方式考虑表达您的语法:

    program_    = *statement_;
    statement_  = (signal_def_ | prop_assgn_) >> ';';
    
    signal_def_ = ident_ >> ":=" >>  expr_;
    prop_assgn_ = ident_ >> lexeme ['.' >> raw [ prop_ ]] >> ":=" >>  ident_;
    

    哇!?所有的语义 Action 都去哪儿了?好吧,我发明了一些最简单的 AST 类型来反射(reflect)实际解析的结构:

    qi::rule<It, signal_definition(),  Skipper> signal_def_;
    qi::rule<It, property_assignment(),Skipper> prop_assgn_;
    qi::rule<It, statement(),          Skipper> statement_;
    qi::rule<It, program(),            Skipper> program_;
    

    还有实际的类型?请看:

    struct signal_definition   { std::string name; expr value; };
    struct property_assignment { std::string signal, property, value_ident; };
    
    BOOST_FUSION_ADAPT_STRUCT(signal_definition, (std::string, name)(expr, value))
    BOOST_FUSION_ADAPT_STRUCT(property_assignment, (std::string, signal)(std::string, property)(std::string, value_ident))
    
    typedef boost::variant<signal_definition, property_assignment> statement;
    typedef std::vector<statement> program;
    

    查看此转换的结果 Live On Coliru

奖励:完成 AST

现在如果我们也“修复”延迟表达式以与我们的 AST 集成,我们可以

  • 语法更易读

    program_    = *statement_;
    statement_  = (signal_def_ | prop_assgn_) >> ';';
    
    signal_def_ = ident_ >> ":=" >>  expr_;
    prop_assgn_ = ident_ >> lexeme ['.' >> raw [ prop_ ]] >> ":=" >>  ident_;
    
    expr_       = ( not_ >> binop_ >> expr_ )  [ _val = phx::bind(make_binop, qi::_2, qi::_1, qi::_3) ] 
                  | not_ [ _val = _1 ];
    not_        = neg_expr_
                | delay_expr_
                | simple
                ;
    
    neg_expr_   = "NOT."  >> simple [ _val = phx::construct<unop <op_not> >(_1) ];
    
    delay_expr_ = tdelay_ >> expr_;
    
    tdelay_     = raw[edge_] > double_ > raw[unit_];
    
    simple      =   ('(' > expr_ > ')') 
                  | ('[' > expr_ > ']') 
                  | ('{' > expr_ > '}') 
                  | lexeme[double_]
                  | ident_
                  ;
    
    ident_      = char_("a-zA-Z_") >> *char_("a-zA-Z_0-9");
    
  • 在不丢失信息的情况下打印我们的 AST:例如输入:

    Test_Q := LE600ms.Signal12;
    Test_A := Signal1;
    Test_Z := (Signal1);
    Test_B := (Signal1 OR Signal12) AND Signal3;
    Test_A.expire := Signal2;
    
    Output_B :=
        LE600ms.{
        (LE1s.{FE1s.{Signal1}} AND
        LE1s.{FE1s.{Signal2}})
        OR
        (LE3ms.{FE2ms.{NOT.Signal3}} AND
        LE3ms.{FE2ms.{Signal4}})};
    

    产生输出:

    Test_Q := (with LE delay of 600ms. Signal12);
    Test_A := Signal1;
    Test_Z := Signal1;
    Test_B := ((Signal1 | Signal12) & Signal3);
    Test_A.expire := Signal2;
    Output_B := (with LE delay of 600ms. ((with LE delay of 1s. ((with FE delay of 1s. Signal1) & (with LE delay of 1s. (with FE delay of 1s. Signal2)))) | (with LE delay of 3ms. ((with FE delay of 2ms. (!Signal3)) & (with LE delay of 3ms. (with FE delay of 2ms. Signal4))))));
    

请注意,既然语法正确处理了...语法(例如多语句的概念),主要驱动程序也更简单:

static const Skip skip = qi::space | "--" >> *(qi::char_ - qi::eol) >> qi::eol;
static const parser<It, Skip> p;

try
{
    program result;
    bool ok = qi::phrase_parse(f, l, p, skip, result);

    if (!ok) std::cerr << "invalid input\n";
    else     std::cout << result << "\n";
    if (f!=l)
        std::cerr << "remaining unparsed input: '" << std::string(f,l) << "'\n";
}
catch (const qi::expectation_failure<It>& e)
{
    std::cerr << "Expectation Failure at '" << std::string(e.first, e.last) << "'" << std::endl;
}

事实上,整个代码清单现在更短了,尽管它做了更多的事情。同样可以参见 Live On Coliru!

完整代码清单

完整的代码清单在 Coliru(上方)和这篇文章中(以备将来使用)

#define BOOST_SPIRIT_USE_PHOENIX_V3

#include <fstream>
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/variant/recursive_wrapper.hpp>

namespace qi    = boost::spirit::qi;
namespace phx   = boost::phoenix;

typedef std::string var;
template <typename tag> struct binop;
template <typename tag> struct unop;
struct delayed_expr;

// Expression AST node: a variant holding either a leaf (a `var` name or a
// double literal) or one of the operator nodes listed below.
// The operator nodes are wrapped in boost::recursive_wrapper because binop,
// unop and delayed_expr are only forward-declared here and themselves contain
// `expr` members. The operator tag types (op_equ, op_not, ...) are introduced
// in place by the `struct op_xxx` elaborated type specifiers.
typedef boost::variant<var,double, 
        // Logical operators
        boost::recursive_wrapper<binop<struct op_equ> >, 
        boost::recursive_wrapper<unop <struct op_not> >, 
        boost::recursive_wrapper<binop<struct op_and> >, 
        boost::recursive_wrapper<binop<struct op_xor> >, 
        boost::recursive_wrapper<binop<struct op_or>  >, 

        // Arithmetic operators
        boost::recursive_wrapper<binop<struct op_plus>  >, 
        boost::recursive_wrapper<binop<struct op_minus> >, 
        boost::recursive_wrapper<binop<struct op_mul>   >, 
        boost::recursive_wrapper<binop<struct op_div>   >, 
        boost::recursive_wrapper<binop<struct op_mod>   >, 

        // Relational operators
        boost::recursive_wrapper<binop<struct op_gt>  >, 
        boost::recursive_wrapper<binop<struct op_lt>  >, 
        boost::recursive_wrapper<binop<struct op_gte> >, 
        boost::recursive_wrapper<binop<struct op_lte> >, 
        boost::recursive_wrapper<binop<struct op_eq> >,
        boost::recursive_wrapper<binop<struct op_ne> >,

        // Time-delayed sub-expression (marked as tentative by the original author)
        boost::recursive_wrapper<delayed_expr>
    > expr;

// Binary operator AST node. The `tag` type only distinguishes the operator
// kind (op_and, op_plus, ...); the node itself stores the two operands.
template <typename tag> struct binop
{
    // Copies the left-hand operand into oper1 and the right-hand one into oper2.
    explicit binop(const expr& lhs, const expr& rhs)
        : oper1(lhs)
        , oper2(rhs)
    {
    }

    expr oper1;  // left-hand operand
    expr oper2;  // right-hand operand
};

// Unary operator AST node. The `tag` type identifies the operator (op_not is
// the only tag used in this file); the node stores its single operand.
template <typename tag> struct unop
{
    // Copies the operand into oper1.
    explicit unop(const expr& operand)
        : oper1(operand)
    {
    }

    expr oper1;  // the operand the operator applies to
};

// AST node for a statement of the form `name := expression`.
struct signal_definition {
    std::string name;  // identifier on the left of ":="
    expr value;        // expression on the right of ":="

    // Streams the definition as "<name> := <value>" (no trailing ';').
    friend std::ostream& operator<<(std::ostream& out, signal_definition const& def) {
        out << def.name;
        out << " := ";
        out << def.value;
        return out;
    }
};
// AST node for a statement of the form `signal.property := value_ident`.
struct property_assignment {
    std::string signal;       // identifier before the '.'
    std::string property;     // property name after the '.'
    std::string value_ident;  // identifier on the right of ":="

    // Streams the assignment as "<signal>.<property> := <value_ident>".
    friend std::ostream& operator<<(std::ostream& out, property_assignment const& assignment) {
        out << assignment.signal;
        out << '.';
        out << assignment.property;
        out << " := ";
        out << assignment.value_ident;
        return out;
    }
};

// Time-delay specification: which edge it refers to, how long, and in which unit.
// Member declaration order is kept as edge, unit, amount.
struct tdelay {
    std::string edge;  // edge keyword as parsed (e.g. "LE" or "FE")
    std::string unit;  // unit text as parsed (e.g. "ms.")
    double amount;     // numeric duration

    // Streams "with <edge> delay of <amount><unit> " (note the trailing space).
    friend std::ostream& operator<<(std::ostream& out, tdelay const& delay) {
        out << "with ";
        out << delay.edge;
        out << " delay of ";
        out << delay.amount;
        out << delay.unit;
        out << " ";
        return out;
    }
};

// AST node pairing a time-delay specification with the expression it applies to.
// The member order (delay, e) is the order used by the BOOST_FUSION_ADAPT_STRUCT
// adaptation of this struct.
struct delayed_expr {
    tdelay delay;  // edge kind, amount and unit of the delay
    expr e;        // the delayed operand expression
};

// Adapt the AST structs as Fusion sequences so that Qi sequence parsers can
// fill them member by member without semantic actions.
// NOTE: tdelay is adapted as (edge, amount, unit), which is the order in which
// the tdelay_ rule parses the pieces; it intentionally differs from the
// declaration order of the members inside struct tdelay (edge, unit, amount).
BOOST_FUSION_ADAPT_STRUCT(signal_definition,   (std::string, name)(expr, value))
BOOST_FUSION_ADAPT_STRUCT(property_assignment, (std::string, signal)(std::string, property)(std::string, value_ident))
BOOST_FUSION_ADAPT_STRUCT(tdelay,              (std::string, edge)(double, amount)(std::string, unit))
BOOST_FUSION_ADAPT_STRUCT(delayed_expr,        (tdelay, delay)(expr, e))

// A statement is one of the two statement kinds; a program is a list of statements.
typedef boost::variant<signal_definition, property_assignment> statement;
typedef std::vector<statement> program;

// Declared ahead of `printer`, whose delayed_expr overload streams an `expr`.
std::ostream& operator<<(std::ostream& os, const expr& e);

// Visitor that writes an `expr` tree to the stream given at construction.
// Leaves are written as-is; every operator node is wrapped in parentheses.
struct printer : boost::static_visitor<void>
{
    printer(std::ostream& os) : _os(os) {}
    std::ostream& _os;

    // Writes "(<lhs><op><rhs>)", visiting both operands with this same visitor.
    void print(const std::string& op, const expr& lhs, const expr& rhs) const
    {
        _os << "(";
        boost::apply_visitor(*this, lhs);
        _os << op;
        boost::apply_visitor(*this, rhs);
        _os << ")";
    }

    // --- leaves ---------------------------------------------------------
    void operator()(const var& name) const
    {
        _os << name;
    }

    void operator()(const double& number) const
    {
        _os << number;
    }

    // --- unary / delay nodes --------------------------------------------
    void operator()(const unop<op_not>& node) const
    {
        _os << "(!";
        boost::apply_visitor(*this, node.oper1);
        _os << ")";
    }

    void operator()(const delayed_expr& node) const
    {
        _os << '(';
        _os << node.delay;
        _os << node.e;
        _os << ')';
    }

    // --- logical operators ----------------------------------------------
    void operator()(const binop<op_and>& node) const { print(" & ", node.oper1, node.oper2); }
    void operator()(const binop<op_or >& node) const { print(" | ", node.oper1, node.oper2); }
    void operator()(const binop<op_xor>& node) const { print(" ^ ", node.oper1, node.oper2); }
    void operator()(const binop<op_equ>& node) const { print(" = ", node.oper1, node.oper2); }

    // --- arithmetic operators -------------------------------------------
    void operator()(const binop<op_plus>& node)  const { print(" + ", node.oper1, node.oper2); }
    void operator()(const binop<op_minus>& node) const { print(" - ", node.oper1, node.oper2); }
    void operator()(const binop<op_mul>& node)   const { print(" * ", node.oper1, node.oper2); }
    void operator()(const binop<op_div>& node)   const { print(" / ", node.oper1, node.oper2); }
    void operator()(const binop<op_mod>& node)   const { print(" % ", node.oper1, node.oper2); }

    // --- relational operators -------------------------------------------
    void operator()(const binop<op_gt>& node)  const { print(" > ",  node.oper1, node.oper2); }
    void operator()(const binop<op_lt>& node)  const { print(" < ",  node.oper1, node.oper2); }
    void operator()(const binop<op_gte>& node) const { print(" >= ", node.oper1, node.oper2); }
    void operator()(const binop<op_lte>& node) const { print(" <= ", node.oper1, node.oper2); }
    void operator()(const binop<op_eq>& node)  const { print(" == ", node.oper1, node.oper2); }
    void operator()(const binop<op_ne>& node)  const { print(" != ", node.oper1, node.oper2); }
};

std::ostream& operator<<(std::ostream& os, const expr& e)
{ 
    boost::apply_visitor(printer(os), e); 
    return os; 
}

std::ostream& operator<<(std::ostream& os, const program& p)
{ 
    for (auto& stmt : p) os << stmt << ";\n";
    return os; 
}

// Qi grammar that parses a sequence of ';'-terminated statements into a
// `program`. A statement is either a signal definition (`name := expr`) or a
// property assignment (`name.prop := ident`).
//
// Template parameters:
//   It      - iterator type of the input
//   Skipper - skip parser type used by the phrase-level rules
template <typename It, typename Skipper = qi::space_type>
    struct parser : qi::grammar<It, program(), Skipper>
{
    // Discriminants stored in the binop_ symbol table; mapped to AST node
    // types by make_binop.
    enum op_token { 
        TOK_PLUS, TOK_MINUS, TOK_DIV, TOK_MULT, TOK_MOD,
        TOK_LT, TOK_LTE, TOK_GT, TOK_GTE,
        TOK_EQ, TOK_NE,TOK_AND,TOK_OR,TOK_XOR
    };

    // Builds the binop<> node matching `discriminant` from the two operands.
    // Throws std::runtime_error if the discriminant is not one of the
    // enumerators above.
    static expr make_binop(op_token discriminant, const expr& left, const expr& right)
    {
        switch(discriminant)
        {
            case TOK_PLUS:  return binop<op_plus>(left , right); // "+"
            case TOK_MINUS: return binop<op_minus>(left, right); // "-"
            case TOK_DIV:   return binop<op_div>(left  , right); // "/"
            case TOK_MULT:  return binop<op_mul>(left  , right); // "*"
            case TOK_MOD:   return binop<op_mod>(left  , right); // "%"
            case TOK_LT:    return binop<op_lt>(left   , right); // "<"
            case TOK_LTE:   return binop<op_lte>(left  , right); // "<="
            case TOK_GT:    return binop<op_gt>(left   , right); // ">"
            case TOK_GTE:   return binop<op_gte>(left  , right); // ">="
            case TOK_EQ:    return binop<op_eq>(left   , right); // "=="
            case TOK_NE:    return binop<op_ne>(left   , right); // "!="
            case TOK_AND:   return binop<op_and>(left  , right); // "AND"
            case TOK_OR:    return binop<op_or>(left   , right); // "OR"
            case TOK_XOR:   return binop<op_xor>(left  , right); // "XOR"
        }
        throw std::runtime_error("unreachable in make_binop");
    }

    // Defines all rules and fills the symbol tables.
    parser() : parser::base_type(program_)
    {
        using namespace qi;

        program_    = *statement_;
        statement_  = (signal_def_ | prop_assgn_) >> ';';

        signal_def_ = ident_ >> ":=" >>  expr_;
        prop_assgn_ = ident_ >> lexeme ['.' >> raw [ prop_ ]] >> ":=" >>  ident_;

        // Right-recursive: operand, operator, rest of the expression.
        expr_       = ( not_ >> binop_ >> expr_ )  [ _val = phx::bind(make_binop, qi::_2, qi::_1, qi::_3) ] 
                      | not_ [ _val = _1 ];

        // Order matters: the delay form is tried before `simple`, because an
        // identifier would otherwise consume the leading "LE600ms" text.
        not_        = neg_expr_
                    | delay_expr_
                    | simple
                    ;

        neg_expr_   = "NOT."  >> simple [ _val = phx::construct<unop <op_not> >(_1) ];

        delay_expr_ = tdelay_ >> expr_;

        // Plain sequence (>>) instead of expectation (>): an identifier that
        // merely starts with an edge keyword (e.g. "LEVEL" or "FEED") must
        // make this rule fail softly so that not_ can fall back to `simple`
        // and parse it as ident_, rather than throwing expectation_failure.
        tdelay_     = raw[edge_] >> double_ >> raw[unit_];

        simple      =   ('(' > expr_ > ')') 
                      | ('[' > expr_ > ']') 
                      | ('{' > expr_ > '}') 
                      | lexeme[double_]
                      | ident_
                      ;

        // ident_ is declared without a skipper, so no skipping happens inside it.
        ident_      = char_("a-zA-Z_") >> *char_("a-zA-Z_0-9");

        BOOST_SPIRIT_DEBUG_NODES(
                (program_) (signal_def_) (prop_assgn_)
                (expr_) (not_) (neg_expr_) (delay_expr_)
                (simple) (ident_) (tdelay_)
             )

        binop_.add
            ("-",  TOK_MINUS)
            ("+",  TOK_PLUS)
            ("/",  TOK_DIV)
            ("*",  TOK_MULT)
            ("%",  TOK_MOD)
            ("<",  TOK_LT)
            ("<=", TOK_LTE)
            (">",  TOK_GT)
            (">=", TOK_GTE)
            ("==", TOK_EQ)
            ("!=", TOK_NE)
            ("AND", TOK_AND)
            ("OR",  TOK_OR)
            ("XOR", TOK_XOR)
            ;
        prop_.add("status")("expire")("collect")("inhibit");
        edge_.add("LE")("FE");
        unit_.add("ms.")("ns.")("s.");
    }

  private:
    // Keyword tables; edge_/prop_/unit_ are only used through raw[], so their
    // bool payload is never read.
    qi::symbols<char, bool>     edge_, prop_, unit_;
    qi::symbols<char, op_token> binop_;

    // Rules without a Skipper argument do not skip inside their match.
    qi::rule<It, var()> ident_;
    qi::rule<It, tdelay()> tdelay_;
    qi::rule<It, delayed_expr(), Skipper> delay_expr_;
    qi::rule<It, expr(),         Skipper> not_, simple, expr_, neg_expr_;

    qi::rule<It, signal_definition(),  Skipper> signal_def_;
    qi::rule<It, property_assignment(),Skipper> prop_assgn_;
    qi::rule<It, statement(),          Skipper> statement_;
    qi::rule<It, program(),            Skipper> program_;
};

// Reads "input.txt", parses it with the grammar above and prints the resulting
// program to stdout. Diagnostics go to stderr.
// Returns 0 on a complete, successful parse and 1 if the file cannot be
// opened, the parse fails, input is left unparsed, or an expectation point in
// the grammar fails.
int main()
{
    std::ifstream fin("input.txt");
    if (!fin)
    {
        // Without this check a missing file would be parsed as empty input
        // and reported as success.
        std::cerr << "cannot open input.txt\n";
        return 1;
    }

    std::stringstream buffer;
    buffer << fin.rdbuf();
    std::string input = buffer.str();
    fin.close();

    typedef std::string::const_iterator It;
    typedef qi::rule<It> Skip;
    It f(input.begin()), l(input.end());

    // Skips whitespace and "--" comments running to the end of the line.
    static const Skip skip = qi::space | "--" >> *(qi::char_ - qi::eol) >> qi::eol;
    static const parser<It, Skip> p;

    int status = 0;
    try
    {
        program result;
        bool ok = qi::phrase_parse(f, l, p, skip, result);

        if (!ok)
        {
            std::cerr << "invalid input\n";
            status = 1;
        }
        else
        {
            std::cout << result << "\n";
        }

        // phrase_parse advances f; anything left over was not consumed.
        if (f!=l)
        {
            std::cerr << "remaining unparsed input: '" << std::string(f,l) << "'\n";
            status = 1;
        }
    } 
    catch (const qi::expectation_failure<It>& e)
    {
        std::cerr << "Expectation Failure at '" << std::string(e.first, e.last) << "'" << std::endl;
        status = 1;
    }
    return status;
}

关于 c++ - 扩展现有的 Spirit 语法(AST 和 skipper 的问题),我们在 Stack Overflow 上找到一个类似的问题: https://stackoverflow.com/questions/21604865/

相关文章:

c++ - 我怎么会 'generate variadic parameters'?

c++ - 我可以使用带有 SDL2 图像扩展名的 SDL-1.2.15 吗?

c++ - 将 cairo 表面直接渲染到 OpenGL 纹理

c++ - Spirit X3 : attribute of alternative parser, 不是 `char` ,而是 `variant<char, char>`

c++ - 使用 boostspirit 解析 html 转义序列

c++ - 在使用宽字符串解析罗马数字时,Boost spirit 库无法正常工作

c++ - 将计算结果保存到常量引用中

android - 如何使用 Oboe(用于 Android 的低延迟音频的 C++ 库)从 DatagramPacket 接收 byte[] 缓冲区?

c++ - boost spirit : how to parse length preceeding byte array?

c++ - 我究竟什么时候可以使用期望运算符?