c++ - Boost:仅解析先前声明的变量

标签 c++ parsing boost boost-spirit boost-spirit-qi

我参考网络上的各种资料,用 boost 库拼凑了一个解析器。它可以工作(虽然不像我希望的那样干净),但我遇到了一个特定的问题。在解析器的第一部分,我首先解析函数名,然后是括号中的一组参数。稍后,在解析实际表达式时,我在 factor 规则中允许解析数字和变量。但是,我只想解析那些先前在 vars 解析器中声明的变量。这是我的语法:

// Qi grammar for input of the form  name '(' vars ')' '=' expr , skipping
// ASCII whitespace. The three parts are stored into members 0, 1 and 2 of the
// synthesized `expression` through explicit semantic actions.
// Note: `var` accepts any single lowercase letter (unless sin/cos/tan starts
// at that position); it never consults the list parsed by `vars`.
template<typename Iterator>
  struct exp_parser : qi::grammar<Iterator, expression(), ascii::space_type>
  {
    exp_parser() : exp_parser::base_type(all)
    {
      using qi::_val;
      using qi::_1;
      using qi::char_;
      using qi::double_;
      using qi::lit;
      using phoenix::at_c;
      using phoenix::push_back;
      using phoenix::bind;

      // Start rule: each sub-result is copied into the matching member of _val.
      all =
        name [at_c<0>(_val) = _1] >> '(' >> vars [at_c<1>(_val) = _1] >> ')' >> '='
        >> expr [at_c<2>(_val) = _1];

      // Parsing of actual expression
      // expr: terms combined with '+' / '-'.
      expr =
          term                            [_val = _1]
          >> *(   ('+' >> term            [_val += _1])
              |   ('-' >> term            [_val -= _1])
            );

      // term: factors combined with '*' / '/'.
      term =
          factor                          [_val = _1]
          >> *(   ('*' >> factor          [_val *= _1])
              |   ('/' >> factor          [_val /= _1])
            );

      // factor: a simple operand, a parenthesised expr, a signed factor, or
      // sin/cos/tan applied to a factor (built through make_unary).
      factor =
          simple                          [_val = _1]
          |   '(' >> expr                 [_val = _1] >> ')'
          |   ('-' >> factor              [_val = bind(make_unary, UN_OP::MIN, _1)])
          |   ("sin" >> factor            [_val = bind(make_unary, UN_OP::SIN, _1)])
          |   ("cos" >> factor            [_val = bind(make_unary, UN_OP::COS, _1)])
          |   ("tan" >> factor            [_val = bind(make_unary, UN_OP::TAN, _1)])
          |   ('+' >> factor              [_val = _1]);

      // Prototyping of expression
      prtctd %= lit("sin") | lit("cos") | lit("tan");   // reserved function names
      var    %= !prtctd >> char_('a','z');               // one lowercase letter, not the start of a reserved name
      num    %= double_;
      simple %= var | num | ('(' >> expr >> ')');
      // name: a letter followed by any number of letters or digits.
      name   %= ((char_('a','z') | char_('A','Z') ) >> *(char_('a','z') | char_('A','Z') | char_('0','9') ));
      // vars: comma-separated single lowercase letters (does not go through `var`).
      vars   %= (char_('a','z') >> *(',' >> char_('a','z')));
    }
    // Expression rules, all synthesizing an `ast`.
    qi::rule<Iterator, ast(), ascii::space_type> expr, term, factor, simple;

    qi::rule<Iterator, expression(), ascii::space_type> all;
    qi::rule<Iterator, std::string(), ascii::space_type> name, prtctd;
    qi::rule<Iterator, std::vector<char>(), ascii::space_type> vars;
    qi::rule<Iterator, char(), ascii::space_type> var;
    qi::rule<Iterator, double(), ascii::space_type> num;
  };

这是我用来存储所有内容的结构:

  // Result of parsing one definition; the grammar's start rule fills the
  // members by position (at_c<0>, at_c<1>, at_c<2>).
  struct expression {
    std::string name;             // function name (from the `name` rule)
    std::vector<char> arguments;  // declared parameter letters (from the `vars` rule)
    ast syntax_tree;              // tree produced by the `expr` rule
  };

现在,我如何在 factor 解析器中访问这个 std::vector<char>,以便只解析正确的变量?

此外,我还不熟悉使用 boost 并将其用作自己的练习以开始学习一些知识。如果有人有任何建议,请告诉我如何清理此代码。

提前致谢!

最佳答案

这是 Spirit 中的一个大反模式:

  all =
    name [at_c<0>(_val) = _1] >> '(' >> vars [at_c<1>(_val) = _1] >> ')' >> '='
    >> expr [at_c<2>(_val) = _1];

事实上,我相信您一直在查看的示例显示了更好的方法。另外,我注意到您从相互冲突的方法中选择了代码(当语义操作动态评估表达式值时,您无法合成语法树)。

首先,摆脱语义 Action 思维:Boost Spirit: "Semantic actions are evil"?

BOOST_FUSION_ADAPT_STRUCT(expression, name, arguments, syntax_tree)

all = name >> '(' >> vars >> ')' >> '=' >> expr;

还有很多其他的“病”:

  • prtctd 应该是一个词素(lexeme),这样 si\nn 就不会匹配
  • *(char_('a','z') | char_('A','Z') | char_('0','9') )就是*alnum
  • name 也应该是一个词素,所以可以简单地写成:

    name   = alpha >> *alnum;
    
  • vars 甚至没有使用 var?

总而言之,这是这些规则的简化版本(假设您已从 prtctd 和 name 中去掉了 skipper):

  prtctd = lit("sin") | "cos" | "tan";
  var    = !prtctd >> ascii::lower;
  num    = double_;
  simple = var | num | '(' >> expr >> ')';
  name   = ascii::alpha >> *ascii::alnum;
  vars   = var % ',';

一个独立的例子

让我们在上面添加一些模拟部分,并有一些我们可以测试的东西:

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/adapted.hpp>

namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace phoenix = boost::phoenix;

// Placeholder syntax-tree node that only exists so the demo grammar compiles.
// It can be constructed and assigned from any value and accepts the compound
// assignments +=, -=, *= and /= with any operand; every operand is ignored.
// Streaming an ast always writes the fixed text "syntax_tree".
struct ast {
    // Construction: default, or from an arbitrary (ignored) value.
    ast() = default;
    template <typename Any>
    ast(Any&&) {}

    // Assignment from an arbitrary (ignored) value.
    template <typename Any>
    ast& operator=(Any&&) { return *this; }

    // Compound assignments: no-ops that return *this.
    template <typename Rhs>
    ast& operator+=(Rhs&&) { return *this; }
    template <typename Rhs>
    ast& operator-=(Rhs&&) { return *this; }
    template <typename Rhs>
    ast& operator*=(Rhs&&) { return *this; }
    template <typename Rhs>
    ast& operator/=(Rhs&&) { return *this; }

    friend std::ostream& operator<<(std::ostream& out, ast) {
        out << "syntax_tree";
        return out;
    }
};

// Result of parsing one definition of the form  name(arguments) = expr.
// Adapted as a Fusion sequence elsewhere in this file, so the member order
// must match the order in which the grammar produces its attributes.
struct expression {
    std::string name;                    // function name
    std::vector<std::string> arguments;  // declared parameter names
    ast syntax_tree;                     // tree built for the right-hand side

    // Prints `name(arg, arg, ) = <syntax_tree>`; every argument, including
    // the last one, is followed by ", ".
    friend std::ostream& operator<<(std::ostream& os, expression const& e) {
        os << e.name << "(";
        // Iterate by const reference: avoids copying each std::string.
        for (auto const& arg : e.arguments) os << arg << ", ";
        return os << ") = " << e.syntax_tree;
    }
};

// Adapt `expression` as a Boost.Fusion sequence (members in this order) so the
// grammar's start rule can fill it from its attribute without semantic actions.
BOOST_FUSION_ADAPT_STRUCT(expression, name, arguments, syntax_tree)

// Tags passed to make_unary by the grammar's `factor` rule.
enum UN_OP {
    MIN = 0,  // unary '-'
    SIN = 1,  // "sin"
    COS = 2,  // "cos"
    TAN = 3   // "tan"
};

// Mock of the unary-node factory: callable with any arguments, ignores all of
// them and returns qi::unused.
struct make_unary_f {
    template <typename... Args>
    qi::unused_type operator()(Args&&...) const {
        return qi::unused;
    }
};

// The callable instance handed to phoenix::bind in the grammar.
static const make_unary_f make_unary = {};

// Qi grammar for input of the form  name '(' vars ')' '=' expr , skipping
// ASCII whitespace between tokens. The start rule has no semantic actions:
// its sub-attributes are propagated into the Fusion-adapted `expression`.
// Variables are NOT checked against the declared parameter list here: `var`
// accepts any single lowercase letter that does not begin sin/cos/tan.
template<typename Iterator>
  struct exp_parser : qi::grammar<Iterator, expression(), ascii::space_type>
  {
    exp_parser() : exp_parser::base_type(all)
    {
      // Only the names actually used below are imported.
      using qi::_val;
      using qi::_1;
      using qi::double_;
      using qi::lit;
      using phoenix::bind;

      all = name >> '(' >> vars >> ')' >> '=' >> expr;

      // Parsing of actual expression
      // expr: terms combined with '+' / '-'.
      expr =
          term                   [_val = _1]
          >> *(   ('+' >> term   [_val += _1])
              |   ('-' >> term   [_val -= _1])
            );

      // term: factors combined with '*' / '/'.
      term =
          factor                 [_val = _1]
          >> *(   ('*' >> factor [_val *= _1])
              |   ('/' >> factor [_val /= _1])
            );

      // factor: a simple operand, a parenthesised expr, a signed factor, or
      // sin/cos/tan applied to a factor (built through make_unary).
      factor =
          simple                 [_val = _1]
          |   '(' >> expr        [_val = _1] >> ')'
          |   ('-' >> factor     [_val = bind(make_unary, UN_OP::MIN, _1)])
          |   ("sin" >> factor   [_val = bind(make_unary, UN_OP::SIN, _1)])
          |   ("cos" >> factor   [_val = bind(make_unary, UN_OP::COS, _1)])
          |   ("tan" >> factor   [_val = bind(make_unary, UN_OP::TAN, _1)])
          |   ('+' >> factor     [_val = _1]);

      // Prototyping of expression
      prtctd = lit("sin") | "cos" | "tan";        // reserved function names
      var    = !prtctd >> ascii::lower;           // one lowercase letter, not the start of a reserved name
      num    = double_;
      simple = var | num | '(' >> expr >> ')';
      name   = ascii::alpha >> *ascii::alnum;     // a letter followed by letters/digits
      vars   = var % ',';                         // comma-separated list of `var`
    }

  private:
    // Rules that skip whitespace between their tokens.
    qi::rule<Iterator, ast(), ascii::space_type> expr, term, factor, simple;
    qi::rule<Iterator, expression(), ascii::space_type> all;
    qi::rule<Iterator, std::vector<std::string>(), ascii::space_type> vars;

    // lexemes (declared without a skipper, so no whitespace is skipped inside them)
    qi::rule<Iterator, std::string()> name, prtctd;
    qi::rule<Iterator, std::string()> var;
    qi::rule<Iterator, double()> num;
  };

int main() {
    for (std::string const& input : {
            "",
            "foo (a) = 3*8+a",
            "bar (x, y) = (sin(x) + y*y) / (x + y)",
            "oops (x, y) = (sin(x) + y*y) / (x + a)",
        })
    try {
        using It = std::string::const_iterator;
        It f = input.begin(), l = input.end();

        expression e;
        bool ok = qi::phrase_parse(f, l, exp_parser<It>{} >> qi::eoi, ascii::space, e);

        if (ok) {
            std::cout << "Parse success: '" << input << "' -> " << e << "\n";
        } else {
            std::cout << "Parse failed: '" << input << "'\n";
        }

        if (f != l)
            std::cout << "Remaining unparsed: '" << std::string(f,l) << "'\n";
    } catch(std::exception const& e) {
        std::cout << "Exception: '" << e.what() << "'\n";
    }
}

正如预期的那样,它仍然能解析所有非空行,包括错误地使用了 a 而不是 y 的 oops:

Parse failed: ''
Parse success: 'foo (a) = 3*8+a' -> foo(a, ) = syntax_tree
Parse success: 'bar (x, y) = (sin(x) + y*y) / (x + y)' -> bar(x, y, ) = syntax_tree
Parse success: 'oops (x, y) = (sin(x) + y*y) / (x + a)' -> oops(x, y, ) = syntax_tree

声明和检查

为了匹配已声明的变量,我会使用 qi::symbols<>:

qi::symbols<char> _declared;

simple = _declared | num | '(' >> expr >> ')';

现在,要添加已声明的项目,我们将设计一个 Phoenix 函数,

// Functor (wrapped in phoenix::function below) that registers a name in a
// qi::symbols table. It stores only the table's address, so the table must
// still be alive whenever operator() is invoked.
struct add_declaration_f {
    // Left non-explicit on purpose: `_declare { _declared }` below builds the
    // functor directly from the symbol table.
    // NOTE(review): std::addressof is presumably used because qi::symbols may
    // overload unary '&' — confirm before replacing it with plain '&'.
    add_declaration_f(qi::symbols<char>& ref) : _p(std::addressof(ref)) {}
    qi::symbols<char>* _p;
    // Adds `arg` to the referenced symbol table.
    void operator()(std::string const& arg) const { _p->add(arg); }
};

phoenix::function<add_declaration_f> _declare { _declared };

并使用它:

  vars  %= var [ _declare(_1) ] % ',';

集成演示

Live On Coliru

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/adapted.hpp>

namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace phoenix = boost::phoenix;

// Placeholder syntax-tree node that only exists so the demo grammar compiles.
// It can be constructed and assigned from any value and accepts the compound
// assignments +=, -=, *= and /= with any operand; every operand is ignored.
// Streaming an ast always writes the fixed text "syntax_tree".
struct ast {
    // Construction: default, or from an arbitrary (ignored) value.
    ast() = default;
    template <typename Any>
    ast(Any&&) {}

    // Assignment from an arbitrary (ignored) value.
    template <typename Any>
    ast& operator=(Any&&) { return *this; }

    // Compound assignments: no-ops that return *this.
    template <typename Rhs>
    ast& operator+=(Rhs&&) { return *this; }
    template <typename Rhs>
    ast& operator-=(Rhs&&) { return *this; }
    template <typename Rhs>
    ast& operator*=(Rhs&&) { return *this; }
    template <typename Rhs>
    ast& operator/=(Rhs&&) { return *this; }

    friend std::ostream& operator<<(std::ostream& out, ast) {
        out << "syntax_tree";
        return out;
    }
};

// Result of parsing one definition of the form  name(arguments) = expr.
// Adapted as a Fusion sequence elsewhere in this file, so the member order
// must match the order in which the grammar produces its attributes.
struct expression {
    std::string name;                    // function name
    std::vector<std::string> arguments;  // declared parameter names
    ast syntax_tree;                     // tree built for the right-hand side

    // Prints `name(arg, arg, ) = <syntax_tree>`; every argument, including
    // the last one, is followed by ", ".
    friend std::ostream& operator<<(std::ostream& os, expression const& e) {
        os << e.name << "(";
        // Iterate by const reference: avoids copying each std::string.
        for (auto const& arg : e.arguments) os << arg << ", ";
        return os << ") = " << e.syntax_tree;
    }
};

// Adapt `expression` as a Boost.Fusion sequence (members in this order) so the
// grammar's start rule can fill it from its attribute without semantic actions.
BOOST_FUSION_ADAPT_STRUCT(expression, name, arguments, syntax_tree)

// Tags passed to make_unary by the grammar's `factor` rule.
enum UN_OP {
    MIN = 0,  // unary '-'
    SIN = 1,  // "sin"
    COS = 2,  // "cos"
    TAN = 3   // "tan"
};

// Mock of the unary-node factory: callable with any arguments, ignores all of
// them and returns qi::unused.
struct make_unary_f {
    template <typename... Args>
    qi::unused_type operator()(Args&&...) const {
        return qi::unused;
    }
};

// The callable instance handed to phoenix::bind in the grammar.
static const make_unary_f make_unary = {};

// Qi grammar for input of the form  name '(' vars ')' '=' expr , skipping
// ASCII whitespace between tokens. Each parameter matched by `vars` is
// registered in the `_declared` symbol table, and `simple` only accepts
// variables found in that table, so undeclared variables make the parse fail.
//
// Entries are only ever added to `_declared`, never removed, so use a fresh
// grammar instance per input (as main() does) to avoid leaking declarations
// from one parse into the next.
template<typename Iterator>
  struct exp_parser : qi::grammar<Iterator, expression(), ascii::space_type>
  {
    exp_parser() : exp_parser::base_type(all)
    {
      // Only the names actually used below are imported.
      using qi::_val;
      using qi::_1;
      using qi::double_;
      using qi::lit;
      using phoenix::bind;

      all = name >> '(' >> vars >> ')' >> '=' >> expr;

      // Parsing of actual expression
      // expr: terms combined with '+' / '-'.
      expr =
          term                   [_val = _1]
          >> *(   ('+' >> term   [_val += _1])
              |   ('-' >> term   [_val -= _1])
            );

      // term: factors combined with '*' / '/'.
      term =
          factor                 [_val = _1]
          >> *(   ('*' >> factor [_val *= _1])
              |   ('/' >> factor [_val /= _1])
            );

      // The function keywords are tried BEFORE `simple`: the symbol table has
      // no reserved-word guard, so a declared variable named s, c or t would
      // otherwise match the first letter of sin/cos/tan and the function
      // alternatives would never be reached (e.g. "f(s) = sin(s)").
      factor =
              ("sin" >> factor   [_val = bind(make_unary, UN_OP::SIN, _1)])
          |   ("cos" >> factor   [_val = bind(make_unary, UN_OP::COS, _1)])
          |   ("tan" >> factor   [_val = bind(make_unary, UN_OP::TAN, _1)])
          |   simple             [_val = _1]
          |   '(' >> expr        [_val = _1] >> ')'
          |   ('-' >> factor     [_val = bind(make_unary, UN_OP::MIN, _1)])
          |   ('+' >> factor     [_val = _1]);

      // Prototyping of expression
      prtctd = lit("sin") | "cos" | "tan";        // reserved function names
      var    = !prtctd >> ascii::lower;           // one lowercase letter, not the start of a reserved name
      num    = double_;
      simple = _declared | num | '(' >> expr >> ')';   // only declared variables are accepted
      name   = ascii::alpha >> *ascii::alnum;     // a letter followed by letters/digits
      // Each matched parameter is added to `_declared` as a side effect;
      // %= keeps automatic attribute propagation despite the semantic action.
      vars  %= var [ _declare(_1) ] % ',';
    }

  private:
    // Names declared in the parameter list of the definition being parsed.
    qi::symbols<char> _declared;

    // Functor that adds a name to a symbol table. It stores only the table's
    // address, so the table must still be alive whenever operator() runs.
    struct add_declaration_f {
        // Left non-explicit on purpose: `_declare { _declared }` below builds
        // the functor directly from the symbol table.
        add_declaration_f(qi::symbols<char>& ref) : _p(std::addressof(ref)) {}
        qi::symbols<char>* _p;
        void operator()(std::string const& arg) const { _p->add(arg); }
    };

    // Lazy (Phoenix) wrapper used in the semantic action on `var`.
    // Must be declared after `_declared`, which it refers to.
    phoenix::function<add_declaration_f> _declare { _declared };

    // Rules that skip whitespace between their tokens.
    qi::rule<Iterator, ast(), ascii::space_type> expr, term, factor, simple;
    qi::rule<Iterator, expression(), ascii::space_type> all;
    qi::rule<Iterator, std::vector<std::string>(), ascii::space_type> vars;

    // lexemes (declared without a skipper, so no whitespace is skipped inside them)
    qi::rule<Iterator, std::string()> name, prtctd;
    qi::rule<Iterator, std::string()> var;
    qi::rule<Iterator, double()> num;
  };

int main() {
    for (std::string const& input : {
            "",
            "foo (a) = 3*8+a",
            "bar (x, y) = (sin(x) + y*y) / (x + y)",
            "oops (x, y) = (sin(x) + y*y) / (x + a)",
        })
    try {
        using It = std::string::const_iterator;
        It f = input.begin(), l = input.end();

        expression e;
        bool ok = qi::phrase_parse(f, l, exp_parser<It>{}, ascii::space, e);

        if (ok) {
            std::cout << "Parse success: '" << input << "' -> " << e << "\n";
        } else {
            std::cout << "Parse failed: '" << input << "'\n";
        }

        if (f != l)
            std::cout << "Remaining unparsed: '" << std::string(f,l) << "'\n";
    } catch(std::exception const& e) {
        std::cout << "Exception: '" << e.what() << "'\n";
    }
}

打印:

Parse failed: ''
Parse success: 'foo (a) = 3*8+a' -> foo(a, ) = syntax_tree
Parse success: 'bar (x, y) = (sin(x) + y*y) / (x + y)' -> bar(x, y, ) = syntax_tree
Parse success: 'oops (x, y) = (sin(x) + y*y) / (x + a)' -> oops(x, y, ) = syntax_tree
Remaining unparsed: '/ (x + a)'

在解析器表达式中添加 >> qi::eoi 后,我们得到: Live On Coliru

Parse failed: ''
Parse success: 'foo (a) = 3*8+a' -> foo(a, ) = syntax_tree
Parse success: 'bar (x, y) = (sin(x) + y*y) / (x + y)' -> bar(x, y, ) = syntax_tree
Parse failed: 'oops (x, y) = (sin(x) + y*y) / (x + a)'
Remaining unparsed: 'oops (x, y) = (sin(x) + y*y) / (x + a)'

关于c++ - Boost:仅解析先前声明的变量,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/43983833/

相关文章:

c++ - Boost Spirit可以用来解析字节流数据吗?

c++ - vc6到vs2010移植错误

C++: std::vector - "slice"一个 vector 是可能的吗?

c++ - 不允许嵌套函数,但为什么允许嵌套函数原型(prototype)? [C++]

c# - 读取 XML 文件并创建与 XML 内容匹配的对象列表

c++ - boost 文件系统。 is_directory 从不工作

c++ - 使用 CAsynSocket,如何捕获套接字操作的退出代码?

perl - 在 Perl 中,如何正确解析带引号的字符串的制表符/空格分隔文件?

C++ Boost 链接错误

c++ - boost::bind 和 io_service 出现问题