c++ - 语法平衡问题

标签 c++ boost-spirit boost-spirit-qi tree-balancing

是否有可能强制 Boost.Spirit Qi 以这种方式运行,生成的语法可以根据某些运行时可计算的条件/规则/速率进行调整?例如,输入由语言结构组成,在解析过程中会导致不同的选择,有些更频繁,有些则更少。但是备选方案的顺序会影响效率,即语法的运行时最优性。在某些情况下,不可能提前确定在任意输入(可能是强聚类)的情况下更频繁地选择哪个备选方案。

我知道可以在运行时将符号附加到 qi::symbols,但其他一些解析器也需要类似的行为。

最佳答案

遗憾的是,您忘记了(?)包含示例语法。所以我自己编的。它解析这样的语言:

begin
    declare x : int;
    declare y : string;

    let x = 42;
    let y = "Life the universe and everything";

    for(ch : y)
    begin
        if (call is_alpha(ch))
        begin
            declare z : string;
            let z = call to_upper(ch);
            call print(z);
        end; 
        else call print("?");
    end;
end;

现在。您可能会注意到每个“语言构造”(如您在 OP 中所指)都有一个介绍关键字。这是故意的。

因为,现在,我们可以使用 qi::symbols 和这些介绍者关键字来调度规则(这被称为 Nabialek 技巧):

// let's have some language constructs
feature_vardecl    = identifier >> ':' >> type >> ';';
feature_assignment = identifier >> "=" >> expression >> ';';
feature_block      = *statement >> kw["end"] >> ';' | statement;
feature_forloop    = '(' >> identifier >> ':' >> identifier > ')' >> statement;
feature_func_call  = invocation > ';';
feature_if         = ('(' > expression > ')' > statement) >> (kw["else"] > statement);

language_constructs.add
    ("declare", &feature_vardecl)
    ("let",     &feature_assignment)
    ("begin",   &feature_block)
    ("if",      &feature_if)
    ("for",     &feature_forloop)
    ("call",    &feature_func_call);

可以看到,我们将对应语法规则的地址作为值存储在字典中。现在,我们使用 Nabialek 技巧(使用 qi::_a local 来调用子规则):

statement  = 
      (kw[language_constructs] [ qi::_a = qi::_1 ] > qi::lazy(*qi::_a))
    | (expression > ';');

如果你想要一个更“轻量级”的语法,你只需删除一些功能:

language_constructs.add
    ("let",     &feature_assignment)
    ("begin",   &feature_block)
    ("call",    &feature_func_call);

可以甚至动态地向 language_constructs 添加功能以响应输入(例如,输入中的版本标识符,或者仅在解析失败时)。我不确定这是否是个好主意,但是……一切皆有可能。


解析上述程序(如果在 input.txt 中提供)的全功能示例,包括临时“单元测试”、独特的关键字检查支持、调试等:

Live on Coliru

#define BOOST_SPIRIT_DEBUG
#define BOOST_SPIRIT_USE_PHOENIX_V3
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/repository/include/qi_distinct.hpp>
#include <fstream>

namespace qi = boost::spirit::qi;
namespace phx= boost::phoenix;

using It      = std::string::const_iterator;
using Skipper = qi::space_type;
using Rule    = qi::rule<It, Skipper>;

template <typename G, size_t N>
    bool test(const char (&raw)[N], const G& grammar)
{
    std::string const input(raw, raw+N-1);
    auto f(std::begin(input)), l(std::end(input));
    try
    {
        bool ok = qi::phrase_parse(f, l, grammar, qi::space);
        // if (f!=l) std::cerr << "remaining unparsed: '" << std::string(f,l) << "'\n";
        return ok && (f == l); 
    } catch (qi::expectation_failure<It> const& e)
    {
        // std::cout << "Expectation failure '" << e.what() << "' at '" << std::string(e.first, e.last) << "'\n";
        return false;
    }
}

template <typename It, typename Skipper>
struct toy_grammar : qi::grammar<It, Skipper>
{
    toy_grammar() : toy_grammar::base_type(start)
    {
        using boost::spirit::repository::distinct;
        static const auto kw = distinct(qi::char_("a-zA-Z_0-9"));

        keywords.add("let")("declare")("begin")("end")("for")("call")("if")("else");

        identifier = !kw[keywords] >> qi::lexeme [ qi::alpha >> *qi::char_("a-zA-Z_0-9") ];

        assert( test("z", identifier));
        assert( test("Afgjkj_123123", identifier));
        assert(!test("1", identifier));

        type       = qi::lexeme [ kw["int"] | kw["double"]| kw["string"] | kw["boolean"]];

        assert( test("int",     type));
        assert( test("double",  type));
        assert( test("string",  type));
        assert( test("boolean", type));
        assert(!test("intzies", type));
        assert(!test("Int",     type));

        literal    = qi::lexeme [
                    qi::real_parser<double, qi::strict_real_policies<double>>()
                    | qi::int_
                    | qi::as_string ['"' >> *~qi::char_('"') >> '"']
                    | kw [ qi::bool_ ]
                    ];

        assert( test("42",     literal));
        assert( test("42.",    literal));
        assert( test(".0",     literal));
        assert( test("-3e+7",  literal));
        assert( test("-inf",   literal));
        assert( test("-99",    literal));
        assert( test("\"\"",   literal));
        assert( test("\"\0\"", literal));
        assert( test("true",   literal));
        assert( test("false",  literal));
        assert(!test("trueish",literal));
        assert(!test("yes",    literal));

        invocation = identifier > '(' > -(expression % ',') > ')';

        // arhem, this part left as an exercise for the reader :)
        expression = literal | identifier | (kw["call"] > invocation); 

        assert( test("-99",       expression));
        assert( test("\"santa\"", expression));
        assert( test("clause",    expression));
        assert( test("true",      expression));
        assert( test("call foo()",    expression));
        assert( test("call foo(bar, inf, false)", expression));
        assert(!test("call 42()",     expression));

        // let's have some language constructs
        feature_vardecl    = identifier >> ':' >> type >> ';';
        feature_assignment = identifier >> "=" >> expression >> ';';
        feature_block      = *statement >> kw["end"] >> ';' | statement;
        feature_forloop    = '(' >> identifier >> ':' >> identifier > ')' >> statement;
        feature_func_call  = invocation > ';';
        feature_if_else    = ('(' > expression > ')' > statement) >> (kw["else"] > statement);

        language_constructs.add
            ("declare", &feature_vardecl)
            ("let",     &feature_assignment)
            ("begin",   &feature_block)
            ("if",      &feature_if_else)
            ("for",     &feature_forloop)
            ("call",    &feature_func_call);

        statement  = 
              (kw[language_constructs] [ qi::_a = qi::_1 ] > qi::lazy(*qi::_a))
            | (expression > ';');

        assert( test("declare x : int;"                                       , statement));
        assert( test("let y = true;"                                          , statement));
        assert( test("call foo();",                                             statement));
        assert( test("call foo(bar, inf, false);",                              statement));

        assert( test("begin end;",                                              statement));
        assert( test("begin let y = x; end;",                                   statement));
        assert( test("begin let y = x; call foo(y); end;",                      statement));
        assert( test("for (x : collection) begin let y = x; call foo(y); end;", statement));

        BOOST_SPIRIT_DEBUG_NODES((identifier)(type)(literal)(expression)(invocation)(statement)
                (feature_vardecl)(feature_assignment)(feature_block)(feature_forloop)(feature_func_call)(feature_if_else)
                );

        start = statement;
    }
  private:
    qi::symbols<char, Rule const*> language_constructs;
    qi::symbols<char, qi::unused_type> keywords;

    Rule start,
         identifier, type, literal, expression, invocation, 
         feature_assignment, feature_vardecl, feature_block, feature_forloop, feature_func_call, feature_if_else;

    qi::rule<It, Skipper, qi::locals<Rule const*> > statement;
};

int main()
{
    using namespace std;
    ifstream ifs("input.txt", ios::binary);
    string const input(istreambuf_iterator<char>(ifs), {});

    auto f(begin(input)), l(end(input));
    try
    {
        static const toy_grammar<It, Skipper> p;
        bool ok = qi::phrase_parse(
                f, l,
                p,
                qi::space);

        assert(ok);

        if (f!=l)
            cout << "Program remaining unparsed: '" << string(f,l) << "'\n";
    } catch (qi::expectation_failure<It> const& e)
    {
        cout << "Expectation failure '" << e.what() << "' at '" << string(e.first, e.last) << "'\n";
    }
}

关于c++ - 语法平衡问题,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/20846491/

相关文章:

c++ - Boost::spirit 在规则中使用结构方法

c++ - 为什么会出现错误 "no matching function for call to ' A(A<...auto...> )'"?

c++ - 这个c++宏在做什么?

c++ - 我将如何执行此文本模式匹配

c++ - 组合 `%` 和可选后缀时,自动属性传播有时不起作用

c++ - 使用 Boost Spirit Qi 解析分隔的 token 列表

c++ - 琐碎的 Spirit Parser 语法的段错误

c++ - va_list c++ 的问题

c++ - 当我尝试写入二维数组时出现未处理的异常

c++ - boost::spirit::hold_any 内存损坏