c++ - 为什么 boost spirit lex 会挂起,而不是报告解析错误?

标签 c++ boost-spirit boost-spirit-qi boost-spirit-lex

我很长一段时间没有使用 boost::spirit,最近又重新用了起来,结果卡在了一个很简单的用例上(天哪,有时我真想干掉这个库……为什么这么简单的任务用 boost 来做会如此复杂)。

#include <iostream>
#include <string>

#include <boost/bind.hpp>
#include <boost/ref.hpp>

#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix.hpp>

namespace bs = boost::spirit;
namespace lex = boost::spirit::lex;
namespace qi = boost::spirit::qi;
namespace phx = boost::phoenix;

//---------------------------------------------------------------------------------
// configuration

// Character-level iterator over the std::string that holds the input.
using base_iterator_type = std::string::iterator;
// Token type instantiated on that character iterator.
using token_type = lex::lexertl::token<base_iterator_type>;
// lexertl lexer instantiated on token_type.
using lexer_type = lex::lexertl::lexer<token_type>;

//---------------------------------------------------------------------------------

// Lexer definition with three token patterns: the literal "class", an
// identifier, and (in the separate "WS" state) runs of spaces/tabs.
// None of these patterns can match input that starts with a digit.
template <typename Lexer>
struct cpp_tokens : lex::lexer<Lexer>
{
    cpp_tokens()
    {
        class_ = "class";
        identifier = "[a-zA-Z_][a-zA-Z0-9_]*";

        // Register both tokens on `self` (no state name given).
        // NOTE(review): `class_` is listed before `identifier`, presumably so the
        // keyword wins over the identifier pattern — confirm against Spirit.Lex docs.
        this->self += class_ | identifier;
        // Whitespace lives only in the "WS" state; main() uses that state as the skipper.
        this->self("WS") = lex::token_def<>("[ \\t]+");
    }

    // Token for the literal "class".
    lex::token_def<> class_;
    // Token for identifiers; declared with a std::string attribute type.
    lex::token_def<std::string> identifier;
};

// Concrete lexer: the token definitions above instantiated on lexer_type.
using cpp_lex = cpp_tokens<lexer_type>;
// Iterator type exposed by the lexer; main() passes a pair of these to qi::phrase_parse.
using cpp_iterator = cpp_lex::iterator_type;

//---------------------------------------------------------------------------------

// Qi grammar over the lexer's token stream. Its start rule `program` matches
// an `identifier` token followed by a `class_` token, and the grammar is
// declared with qi::in_state_skipper<Lexer> as its skipper type.
template <typename Iterator, typename Lexer>
struct cpp_grammar_impl : bs::qi::grammar<Iterator, bs::qi::in_state_skipper<Lexer>>
{
    // `tok` must expose `identifier` and `class_` members; main() passes a cpp_lex.
    template <typename TokenDef>
    cpp_grammar_impl(TokenDef const& tok) : cpp_grammar_impl::base_type(program, "program")
    {
        program = tok.identifier >> tok.class_;
    }

private:
    using skipper_type = bs::qi::in_state_skipper<Lexer>;
    using simple_rule = qi::rule<Iterator, skipper_type>;

    // Start rule; handed to base_type in the constructor and defined in its body.
    simple_rule program;
};

// Concrete grammar: parses cpp_iterator ranges and skips using cpp_lex::lexer_def.
using cpp_grammar = cpp_grammar_impl<cpp_iterator, cpp_lex::lexer_def>;

//---------------------------------------------------------------------------------

// Lexes and parses the fixed input "111 class" and reports the outcome on stdout.
int main()
{
    std::string str("111 class");

    cpp_lex cppLexer;
    cpp_grammar cppGrammar(cppLexer);

    // `it` is the character-level iterator into `str`; `iter` and `end` are the
    // lexer's own iterators obtained from cppLexer.
    auto it = str.begin();
    cpp_iterator iter = cppLexer.begin(it, str.end());
    cpp_iterator end = cppLexer.end();

    // Parse the token range, skipping with the lexer's "WS" state.
    bool r = qi::phrase_parse(iter, end, cppGrammar, bs::qi::in_state("WS")[cppLexer.self]);

    // Success requires both a match and full consumption of the token range.
    if (r && iter == end)
    {
        std::cout << "-------------------------\n";
        std::cout << "Parsing succeeded\n";
        std::cout << "-------------------------\n";
    }
    else
    {
        // NOTE(review): `iter`/`end` are lexer iterators, not character iterators
        // into `str`, so this does not build the remaining input text. The answer
        // further down identifies this statement as the one that hangs and uses
        // the base iterators (`it`, `str.end()`) instead.
        std::string rest(iter, end);
        std::cout << "-------------------------\n";
        std::cout << "Parsing failed\n";
        std::cout << "stopped at: \"" << rest << "\"\n";
        std::cout << "-------------------------\n";
    }

    std::cout << "Bye... :-) \n\n";
}

我预计会出现解析错误,但在上面的示例中,boost::spirit 却挂起了:它占满处理器并耗尽了所有内存。示例见 Coliru。

我做错了什么以及如何解决?

最佳答案

使用调试器,你可以很容易地发现解析本身并没有挂起。

实际上,出问题的是下面这一行:

  std::string rest(iter, end);

挂起的正是这一行。我想它更像是未定义行为(UB),因为 iter 和 end 迭代器解引用得到的不是字符元素,而是标记(token)。

所以简单的解决方法是使用基本迭代器:

   std::string rest(it, str.end());

Live On Coliru

#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/qi.hpp>
#include <iostream>
#include <string>

namespace lex = boost::spirit::lex;
namespace qi = boost::spirit::qi;

//---------------------------------------------------------------------------------
// configuration

// Character-level iterator over the std::string that holds the input.
using base_iterator_type = std::string::iterator;
// Token type instantiated on that character iterator.
using token_type = lex::lexertl::token<base_iterator_type>;
// lexertl lexer instantiated on token_type.
using lexer_type = lex::lexertl::lexer<token_type>;

//---------------------------------------------------------------------------------

// Token set for the sample lexer: the literal "class", an identifier pattern,
// and a whitespace pattern that is registered only in the "WS" lexer state.
template <typename Lexer>
struct cpp_tokens : lex::lexer<Lexer> {
    cpp_tokens()
        : class_("class")
        , identifier("[a-zA-Z_][a-zA-Z0-9_]*")
    {
        // Both tokens go on `self` with no state name, keyword first.
        this->self += class_ | identifier;

        // Spaces and tabs are only recognised in the "WS" state, which main()
        // hands to the parser as its skipper.
        this->self("WS") = lex::token_def<>("[ \\t]+");
    }

    // Matches the literal "class".
    lex::token_def<> class_;
    // Matches identifiers; declared with a std::string attribute type.
    lex::token_def<std::string> identifier;
};

// Concrete lexer: the token definitions above instantiated on lexer_type.
using cpp_lex = cpp_tokens<lexer_type>;
// Iterator type exposed by the lexer; main() passes a pair of these to qi::phrase_parse.
using cpp_iterator = cpp_lex::iterator_type;

//---------------------------------------------------------------------------------

// Qi grammar over the lexer's token stream. Its start rule `program` matches
// an `identifier` token followed by a `class_` token, and the grammar is
// declared with qi::in_state_skipper<Lexer> as its skipper type.
template <typename Iterator, typename Lexer>
struct cpp_grammar_impl : qi::grammar<Iterator, qi::in_state_skipper<Lexer> > {
    // `tok` must expose `identifier` and `class_` members; main() passes a cpp_lex.
    template <typename TokenDef>
    cpp_grammar_impl(TokenDef const &tok) : cpp_grammar_impl::base_type(program, "program") {
        program = tok.identifier >> tok.class_;
    }

  private:
    using skipper_type = qi::in_state_skipper<Lexer>;
    using simple_rule = qi::rule<Iterator, skipper_type>;

    // Start rule; handed to base_type in the constructor and defined in its body.
    simple_rule program;
};

// Concrete grammar: parses cpp_iterator ranges and skips using cpp_lex::lexer_def.
using cpp_grammar = cpp_grammar_impl<cpp_iterator, cpp_lex::lexer_def>;

//---------------------------------------------------------------------------------

// Runs the lexer and grammar over the fixed input "111 class" and prints
// whether the whole token range was matched.
int main() {
    std::string str("111 class");

    cpp_lex cppLexer;
    cpp_grammar cppGrammar(cppLexer);

    // `it` is the character-level position given to the lexer; `iter`/`end`
    // are the lexer's own iterators.
    // NOTE(review): presumably the lexer advances `it` as it consumes input,
    // which is why `it` is used for the diagnostic below — confirm against
    // the Spirit.Lex documentation.
    auto it = str.begin();
    cpp_iterator iter = cppLexer.begin(it, str.end());
    cpp_iterator end = cppLexer.end();

    const bool ok = qi::phrase_parse(iter, end, cppGrammar, qi::in_state("WS")[cppLexer.self]);

    // Failure is either no match or a match that leaves tokens unconsumed.
    if (!ok || !(iter == end)) {
        // Build the remaining text from the base (character) iterators, not
        // from the lexer iterators.
        std::string rest(it, str.end());
        std::cout << "-------------------------\n"
                  << "Parsing failed\n"
                  << "stopped at: \"" << rest << "\"\n"
                  << "-------------------------\n";
    } else {
        std::cout << "-------------------------\n"
                  << "Parsing succeeded\n"
                  << "-------------------------\n";
    }

    std::cout << "Bye... :-) \n\n";
}

打印:

-------------------------
Parsing failed
stopped at: "111 class"
-------------------------
Bye... :-) 

关于c++ - 为什么 boost spirit lex hung 而不是解析错误?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/52074245/

相关文章:

c++ - boost spirit x3 变体和 std::pair

c++ - 如何创建一个可选的解析器,该解析器能够有条件地删除合成项

c++ - 如何停止 Spirit Qi 'repeat' 解析器中的字符串连接?

c++ - 关于雪豹问题的 gprof

c++ - 父类和子类之间的静态变量

c++ - 奇怪的多线程问题

c++ - 如何在类似于 C 的 C++ 中打印 float

c++ - Boost Spirit X3 eol 意外行为

c++ - Boost.Spirit.Qi 替代 (|) 解析器问题

c++ - 如何让 boost::spirit 停止解析关键字?