多亏了 Boost Spirit 库,我设法解析了一个 pgn 文件,但是一旦出现一些我没有“预料到”的字符,它就会失败。
这是我的 Spirit 语法:
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
// Adapt loloof64::pgn_tag as a Boost.Fusion sequence: key, then value.
BOOST_FUSION_ADAPT_STRUCT(
loloof64::pgn_tag,
(std::string, key),
(std::string, value)
)
// Adapt loloof64::game_move as a Boost.Fusion sequence. Members are listed in
// the order move_number, move_turn, white_move, black_move, result.
BOOST_FUSION_ADAPT_STRUCT(
loloof64::game_move,
(unsigned, move_number),
(std::string, move_turn),
(std::string, white_move),
(std::string, black_move),
(std::string, result)
)
// Adapt loloof64::pgn_game as a Boost.Fusion sequence: header, then moves.
BOOST_FUSION_ADAPT_STRUCT(
loloof64::pgn_game,
(std::vector<loloof64::pgn_tag>, header),
(std::vector<loloof64::game_move>, moves)
)
namespace loloof64 {
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace phoenix = boost::phoenix;
// Spirit.Qi grammar for a sequence of PGN games. The start rule is `games`
// and the declared attribute is std::vector<pgn_game>. Every rule below is
// declared with qi::unused_type as its last template argument, and whitespace
// is matched explicitly inside the rules with omit[...].
// NOTE(review): the rules enumerate every accepted character; input outside
// those sets (comments, annotations, non-ASCII bytes, ...) is matched by no
// rule here.
template <typename Iterator>
struct pgn_parser : qi::grammar<Iterator, std::vector<pgn_game>, qi::unused_type>
{
pgn_parser() : pgn_parser::base_type(games)
{
using qi::lexeme;
using ascii::char_;
using qi::uint_;
using qi::alnum;
using qi::space;
using qi::omit;
using qi::eol;
using qi::lit;
// A double quote, any run of characters other than '"', and a closing quote.
quoted_string %= lexeme[lit('"') >> *(char_ - '"') >> lit('"')];
// '[' name, one or more spaces, quoted value, ']' and one or more line ends.
// The spaces and the line ends are wrapped in omit[].
tag %=
'['
>> +alnum
>> omit[+space]
>> quoted_string
>> ']'
>> omit[+eol]
;
// One or more tags.
header %= +tag;
// "..." is listed before "." so that the longer alternative is tried first.
move_turn %= qi::string("...") | qi::string(".");
// One or more characters from "a-hNBRQK", then one or more from
// "a-h1-8x=NBRQK", then an optional "e.p." suffix.
regular_move %=
+char_("a-hNBRQK")
>> +char_("a-h1-8x=NBRQK")
>> -qi::string("e.p.")
;
// "O-O-O" is listed before "O-O" so that the longer alternative is tried first.
castle_move %= qi::string("O-O-O") | qi::string("O-O");
// A regular or castle move, optionally followed by '+' or '#'.
single_move %=
(regular_move | castle_move) >> -(char_('+') | char_('#'))
;
// One of the four literal result tokens.
result %= qi::string("1-0") | qi::string("0-1") | qi::string("1/2-1/2") | qi::string("*");
// Move number, turn marker, optional spaces and a move; then optionally
// spaces plus a second move, and optionally spaces plus a result.
full_move %=
uint_
>> move_turn
>> omit[*space]
>> single_move
>> -(omit[+space] >> single_move)
>> -(omit[+space] >> result)
;
// One or more full moves, separated by optional spaces.
game_description %= full_move
>> *(omit[*space] >> full_move);
// An optional header followed by the moves.
single_game %=
-header
>> game_description
;
// One or more games, separated by optional spaces / line ends.
games %=
single_game
>> *(omit[*(space|eol)] >> single_game)
;
}
// Rule declarations; the second template argument is the attribute each rule
// is declared with.
qi::rule<Iterator, pgn_tag(), qi::unused_type> tag;
qi::rule<Iterator, std::vector<pgn_tag>, qi::unused_type> header;
qi::rule<Iterator, std::string(), qi::unused_type> quoted_string;
qi::rule<Iterator, std::string(), qi::unused_type> result;
qi::rule<Iterator, std::string(), qi::unused_type> regular_move;
qi::rule<Iterator, std::string(), qi::unused_type> castle_move;
qi::rule<Iterator, std::string(), qi::unused_type> single_move;
qi::rule<Iterator, std::string(), qi::unused_type> move_turn;
qi::rule<Iterator, game_move(), qi::unused_type> full_move;
qi::rule<Iterator, std::vector<game_move>, qi::unused_type> game_description;
qi::rule<Iterator, pgn_game, qi::unused_type> single_game;
qi::rule<Iterator, std::vector<pgn_game>, qi::unused_type> games;
};
}
我怎样才能简单地消耗掉任何我没有“预料到”的字符?也就是说,我怎样才能在语法规则中忽略任何我不想要的字符?
至于测试目的:
这里是我的解析器头文件 (pgn_games_extractor.hpp)
#ifndef PGNGAMESEXTRACTOR_HPP
#define PGNGAMESEXTRACTOR_HPP
#include <string>
#include <vector>
#include <fstream>
#include <stdexcept>
namespace loloof64 {

// A key/value pair from a game's header section.
struct pgn_tag {
    std::string key;
    std::string value;
};

// One numbered move entry: the move number, the turn marker text, the two
// half-moves and a result string.
struct game_move {
    unsigned move_number;
    std::string move_turn;
    std::string white_move;
    std::string black_move;
    std::string result;
};

// A single game: its header tags and its move list.
struct pgn_game {
    std::vector<pgn_tag> header;
    std::vector<game_move> moves;
};

// Error type for input that is not in the expected PGN format.
class PgnParsingException : public std::runtime_error
{
public:
    PgnParsingException(std::string message)
        : std::runtime_error(message)
    {
    }
};

// Error type for an input file that is missing or cannot be read.
class InputFileException : public std::runtime_error
{
public:
    InputFileException(std::string message)
        : std::runtime_error(message)
    {
    }
};

// Holds the games extracted from a PGN input.
class PgnGamesExtractor
{
public:
    // Both constructors may throw PgnParsingException (if bad pgn format)
    // and InputFileException (if missing file).
    PgnGamesExtractor(std::string inputFilePath);
    PgnGamesExtractor(std::ifstream &inputFile);

    virtual ~PgnGamesExtractor();

    // Returns a copy of the stored games.
    std::vector<pgn_game> getGames() const { return games; }

private:
    void parseInput(std::ifstream &inputFile);

    std::vector<pgn_game> games;
};

}
#endif // PGNGAMESEXTRACTOR_HPP
这是我的解析器源代码 (pgn_games_extractor.cpp):
#include "pgn_games_extractor.hpp"
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
// Adapt loloof64::pgn_tag as a Boost.Fusion sequence: key, then value.
BOOST_FUSION_ADAPT_STRUCT(
loloof64::pgn_tag,
(std::string, key),
(std::string, value)
)
// Adapt loloof64::game_move as a Boost.Fusion sequence. Members are listed in
// the order move_number, move_turn, white_move, black_move, result.
BOOST_FUSION_ADAPT_STRUCT(
loloof64::game_move,
(unsigned, move_number),
(std::string, move_turn),
(std::string, white_move),
(std::string, black_move),
(std::string, result)
)
// Adapt loloof64::pgn_game as a Boost.Fusion sequence: header, then moves.
BOOST_FUSION_ADAPT_STRUCT(
loloof64::pgn_game,
(std::vector<loloof64::pgn_tag>, header),
(std::vector<loloof64::game_move>, moves)
)
namespace loloof64 {
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace phoenix = boost::phoenix;
// Spirit.Qi grammar for a sequence of PGN games. The start rule is `games`
// and the declared attribute is std::vector<pgn_game>. Every rule below is
// declared with qi::unused_type as its last template argument, and whitespace
// is matched explicitly inside the rules with omit[...].
// NOTE(review): the rules enumerate every accepted character; input outside
// those sets (comments, annotations, non-ASCII bytes, ...) is matched by no
// rule here.
template <typename Iterator>
struct pgn_parser : qi::grammar<Iterator, std::vector<pgn_game>, qi::unused_type>
{
pgn_parser() : pgn_parser::base_type(games)
{
using qi::lexeme;
using ascii::char_;
using qi::uint_;
using qi::alnum;
using qi::space;
using qi::omit;
using qi::eol;
using qi::lit;
// A double quote, any run of characters other than '"', and a closing quote.
quoted_string %= lexeme[lit('"') >> *(char_ - '"') >> lit('"')];
// '[' name, one or more spaces, quoted value, ']' and one or more line ends.
// The spaces and the line ends are wrapped in omit[].
tag %=
'['
>> +alnum
>> omit[+space]
>> quoted_string
>> ']'
>> omit[+eol]
;
// One or more tags.
header %= +tag;
// "..." is listed before "." so that the longer alternative is tried first.
move_turn %= qi::string("...") | qi::string(".");
// One or more characters from "a-hNBRQK", then one or more from
// "a-h1-8x=NBRQK", then an optional "e.p." suffix.
regular_move %=
+char_("a-hNBRQK")
>> +char_("a-h1-8x=NBRQK")
>> -qi::string("e.p.")
;
// "O-O-O" is listed before "O-O" so that the longer alternative is tried first.
castle_move %= qi::string("O-O-O") | qi::string("O-O");
// A regular or castle move, optionally followed by '+' or '#'.
single_move %=
(regular_move | castle_move) >> -(char_('+') | char_('#'))
;
// One of the four literal result tokens.
result %= qi::string("1-0") | qi::string("0-1") | qi::string("1/2-1/2") | qi::string("*");
// Move number, turn marker, optional spaces and a move; then optionally
// spaces plus a second move, and optionally spaces plus a result.
full_move %=
uint_
>> move_turn
>> omit[*space]
>> single_move
>> -(omit[+space] >> single_move)
>> -(omit[+space] >> result)
;
// One or more full moves, separated by optional spaces.
game_description %= full_move
>> *(omit[*space] >> full_move);
// An optional header followed by the moves.
single_game %=
-header
>> game_description
;
// One or more games, separated by optional spaces / line ends.
games %=
single_game
>> *(omit[*(space|eol)] >> single_game)
;
}
// Rule declarations; the second template argument is the attribute each rule
// is declared with.
qi::rule<Iterator, pgn_tag(), qi::unused_type> tag;
qi::rule<Iterator, std::vector<pgn_tag>, qi::unused_type> header;
qi::rule<Iterator, std::string(), qi::unused_type> quoted_string;
qi::rule<Iterator, std::string(), qi::unused_type> result;
qi::rule<Iterator, std::string(), qi::unused_type> regular_move;
qi::rule<Iterator, std::string(), qi::unused_type> castle_move;
qi::rule<Iterator, std::string(), qi::unused_type> single_move;
qi::rule<Iterator, std::string(), qi::unused_type> move_turn;
qi::rule<Iterator, game_move(), qi::unused_type> full_move;
qi::rule<Iterator, std::vector<game_move>, qi::unused_type> game_description;
qi::rule<Iterator, pgn_game, qi::unused_type> single_game;
qi::rule<Iterator, std::vector<pgn_game>, qi::unused_type> games;
};
}
// Opens the file named by `inputFilePath` and hands the stream to parseInput,
// which checks the stream state before reading.
loloof64::PgnGamesExtractor::PgnGamesExtractor(std::string inputFilePath)
{
    std::ifstream pgnStream(inputFilePath);
    this->parseInput(pgnStream);
}
// Parses the games from a stream supplied by the caller.
loloof64::PgnGamesExtractor::PgnGamesExtractor(std::ifstream &inputFile)
{
    this->parseInput(inputFile);
}
// Nothing to release by hand: the member vector cleans up after itself.
loloof64::PgnGamesExtractor::~PgnGamesExtractor() = default;
// Reads all of `inputFile` into memory and parses it with pgn_parser.
// On success the parsed games replace the `games` member.
//
// Throws InputFileException if the stream is not usable on entry or a read
// error (badbit) occurs, and PgnParsingException if the grammar does not match
// or leaves input unconsumed (this includes an empty file). Both are thrown by
// value so that `catch (const InputFileException&)` and
// `catch (const std::exception&)` handlers see them.
void loloof64::PgnGamesExtractor::parseInput(std::ifstream &inputFile)
{
    if (!inputFile) throw InputFileException("File does not exist !");

    // Read in fixed-size chunks. This does not depend on a delimiter
    // character, so embedded NUL bytes do not truncate the input.
    std::string content;
    char chunk[4096];
    while (inputFile.read(chunk, sizeof chunk) || inputFile.gcount() > 0)
    {
        content.append(chunk, static_cast<std::string::size_type>(inputFile.gcount()));
    }
    // Reaching end-of-file sets eofbit/failbit; only badbit is a real I/O error.
    if (inputFile.bad()) throw InputFileException("Could not read the input file !");

    loloof64::pgn_parser<std::string::const_iterator> parser;
    std::vector<loloof64::pgn_game> temp_games;
    std::string::const_iterator iter = content.cbegin();
    const std::string::const_iterator end = content.cend();

    const bool success = boost::spirit::qi::phrase_parse(iter, end, parser, boost::spirit::qi::eol, temp_games);
    if (!success || iter != end)
    {
        // Report everything from the position where parsing stopped.
        const std::string error_fragment(iter, end);
        throw PgnParsingException("Failed to parse the input at :'" + error_fragment + "' !");
    }

    // Commit only after a complete parse; swap avoids copying the vector.
    games.swap(temp_games);
}
我问这个问题是因为我无法解析以下 pgn 文件:ScotchGambitPgn.zip。我认为这是因为该文件存在编码问题。
我正在使用 Spirit 2 和 C++ 11 (Gnu)
最佳答案
根据要求进行简单的 X3 翻译。
- 更少的代码行(10 行)
- 编译时间从 7.4 秒减少到 3.6 秒(clang)
- 编译时间从 11.4 秒减少到 6.0 秒 (gcc5)
- 运行时间从 0.80 秒减少到 0.55 秒(clang 和 gcc)
输出完全相同。
//#define BOOST_SPIRIT_DEBUG
#ifndef PGNGAMESEXTRACTOR_HPP
#define PGNGAMESEXTRACTOR_HPP
#include <string>
#include <vector>
#include <fstream>
#include <stdexcept>
namespace loloof64 {

// A key/value pair from a game's header section.
struct pgn_tag {
    std::string key;
    std::string value;
};

// One numbered move entry: the move number, the two half-moves and a result
// code.
struct game_move {
    unsigned move_number;
    std::string white_move;
    std::string black_move;
    enum result_t { white_won, black_won, draw, undecided } result;
};

// A single game: its header tags and its move list.
struct pgn_game {
    std::vector<pgn_tag> header;
    std::vector<game_move> moves;
};

// Error type for input that is not in the expected PGN format.
class PgnParsingException : public virtual std::runtime_error {
public:
    PgnParsingException(std::string message)
        : std::runtime_error(message)
    {
    }
};

// Error type for an input file that is missing or cannot be read.
class InputFileException : public virtual std::runtime_error {
public:
    InputFileException(std::string message)
        : std::runtime_error(message)
    {
    }
};

// Holds the games extracted from a PGN input.
class PgnGamesExtractor {
public:
    // Both constructors may throw PgnParsingException (if bad pgn format)
    // and InputFileException (if missing file).
    PgnGamesExtractor(std::string inputFilePath);
    PgnGamesExtractor(std::istream &inputFile);

    virtual ~PgnGamesExtractor();

    // Returns a copy of the stored games.
    std::vector<pgn_game> getGames() const { return games; }

private:
    void parseInput(std::istream &inputFile);

    std::vector<pgn_game> games;
};

}
#endif // PGNGAMESEXTRACTOR_HPP
#include <boost/spirit/home/x3.hpp>
#include <boost/spirit/include/support_istream_iterator.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
// Adapt the three PGN structs as Boost.Fusion sequences, using the form of
// the macro that takes member names only. Members are listed in declaration
// order.
BOOST_FUSION_ADAPT_STRUCT(loloof64::pgn_tag, key, value)
BOOST_FUSION_ADAPT_STRUCT(loloof64::game_move, move_number, white_move, black_move, result)
BOOST_FUSION_ADAPT_STRUCT(loloof64::pgn_game, header, moves)
namespace loloof64 {
// Spirit X3 parsers for PGN games. No whitespace is matched explicitly here;
// parseInput passes x3::space as the skipper to phrase_parse.
namespace pgn_parser {
using namespace boost::spirit::x3;
// Empty string supplied through attr(no_move) where a half-move is absent.
static std::string const no_move;
// Symbol table mapping the four result tokens to game_move::result_t values,
// built once by an immediately invoked lambda.
static auto const result = []{
symbols<game_move::result_t> table;
table.add
("1-0", game_move::white_won)
("0-1", game_move::black_won)
("1/2-1/2", game_move::draw)
("*", game_move::undecided);
return table;
}();
// A double quote, any run of characters other than '"', and a closing quote.
static auto const quoted_string = lexeme['"' >> *~char_('"') >> '"'];
// '[' name, quoted value, ']'.
static auto const tag = '[' >> +alnum >> quoted_string >> ']';
// One or more tags.
static auto const header = +tag;
// Despite the name this also accepts castling: "O-O-O" is tried before "O-O",
// then the piece/square form with an optional "e.p." suffix.
static auto const regular_move = as_parser("O-O-O") | "O-O" | (+char_("a-hNBRQK") >> +char_("a-h1-8x=NBRQK") >> -lit("e.p."));
// A move with an optional '+' or '#' suffix; raw[] exposes the matched input
// range as the rule's std::string attribute.
static auto const single_move = rule<struct single_move_, std::string> { "single_move" }
= raw [ lexeme [ regular_move >> -char_("+#")] ];
// Move number, then either "..." (first move attribute set to no_move) or "."
// followed by a move; then a second move or no_move; then an optional result.
static auto const full_move = rule<struct full_move_, game_move> { "full_move" }
= uint_
>> (lexeme["..." >> attr(no_move)] | "." >> single_move)
>> (single_move | attr(no_move))
>> -result;
// One or more full moves.
static auto const game_description = +full_move;
// An optional header followed by the moves.
static auto const single_game = rule<struct single_game_, pgn_game> { "single_game" }
= -header >> game_description;
// Zero or more games.
static auto const games = *single_game;
}
}
// Opens the file named by `inputFilePath` and hands the stream to parseInput,
// which checks the stream state before parsing.
loloof64::PgnGamesExtractor::PgnGamesExtractor(std::string inputFilePath)
{
    std::ifstream pgnStream(inputFilePath);
    this->parseInput(pgnStream);
}
// Parses the games from a stream supplied by the caller.
loloof64::PgnGamesExtractor::PgnGamesExtractor(std::istream &inputFile)
{
    this->parseInput(inputFile);
}
// Nothing to release by hand: the member vector cleans up after itself.
loloof64::PgnGamesExtractor::~PgnGamesExtractor() = default;
void loloof64::PgnGamesExtractor::parseInput(std::istream &inputFile) {
if (inputFile.fail() || inputFile.bad())
throw new InputFileException("Could not read the input file !");
typedef boost::spirit::istream_iterator It;
std::vector<loloof64::pgn_game> temp_games;
It iter(inputFile >> std::noskipws), end;
bool success = boost::spirit::x3::phrase_parse(iter, end, pgn_parser::games, boost::spirit::x3::space, temp_games);
if (success && iter == end) {
games.swap(temp_games);
} else {
std::string error_fragment(iter, end);
throw PgnParsingException("Failed to parse the input at :'" + error_fragment + "' !");
}
}
#include <iostream>
// Parses "ScotchGambit.pgn" and prints the number of games followed by one
// line per move. Returns 1 and writes the message to std::cerr if an
// exception derived from std::exception is thrown while doing so.
int main() {
    try {
        loloof64::PgnGamesExtractor pge("ScotchGambit.pgn");
        // getGames() returns by value: take one copy and reuse it.
        auto const games = pge.getGames();
        std::cout << "Parsed " << games.size() << " games\n";
        for (auto const& g : games)
            for (auto const& m : g.moves)
                std::cout << m.move_number << ".\t" << m.white_move << "\t" << m.black_move << "\n";
    } catch (std::exception const& e) {
        std::cerr << e.what() << "\n";
        return 1;
    }
    return 0;
}
关于c++ - 我怎样才能简单地使用无法识别的字符?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/34163728/