javascript - 使用 parsimmon 库解析基于缩进的语言

我的问题受到另一个（针对 Scala 的）类似问题的启发，不过这里针对的是 JavaScript，并使用 parsimmon 解析器组合器库。我想解析对缩进敏感的语言，例如 Python 或 YAML。

我已经很容易地把那个答案中的 Scala 示例转换成了 JavaScript——关键是 parsimmon 中的 chain 函数，它等同于 Scala 解析器组合器中的 >>> 运算符：两者都接受一个解析器和一个返回解析器的函数，并把第一个解析器的结果传给该函数，以选择下一个解析器。

但是，我不太清楚如何让它支持递归。该示例只适用于单个 block——我看不出如何创建嵌套的 block，并根据需要跟踪缩进（以及取消缩进）的级别，从而解析类似 Python 的语言。

最佳答案

我是 Parsimmon 的维护者。我意识到这个问题真的很老，但我偶然发现了它并想回答。

GitHub 上 parsimmon 存储库中的 python-ish.js 示例应该可以帮助您了解如何解析基于缩进的语言。

这与 Josh 的回答非常相似，但我认为更容易理解一些。

https://github.com/jneen/parsimmon/blob/master/examples/python-ish.js

"use strict";

// Run me with Node to see my output!

let util = require("util");
let P = require("..");

///////////////////////////////////////////////////////////////////////

// Because parsing indentation-sensitive languages such as Python requires
// tracking state, all of our parsers are created inside a function that takes
// the current parsing state. In this case it's just the current indentation
// level, but a real Python parser would also *at least* need to keep track of
// whether the current parsing is inside of () or [] or {} so that you can know
// to ignore all whitespace, instead of further tracking indentation.
//
// Implementing all of Python's various whitespace requirements, including
// comments and line continuations (backslash at the end of the line) is left as
// an exercise for the reader. I've tried and frankly it's pretty tricky.
/**
 * Build the full set of parsers for one indentation level.
 *
 * Every parser in the returned language closes over `indent`, so parsing a
 * nested block means calling PyX again with the deeper indentation that was
 * measured on the block's first statement.
 *
 * @param {number} indent - Number of leading spaces that statements at the
 *   current nesting level must have.
 * @returns {object} The language created by `P.createLanguage`, exposing the
 *   rules `Block`, `Statement`, `RestStatement`, `Ident`, `CountSpaces`,
 *   `IndentSame`, `IndentMore`, `NL` and `End`.
 */
function PyX(indent) {
  return P.createLanguage({
    // This is where the magic happens. Basically we need to parse a deeper
    // indentation level on the first statement of the block and keep track of
    // new indentation level. Then we make a whole new set of parsers that use
    // that new indentation level for all their parsing. Each line past the
    // first is required to be indented to the same level as that new deeper
    // indentation level.
    Block: r =>
      P.seqObj(
        P.string("block:"),
        r.NL,
        ["n", r.IndentMore],
        ["first", r.Statement]
      ).chain(({ n, first }) =>
        // `n` is the indentation measured on the first statement; the
        // remaining statements are parsed by a language built for that level.
        PyX(n)
          .RestStatement.many()
          .map(rest => ["BLOCK", first, ...rest])
      ),

    // This is just a statement in our language. To simplify, this is either a
    // block of code or just an identifier
    Statement: r => P.alt(r.Block, r.Ident),

    // This is a statement which is indented to the level of the current parse
    // state. It's called RestStatement because the first statement in a block
    // is indented more than the previous state, but the *rest* of the
    // statements match up with the new state.
    RestStatement: r => r.IndentSame.then(r.Statement),

    // Just a variable and then the end of the line.
    Ident: r => P.regexp(/[a-z]+/i).skip(r.End),

    // Consume zero or more spaces and then return the number consumed. For a
    // more Python-like language, this parser would also accept tabs and then
    // expand them to the correct number of spaces
    //
    // https://docs.python.org/3/reference/lexical_analysis.html#indentation
    CountSpaces: () => P.regexp(/[ ]*/).map(s => s.length),

    // Count the current indentation level and assert it's equal to the current
    // parse state's desired indentation
    IndentSame: r =>
      r.CountSpaces.chain(n => {
        if (n === indent) {
          return P.of(n);
        }
        // The argument of P.fail is reported as what was *expected*, so it
        // must name the required indentation, not the amount that was found.
        return P.fail(`${indent} spaces`);
      }),

    // Count the current indentation level and assert it's more than the
    // current parse state's desired indentation
    IndentMore: r =>
      r.CountSpaces.chain(n => {
        if (n > indent) {
          return P.of(n);
        }
        // Same as above: describe the requirement relative to `indent`.
        return P.fail(`more than ${indent} spaces`);
      }),

    // Support all three standard text file line endings
    NL: () => P.alt(P.string("\r\n"), P.oneOf("\r\n")),

    // Lines should always end in a newline sequence, but many files are missing
    // the final newline
    End: r => P.alt(r.NL, P.eof)
  });
}

// Top-level language: statements at the outermost level carry no indentation,
// so the parsers are built for an indentation of zero spaces.
const Pythonish = PyX(0);

///////////////////////////////////////////////////////////////////////

// Sample program fed to the parser. Each entry is one source line; the
// trailing empty entry makes the joined text end with a newline. The nested
// blocks deliberately use uneven indentation widths (2, 9 and 10 spaces).
const text = [
  "block:",
  "  alpha",
  "  bravo",
  "  block:",
  "         charlie",
  "         delta",
  "         echo",
  "         block:",
  "          foxtrot",
  "  golf",
  ""
].join("\n");

/**
 * Print a value to stdout via util.inspect, without a nesting depth limit.
 *
 * @param {*} x - The value to display (here, the parsed AST).
 * @returns {void}
 */
function prettyPrint(x) {
  // util.inspect's `colors` option is a boolean; any truthy value (such as the
  // string "auto") turns ANSI escapes on unconditionally. Only colorize when
  // stdout is a terminal so piped or redirected output stays plain text.
  const opts = { depth: null, colors: Boolean(process.stdout.isTTY) };
  console.log(util.inspect(x, opts));
}

// Parse the sample program starting from the Statement rule, then display the
// resulting nested-array AST.
const ast = Pythonish.Statement.tryParse(text);
prettyPrint(ast);

关于javascript - 使用 parsimmon 库解析基于缩进的语言，我们在Stack Overflow上找到一个类似的问题： https://stackoverflow.com/questions/40499588/

javascript - 使用 parsimmon 库解析基于缩进的语言

上一篇：javascript - angular 2 - 如何为组件包含 javascript？

下一篇：javascript - 结合具有相似逻辑的 2 个 Knockout 指令