How to add keyword to acorn or esprima parser

684 views Asked by At

I am working on a language that transpiles to JavaScript and has a similar syntax. However, I want to include some new types of block statements. For syntax purposes they are the same as an IfStatement. How can I get esprima or acorn to parse the program `MyStatement {a=1;}` without throwing an error? It's fine if it calls it an IfStatement. I would prefer not to fork esprima.

1

There is 1 answer

0
bruceceng On

It turns out that the plugin capabilities of acorn are not really documented. It seems like forking acorn would be the easiest route. In this case, it is as simple as searching for occurrences of _if and following a similar pattern for _MyStatement.

However it is possible to write a plugin to accomplish what I was trying to do. It seems a bit of a hack, but here is the code. The basic steps are:

  1. Extend Parser and add to the list of keywords that will be recognized by the first pass.

  2. Create a TokenType for the new keyword and add it to Parser.acorn.keywordTypes, then extend parseStatement so that it processes the new TokenType.

  3. Create a handler for the new TokenType which will add information to the Abstract Syntax Tree as required by the keyword functionality and also consume tokens using commands like this.expect(tt.parenR) to eat a ')' or this.parseExpression() to process an entire expression.

Here is the code:

// Sample input for the extended parser: nested `MyStatement` blocks written
// without parentheses, with a condition, and with empty parentheses, plus an
// ordinary `if` statement alongside them. The binding is never reassigned, so
// it is declared with `const`.
const program =
`
  MyStatement {
    MyStatement(true) {
      MyStatement() {
        var a = 1;
      }
    }
    if (1) {
      var c = 0;
    }
  }
`;

const acorn = require("acorn");

const Parser = acorn.Parser;
const tt = acorn.tokTypes; //used to access standard token types like "("
const TokenType = acorn.TokenType; //used to create new types of Tokens.

// Register a dedicated token type for the new keyword in acorn's keywordTypes
// table; the plugin's parseStatement looks the same entry up again to
// recognise the keyword.
Parser.acorn.keywordTypes.MyStatement = new TokenType("MyStatement", {
  keyword: "MyStatement",
});

//const isIdentifierStart = acorn.isIdentifierStart;

/**
 * Builds a regular expression that matches exactly one word from a list.
 *
 * Words are separated by whitespace; repeated, leading, or trailing
 * separators are ignored so they can no longer produce an empty alternative
 * that would match the empty string. Regex metacharacters inside a word
 * (for example `$`, which is legal in identifiers) are escaped and matched
 * literally.
 *
 * @param {string} words - Whitespace-separated list of words.
 * @returns {RegExp} Anchored pattern matching any one listed word in full;
 *   a pattern that never matches when the list contains no words.
 */
function wordsRegexp(words) {
  const alternatives = words
    .split(/\s+/)
    .filter((word) => word !== "")
    .map((word) => word.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"));

  if (alternatives.length === 0) {
    // An empty alternation would match "", so return a never-matching pattern.
    return /(?!)/;
  }

  return new RegExp(`^(?:${alternatives.join("|")})$`);
}

/**
 * Acorn plugin that teaches the parser a `MyStatement` keyword.
 *
 * `MyStatement`, `MyStatement()` and `MyStatement(expr)` are each followed by
 * a statement and are emitted as an `IfStatement` node that carries an extra
 * `isMyStatement: true` flag.
 *
 * Relies on two names from the enclosing file: `tt` (acorn's token types) and
 * the `MyStatement` entry stored in `Parser.acorn.keywordTypes`.
 *
 * @param {Function} Parser - Parser class to extend (supplied by `Parser.extend`).
 * @returns {Function} Subclass of `Parser` with the hooks below.
 */
const bruceware = function (Parser) {
  return class extends Parser {
    /**
     * Adds `MyStatement` to this parser's keyword matcher, then delegates to
     * the base `parse`.
     *
     * @param {*} program - Passed through to `super.parse` unchanged.
     * @returns {*} Whatever the base `parse` returns.
     */
    parse(program) {
      console.log("hooking parse.");

      // Extend the matcher the base parser already built rather than
      // replacing it with a hard-coded list, so the keyword set the base
      // parser selected stays intact and only `MyStatement` is added.
      this.keywords = new RegExp(`${this.keywords.source}|^MyStatement$`);

      return super.parse(program);
    }

    /**
     * Routes statements that start with the `MyStatement` token to
     * `parseMyStatement`; everything else goes to the base implementation.
     *
     * @param {*} context - Forwarded to the base `parseStatement`.
     * @param {*} topLevel - Forwarded to the base `parseStatement`.
     * @param {*} exports - Forwarded to the base `parseStatement`.
     * @returns {object} The parsed statement node.
     */
    parseStatement(context, topLevel, exports) {
      const starttype = this.type;
      console.log("!!!hooking parseStatement", starttype);

      if (starttype !== Parser.acorn.keywordTypes.MyStatement) {
        return super.parseStatement(context, topLevel, exports);
      }

      console.log("Parse MyStatement");
      return this.parseMyStatement(this.startNode());
    }

    /**
     * Parses `MyStatement [ "(" [expr] ")" ] statement` into `node`.
     *
     * @param {object} node - Fresh node created at the keyword.
     * @returns {object} `node`, finished with type `"IfStatement"`.
     */
    parseMyStatement(node) {
      console.log("parse MyStatement");
      this.next(); // step past the `MyStatement` keyword itself

      // The parameter list is optional: `MyStatement { ... }` is allowed.
      // Without one, `test` is the plain number 0 as a placeholder (not an
      // AST node, so this may break code generation later).
      node.test = this.type === tt.parenL ? this.parseOptionalParenExpression() : 0;

      // Marks this node as a "MyStatement" rather than a real if statement.
      node.isMyStatement = true;

      // The body is parsed like the consequent of an if statement for now.
      // NOTE(review): the "if" context presumably mirrors acorn's own
      // if-statement handling — confirm against the acorn version in use.
      node.consequent = this.parseStatement("if");

      return this.finishNode(node, "IfStatement");
    }

    /**
     * Parses `"(" [expr] ")"`; the parentheses may be empty, as in
     * `MyStatement() { ... }`.
     *
     * @returns {object|number} The parsed expression, or the placeholder 0
     *   when the parentheses are empty (this may break code generation later).
     */
    parseOptionalParenExpression() {
      this.expect(tt.parenL);

      console.log("Type: ", this.type);

      // An immediate ")" means the condition was left blank.
      const val = this.type === tt.parenR ? 0 : this.parseExpression();
      this.expect(tt.parenR);

      return val;
    }
  };
};

// Clear the terminal by writing ESC followed by "c". The escape is spelled
// "\x1B" because the legacy octal form "\033" is a syntax error in
// strict-mode code and ES modules; the bytes written are the same.
process.stdout.write("\x1Bc");

// Attempt to parse the sample program with the plugin-extended parser.
const result2 = Parser.extend(bruceware).parse(program);

// Show the resulting syntax tree as JSON indented by one space.
console.log(JSON.stringify(result2, null, " "));