You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
203 lines
6.1 KiB
Python
203 lines
6.1 KiB
Python
"Converts Nearley grammars to Lark"
|
|
|
|
import os.path
|
|
import sys
|
|
import codecs
|
|
import argparse
|
|
|
|
|
|
from lark import Lark, Transformer, v_args
|
|
|
|
# Lark grammar for the subset of Nearley syntax this converter reads:
# rule definitions ("name -> ..."), "@" directives, {% ... %} JavaScript
# blocks, quoted strings (optionally suffixed with "i"), bracketed
# character-class regexps, and "#" comments (which are ignored).
# A rule definition whose name is followed by a REGEXP is tagged "macro".
nearley_grammar = r"""
start: (ruledef|directive)+

directive: "@" NAME (STRING|NAME)
| "@" JS -> js_code
ruledef: NAME "->" expansions
| NAME REGEXP "->" expansions -> macro
expansions: expansion ("|" expansion)*

expansion: expr+ js

?expr: item (":" /[+*?]/)?

?item: rule|string|regexp|null
| "(" expansions ")"

rule: NAME
string: STRING
regexp: REGEXP
null: "null"
JS: /{%.*?%}/s
js: JS?

NAME: /[a-zA-Z_$]\w*/
COMMENT: /#[^\n]*/
REGEXP: /\[.*?\]/

STRING: _STRING "i"?

%import common.ESCAPED_STRING -> _STRING
%import common.WS
%ignore WS
%ignore COMMENT

"""

# Parser for Nearley source text, built once when the module is imported
# (Earley parser with the 'basic' lexer).
nearley_grammar_parser = Lark(nearley_grammar, parser='earley', lexer='basic')
|
|
|
|
def _get_rulename(name):
|
|
name = {'_': '_ws_maybe', '__': '_ws'}.get(name, name)
|
|
return 'n_' + name.replace('$', '__DOLLAR__').lower()
|
|
|
|
@v_args(inline=True)
class NearleyToLark(Transformer):
    """Render parsed Nearley rule definitions as Lark grammar text.

    Every callback returns a string fragment; because of
    ``v_args(inline=True)`` the children of a node arrive as positional
    arguments. While transforming, the instance accumulates:

    * ``extra_rules`` -- generated rule name -> rule body, for helper rules
      wrapping strings and quantified items;
    * ``extra_rules_rev`` -- the reverse mapping, used to reuse a helper
      rule when the same body is seen again;
    * ``alias_js_code`` -- generated alias name -> JavaScript source taken
      from ``{% ... %}`` blocks attached to expansions.
    """

    def __init__(self):
        # Run the base-class constructor too, so state it sets up is present
        # on this instance instead of relying on class-level defaults.
        super().__init__()
        self._count = 0             # number of alias functions handed out so far
        self.extra_rules = {}       # helper rule name -> rule body
        self.extra_rules_rev = {}   # rule body -> helper rule name
        self.alias_js_code = {}     # alias name -> JavaScript source

    def _new_function(self, code):
        """Store ``code`` under a fresh ``alias_<n>`` name and return that name."""
        name = 'alias_%d' % self._count
        self._count += 1

        self.alias_js_code[name] = code
        return name

    def _extra_rule(self, rule):
        """Return the helper rule name for ``rule``, creating it on first use."""
        if rule in self.extra_rules_rev:
            return self.extra_rules_rev[rule]

        name = 'xrule_%d' % len(self.extra_rules)
        # Internal invariant: names are derived from the dict size and
        # entries are never removed, so a clash would indicate a bug here.
        assert name not in self.extra_rules
        self.extra_rules[name] = rule
        self.extra_rules_rev[rule] = name
        return name

    def rule(self, name):
        """Reference to another rule: translate its name."""
        return _get_rulename(name)

    def ruledef(self, name, exps):
        """Full rule definition, emitted with the ``!`` prefix."""
        return '!%s: %s' % (_get_rulename(name), exps)

    def expr(self, item, op):
        """Quantified item (``item:+``, ``item:*``, ``item:?``) as a helper rule."""
        rule = '(%s)%s' % (item, op)
        return self._extra_rule(rule)

    def regexp(self, r):
        """Bracketed character class, wrapped in ``/.../``."""
        return '/%s/' % r

    def null(self):
        """Nearley ``null`` contributes nothing to the expansion."""
        return ''

    def string(self, s):
        """String literal, routed through a helper rule."""
        return self._extra_rule(s)

    def expansion(self, *x):
        """One alternative: its items followed by an optional JS alias."""
        # The grammar puts the ``js`` subtree last; everything before it is
        # an already-rendered item string.
        x, js = x[:-1], x[-1]
        if js.children:
            (js_code,) = js.children
            # Drop the leading "{%" and trailing "%}" delimiters.
            js_code = js_code[2:-2]
            alias = '-> ' + self._new_function(js_code)
        else:
            alias = ''
        return ' '.join(x) + alias

    def expansions(self, *x):
        """All alternatives of a rule, one per line."""
        return '%s' % ('\n |'.join(x))

    def start(self, *rules):
        """Join the non-empty rendered rules with newlines."""
        return '\n'.join(filter(None, rules))
|
|
|
|
def _nearley_to_lark(g, builtin_path, n2l, js_code, folder_path, includes):
    """Convert Nearley grammar text into a list of Lark rule definitions.

    Args:
        g: Nearley grammar source text.
        builtin_path: Directory used to resolve ``@builtin`` directives.
        n2l: ``NearleyToLark`` instance used to transform each rule
            definition (it also accumulates helper rules and JS aliases).
        js_code: List that receives the body of every top-level
            ``@{% ... %}`` block; mutated in place.
        folder_path: Directory used to resolve ``@include`` directives.
        includes: Set of file paths already loaded; mutated in place so a
            file is only processed once.

    Returns:
        A list of Lark rule-definition strings, including those of any
        files pulled in via ``@builtin`` / ``@include``.

    Raises:
        AssertionError: For a directive other than ``builtin``/``include``.
        Exception: For a statement type this function does not know.
    """
    rule_defs = []

    tree = nearley_grammar_parser.parse(g)
    for statement in tree.children:
        if statement.data == 'directive':
            directive, arg = statement.children
            if directive in ('builtin', 'include'):
                folder = builtin_path if directive == 'builtin' else folder_path
                # arg[1:-1] drops the first and last character of the
                # argument (the quotes, when it is a quoted string).
                path = os.path.join(folder, arg[1:-1])
                if path not in includes:
                    includes.add(path)
                    with codecs.open(path, encoding='utf8') as f:
                        text = f.read()
                    # Nested includes are resolved relative to the included file.
                    rule_defs += _nearley_to_lark(text, builtin_path, n2l, js_code, os.path.abspath(os.path.dirname(path)), includes)
            else:
                # Raised explicitly rather than via `assert False`, which is
                # removed under `python -O` and would silently skip the
                # unsupported directive. Exception type and args are unchanged.
                raise AssertionError(directive)
        elif statement.data == 'js_code':
            (code,) = statement.children
            # Drop the leading "{%" and trailing "%}" delimiters.
            code = code[2:-2]
            js_code.append(code)
        elif statement.data == 'macro':
            pass    # TODO Add support for macros!
        elif statement.data == 'ruledef':
            rule_defs.append(n2l.transform(statement))
        else:
            raise Exception("Unknown statement: %s" % statement)

    return rule_defs
|
|
|
|
|
|
def create_code_for_nearley_grammar(g, start, builtin_path, folder_path, es6=False):
    """Generate Python source for a Lark parser equivalent to a Nearley grammar.

    Args:
        g: Nearley grammar source text.
        start: Name of the Nearley rule to use as the start rule.
        builtin_path: Directory used to resolve ``@builtin`` directives.
        folder_path: Directory used to resolve ``@include`` directives.
        es6: When true, translate the JavaScript with
            ``js2py.translate_js6`` instead of ``js2py.translate_js``.

    Returns:
        The generated module as a single string. It defines ``grammar``,
        the translated JavaScript, a ``TransformNearley`` class, a
        ``parser`` and a ``parse(text)`` function.
    """
    # Imported here so js2py is only needed when code is actually generated.
    import js2py

    emit_code = []

    def emit(x=None):
        # Append one output line; with no (or empty) text, a blank line.
        if x:
            emit_code.append(x)
        emit_code.append('\n')

    js_code = ['function id(x) {return x[0];}']
    n2l = NearleyToLark()
    rule_defs = _nearley_to_lark(g, builtin_path, n2l, js_code, folder_path, set())
    lark_g = '\n'.join(rule_defs)
    # Helper rules collected by the transformer go after the main rules.
    lark_g += '\n'+'\n'.join('!%s: %s' % item for item in n2l.extra_rules.items())

    emit('from lark import Lark, Transformer')
    emit()
    emit('grammar = ' + repr(lark_g))
    emit()

    for alias, code in n2l.alias_js_code.items():
        js_code.append('%s = (%s);' % (alias, code))

    if es6:
        emit(js2py.translate_js6('\n'.join(js_code)))
    else:
        emit(js2py.translate_js('\n'.join(js_code)))
    emit('class TransformNearley(Transformer):')
    for alias in n2l.alias_js_code:
        emit(" %s = var.get('%s').to_python()" % (alias, alias))
    emit(" __default__ = lambda self, n, c, m: c if c else None")

    emit()
    # Rule definitions are named via _get_rulename (lower-cased, '$'
    # escaped, '_'/'__' remapped), so the start rule must be mapped the
    # same way or it would name a rule that does not exist. For plain
    # lowercase names this yields the same "n_<start>" as before.
    emit('parser = Lark(grammar, start="%s", maybe_placeholders=False)' % _get_rulename(start))
    emit('def parse(text):')
    emit(' return TransformNearley().transform(parser.parse(text))')

    return ''.join(emit_code)
|
|
|
|
def main(fn, start, nearley_lib, es6=False):
    """Read the Nearley grammar file ``fn`` and return generated parser code.

    ``@builtin`` directives are resolved against ``<nearley_lib>/builtin``
    and ``@include`` directives against the directory containing ``fn``.
    ``start`` and ``es6`` are passed through to
    ``create_code_for_nearley_grammar``.
    """
    with codecs.open(fn, encoding='utf8') as grammar_file:
        grammar = grammar_file.read()

    builtin_dir = os.path.join(nearley_lib, 'builtin')
    grammar_dir = os.path.abspath(os.path.dirname(fn))
    return create_code_for_nearley_grammar(
        grammar,
        start,
        builtin_dir,
        grammar_dir,
        es6=es6,
    )
|
|
|
|
def get_arg_parser():
    """Build the command-line parser for this tool.

    It takes three positional arguments (``nearley_grammar``,
    ``start_rule``, ``nearley_lib``) and the optional ``--es6`` flag.
    """
    arg_parser = argparse.ArgumentParser(
        description='Reads a Nearley grammar (with js functions), and outputs an equivalent lark parser.',
    )

    # Positional arguments, in the order they must appear on the command line.
    positionals = (
        ('nearley_grammar', 'Path to the file containing the nearley grammar'),
        ('start_rule', 'Rule within the nearley grammar to make the base rule'),
        ('nearley_lib', 'Path to root directory of nearley codebase (used for including builtins)'),
    )
    for dest, help_text in positionals:
        arg_parser.add_argument(dest, help=help_text)

    arg_parser.add_argument('--es6', action='store_true', help='Enable experimental ES6 support')
    return arg_parser
|
|
|
|
if __name__ == '__main__':
    # Script entry point: convert the grammar named on the command line and
    # print the generated Python module to stdout.
    parser = get_arg_parser()

    # Called with no arguments at all: print the full help to stderr and
    # exit with status 1.
    if len(sys.argv) == 1:
        parser.print_help(sys.stderr)
        sys.exit(1)

    args = parser.parse_args()
    generated = main(
        fn=args.nearley_grammar,
        start=args.start_rule,
        nearley_lib=args.nearley_lib,
        es6=args.es6,
    )
    print(generated)
|