You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
	
	
		
			203 lines
		
	
	
		
			6.1 KiB
		
	
	
	
		
			Python
		
	
			
		
		
	
	
			203 lines
		
	
	
		
			6.1 KiB
		
	
	
	
		
			Python
		
	
"Converts Nearley grammars to Lark"
 | 
						|
 | 
						|
import os.path
 | 
						|
import sys
 | 
						|
import codecs
 | 
						|
import argparse
 | 
						|
 | 
						|
 | 
						|
from lark import Lark, Transformer, v_args
 | 
						|
 | 
						|
nearley_grammar = r"""
 | 
						|
    start: (ruledef|directive)+
 | 
						|
 | 
						|
    directive: "@" NAME (STRING|NAME)
 | 
						|
             | "@" JS  -> js_code
 | 
						|
    ruledef: NAME "->" expansions
 | 
						|
           | NAME REGEXP "->" expansions -> macro
 | 
						|
    expansions: expansion ("|" expansion)*
 | 
						|
 | 
						|
    expansion: expr+ js
 | 
						|
 | 
						|
    ?expr: item (":" /[+*?]/)?
 | 
						|
 | 
						|
    ?item: rule|string|regexp|null
 | 
						|
         | "(" expansions ")"
 | 
						|
 | 
						|
    rule: NAME
 | 
						|
    string: STRING
 | 
						|
    regexp: REGEXP
 | 
						|
    null: "null"
 | 
						|
    JS: /{%.*?%}/s
 | 
						|
    js: JS?
 | 
						|
 | 
						|
    NAME: /[a-zA-Z_$]\w*/
 | 
						|
    COMMENT: /#[^\n]*/
 | 
						|
    REGEXP: /\[.*?\]/
 | 
						|
 | 
						|
    STRING: _STRING "i"?
 | 
						|
 | 
						|
    %import common.ESCAPED_STRING -> _STRING
 | 
						|
    %import common.WS
 | 
						|
    %ignore WS
 | 
						|
    %ignore COMMENT
 | 
						|
 | 
						|
    """
 | 
						|
 | 
						|
nearley_grammar_parser = Lark(nearley_grammar, parser='earley', lexer='basic')
 | 
						|
 | 
						|
def _get_rulename(name):
 | 
						|
    name = {'_': '_ws_maybe', '__': '_ws'}.get(name, name)
 | 
						|
    return 'n_' + name.replace('$', '__DOLLAR__').lower()
 | 
						|
 | 
						|
@v_args(inline=True)
 | 
						|
class NearleyToLark(Transformer):
 | 
						|
    def __init__(self):
 | 
						|
        self._count = 0
 | 
						|
        self.extra_rules = {}
 | 
						|
        self.extra_rules_rev = {}
 | 
						|
        self.alias_js_code = {}
 | 
						|
 | 
						|
    def _new_function(self, code):
 | 
						|
        name = 'alias_%d' % self._count
 | 
						|
        self._count += 1
 | 
						|
 | 
						|
        self.alias_js_code[name] = code
 | 
						|
        return name
 | 
						|
 | 
						|
    def _extra_rule(self, rule):
 | 
						|
        if rule in self.extra_rules_rev:
 | 
						|
            return self.extra_rules_rev[rule]
 | 
						|
 | 
						|
        name = 'xrule_%d' % len(self.extra_rules)
 | 
						|
        assert name not in self.extra_rules
 | 
						|
        self.extra_rules[name] = rule
 | 
						|
        self.extra_rules_rev[rule] = name
 | 
						|
        return name
 | 
						|
 | 
						|
    def rule(self, name):
 | 
						|
        return _get_rulename(name)
 | 
						|
 | 
						|
    def ruledef(self, name, exps):
 | 
						|
        return '!%s: %s' % (_get_rulename(name), exps)
 | 
						|
 | 
						|
    def expr(self, item, op):
 | 
						|
        rule = '(%s)%s' % (item, op)
 | 
						|
        return self._extra_rule(rule)
 | 
						|
 | 
						|
    def regexp(self, r):
 | 
						|
        return '/%s/' % r
 | 
						|
 | 
						|
    def null(self):
 | 
						|
        return ''
 | 
						|
 | 
						|
    def string(self, s):
 | 
						|
        return self._extra_rule(s)
 | 
						|
 | 
						|
    def expansion(self, *x):
 | 
						|
        x, js = x[:-1], x[-1]
 | 
						|
        if js.children:
 | 
						|
            js_code ,= js.children
 | 
						|
            js_code = js_code[2:-2]
 | 
						|
            alias = '-> ' + self._new_function(js_code)
 | 
						|
        else:
 | 
						|
            alias = ''
 | 
						|
        return ' '.join(x) + alias
 | 
						|
 | 
						|
    def expansions(self, *x):
 | 
						|
        return '%s' % ('\n    |'.join(x))
 | 
						|
 | 
						|
    def start(self, *rules):
 | 
						|
        return '\n'.join(filter(None, rules))
 | 
						|
 | 
						|
def _nearley_to_lark(g, builtin_path, n2l, js_code, folder_path, includes):
 | 
						|
    rule_defs = []
 | 
						|
 | 
						|
    tree = nearley_grammar_parser.parse(g)
 | 
						|
    for statement in tree.children:
 | 
						|
        if statement.data == 'directive':
 | 
						|
            directive, arg = statement.children
 | 
						|
            if directive in ('builtin', 'include'):
 | 
						|
                folder = builtin_path if directive == 'builtin' else folder_path
 | 
						|
                path = os.path.join(folder, arg[1:-1])
 | 
						|
                if path not in includes:
 | 
						|
                    includes.add(path)
 | 
						|
                    with codecs.open(path, encoding='utf8') as f:
 | 
						|
                        text = f.read()
 | 
						|
                    rule_defs += _nearley_to_lark(text, builtin_path, n2l, js_code, os.path.abspath(os.path.dirname(path)), includes)
 | 
						|
            else:
 | 
						|
                assert False, directive
 | 
						|
        elif statement.data == 'js_code':
 | 
						|
            code ,= statement.children
 | 
						|
            code = code[2:-2]
 | 
						|
            js_code.append(code)
 | 
						|
        elif statement.data == 'macro':
 | 
						|
            pass    # TODO Add support for macros!
 | 
						|
        elif statement.data == 'ruledef':
 | 
						|
            rule_defs.append(n2l.transform(statement))
 | 
						|
        else:
 | 
						|
            raise Exception("Unknown statement: %s" % statement)
 | 
						|
 | 
						|
    return rule_defs
 | 
						|
 | 
						|
 | 
						|
def create_code_for_nearley_grammar(g, start, builtin_path, folder_path, es6=False):
 | 
						|
    import js2py
 | 
						|
 | 
						|
    emit_code = []
 | 
						|
    def emit(x=None):
 | 
						|
        if x:
 | 
						|
            emit_code.append(x)
 | 
						|
        emit_code.append('\n')
 | 
						|
 | 
						|
    js_code = ['function id(x) {return x[0];}']
 | 
						|
    n2l = NearleyToLark()
 | 
						|
    rule_defs = _nearley_to_lark(g, builtin_path, n2l, js_code, folder_path, set())
 | 
						|
    lark_g = '\n'.join(rule_defs)
 | 
						|
    lark_g += '\n'+'\n'.join('!%s: %s' % item for item in n2l.extra_rules.items())
 | 
						|
 | 
						|
    emit('from lark import Lark, Transformer')
 | 
						|
    emit()
 | 
						|
    emit('grammar = ' + repr(lark_g))
 | 
						|
    emit()
 | 
						|
 | 
						|
    for alias, code in n2l.alias_js_code.items():
 | 
						|
        js_code.append('%s = (%s);' % (alias, code))
 | 
						|
 | 
						|
    if es6:
 | 
						|
        emit(js2py.translate_js6('\n'.join(js_code)))
 | 
						|
    else:
 | 
						|
        emit(js2py.translate_js('\n'.join(js_code)))
 | 
						|
    emit('class TransformNearley(Transformer):')
 | 
						|
    for alias in n2l.alias_js_code:
 | 
						|
        emit("    %s = var.get('%s').to_python()" % (alias, alias))
 | 
						|
    emit("    __default__ = lambda self, n, c, m: c if c else None")
 | 
						|
 | 
						|
    emit()
 | 
						|
    emit('parser = Lark(grammar, start="n_%s", maybe_placeholders=False)' % start)
 | 
						|
    emit('def parse(text):')
 | 
						|
    emit('    return TransformNearley().transform(parser.parse(text))')
 | 
						|
 | 
						|
    return ''.join(emit_code)
 | 
						|
 | 
						|
def main(fn, start, nearley_lib, es6=False):
 | 
						|
    with codecs.open(fn, encoding='utf8') as f:
 | 
						|
        grammar = f.read()
 | 
						|
    return create_code_for_nearley_grammar(grammar, start, os.path.join(nearley_lib, 'builtin'), os.path.abspath(os.path.dirname(fn)), es6=es6)
 | 
						|
 | 
						|
def get_arg_parser():
 | 
						|
    parser = argparse.ArgumentParser(description='Reads a Nearley grammar (with js functions), and outputs an equivalent lark parser.')
 | 
						|
    parser.add_argument('nearley_grammar', help='Path to the file containing the nearley grammar')
 | 
						|
    parser.add_argument('start_rule', help='Rule within the nearley grammar to make the base rule')
 | 
						|
    parser.add_argument('nearley_lib', help='Path to root directory of nearley codebase (used for including builtins)')
 | 
						|
    parser.add_argument('--es6', help='Enable experimental ES6 support', action='store_true')
 | 
						|
    return parser
 | 
						|
 | 
						|
if __name__ == '__main__':
 | 
						|
    parser = get_arg_parser()
 | 
						|
    if len(sys.argv) == 1:
 | 
						|
        parser.print_help(sys.stderr)
 | 
						|
        sys.exit(1)
 | 
						|
    args = parser.parse_args()
 | 
						|
    print(main(fn=args.nearley_grammar, start=args.start_rule, nearley_lib=args.nearley_lib, es6=args.es6))
 |