You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

197 lines
5.5 KiB
Python

###{standalone
#
#
# Lark Stand-alone Generator Tool
# ----------------------------------
# Generates a stand-alone LALR(1) parser
#
# Git: https://github.com/erezsh/lark
# Author: Erez Shinan (erezshin@gmail.com)
#
#
# >>> LICENSE
#
# This tool and its generated code use a separate license from Lark,
# and are subject to the terms of the Mozilla Public License, v. 2.0.
# If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.
#
# If you wish to purchase a commercial license for this tool and its
# generated code, you may contact me via email or otherwise.
#
# If MPL2 is incompatible with your free or open-source project,
# contact me and we'll work it out.
#
#
from copy import deepcopy
from abc import ABC, abstractmethod
from types import ModuleType
from typing import (
TypeVar, Generic, Type, Tuple, List, Dict, Iterator, Collection, Callable, Optional, FrozenSet, Any,
Union, Iterable, IO, TYPE_CHECKING, overload, Sequence,
Pattern as REPattern, ClassVar, Set, Mapping
)
###}
import sys
import token, tokenize
import os
from os import path
from collections import defaultdict
from functools import partial
from argparse import ArgumentParser
import lark
from lark.tools import lalr_argparser, build_lalr, make_warnings_comments
from lark.grammar import Rule
from lark.lexer import TerminalDef
# Directory containing this file, and its parent directory. The entries of
# EXTRACT_STANDALONE_FILES are opened relative to the parent (see
# gen_standalone).
_dir = path.dirname(__file__)
_larkdir = path.join(_dir, path.pardir)
# Source files whose '###{standalone' ... '###}' sections are copied, in this
# order, into the generated module by gen_standalone. The first entry (index 0)
# is the only one emitted without passing through strip_docstrings.
EXTRACT_STANDALONE_FILES = [
    'tools/standalone.py',
    'exceptions.py',
    'utils.py',
    'tree.py',
    'visitors.py',
    'grammar.py',
    'lexer.py',
    'common.py',
    'parse_tree_builder.py',
    'parsers/lalr_analysis.py',
    'parsers/lalr_parser_state.py',
    'parsers/lalr_parser.py',
    'parsers/lalr_interactive_parser.py',
    'parser_frontends.py',
    'lark.py',
    'indenter.py',
]
def extract_sections(lines):
    """Collect the text of every ``###{name`` ... ``###}`` section in *lines*.

    A line starting with ``###{`` opens a section whose name is the rest of
    that line (stripped); a line starting with ``###}`` closes it. Lines
    between the two markers are collected verbatim; the marker lines
    themselves and any line outside a section are dropped. Sections that
    share a name are concatenated in order of appearance. A section that is
    still open when *lines* is exhausted is discarded.

    Args:
        lines: An iterable of text lines (e.g. an open file), each normally
            ending with its newline.

    Returns:
        A dict mapping section name to the joined text of that section.

    Raises:
        ValueError: If a line starts with ``###`` but is followed by neither
            ``{`` nor ``}`` (including a bare ``###`` with nothing after it).
    """
    section = None
    text = []
    sections = defaultdict(list)
    for line in lines:
        if line.startswith('###'):
            # Slice instead of indexing: a bare '###' (possible on the last
            # line of a file without a trailing newline) must be reported as
            # a malformed marker rather than crash with IndexError.
            marker = line[3:4]
            if marker == '{':
                section = line[4:].strip()
            elif marker == '}':
                sections[section] += text
                section = None
                text = []
            else:
                raise ValueError(line)
        elif section:
            text.append(line)
    return {name: ''.join(text) for name, text in sections.items()}
def strip_docstrings(line_gen):
    """Return the source read from *line_gen* with comments and docstrings removed.

    *line_gen* is a readline-style callable handed to
    ``tokenize.generate_tokens``. The token stream is re-assembled into a
    string, padding with spaces so each token keeps its original column.
    A string token that directly follows an INDENT token (or is the very
    first token) is replaced by ``#--``; every comment token is replaced by
    ``##`` plus a newline. All other tokens are emitted unchanged.

    Based on code from: https://stackoverflow.com/questions/1769332/script-to-remove-python-comments-docstrings
    """
    pieces = []
    # Starting as if an INDENT was just seen lets a leading module docstring
    # be recognised as a docstring too.
    previous_kind = token.INDENT
    end_row, end_col = -1, 0

    for kind, text, start, end, _source_line in tokenize.generate_tokens(line_gen):
        start_row, start_col = start

        # A token on a later row than the previous token's end starts
        # measuring its padding from column 0 again.
        if start_row > end_row:
            end_col = 0
        padding = start_col - end_col
        if padding > 0:
            pieces.append(" " * padding)

        if kind == tokenize.COMMENT:
            pieces.append("##\n")
        elif kind == token.STRING and previous_kind == token.INDENT:
            pieces.append("#--")
        else:
            pieces.append(text)

        previous_kind = kind
        end_row, end_col = end

    return ''.join(pieces)
def gen_standalone(lark_inst, output=None, out=sys.stdout, compress=False):
    """Emit the source of a stand-alone parser module for *lark_inst*.

    The generated text consists of, in order: a version header, the
    ``standalone`` section of every file in EXTRACT_STANDALONE_FILES (all
    but the first passed through strip_docstrings), the serialized ``DATA``
    and ``MEMO`` values obtained from ``lark_inst.memo_serialize``, and a
    ``Lark_StandAlone`` factory function.

    Args:
        lark_inst: Object providing ``memo_serialize``.
        output: Callable invoked once per emitted chunk. When None, chunks
            are printed to *out*.
        out: File object used when *output* is None.
        compress: When true, DATA and MEMO are pickled, zlib-compressed and
            base64-encoded, and the generated module decodes them on load.
    """
    import pickle, zlib, base64

    emit = output if output is not None else partial(print, file=out)

    def emit_compressed(obj):
        # pickle -> zlib -> base64, written as a bytes literal.
        pickled = pickle.dumps(obj, pickle.HIGHEST_PROTOCOL)
        emit(repr(base64.b64encode(zlib.compress(pickled))))

    def emit_decompress(name):
        # Line of generated code that reverses emit_compressed for `name`.
        emit('%(name)s = pickle.loads(zlib.decompress(base64.b64decode(%(name)s)))' % {'name': name})

    version = lark.__version__
    emit('# The file was automatically generated by Lark v%s' % version)
    emit('__version__ = "%s"' % version)
    emit()

    for index, pyfile in enumerate(EXTRACT_STANDALONE_FILES):
        with open(os.path.join(_larkdir, pyfile)) as f:
            code = extract_sections(f)['standalone']
            if index > 0:  # every file except the first one (this file)
                code = strip_docstrings(partial(next, iter(code.splitlines(True))))
            emit(code)

    data, memo = lark_inst.memo_serialize([TerminalDef, Rule])
    emit('import pickle, zlib, base64')

    # DATA and MEMO are written the same way, one after the other.
    for name, opener, payload in (('DATA', 'DATA = (', data), ('MEMO', 'MEMO = (', memo)):
        emit(opener)
        if compress:
            emit_compressed(payload)
            emit(')')
            emit_decompress(name)
        else:
            emit(payload)
            emit(')')

    trailer = (
        'Shift = 0',
        'Reduce = 1',
        "def Lark_StandAlone(**kwargs):",
        " return Lark._load_from_dict(DATA, MEMO, **kwargs)",
    )
    for line in trailer:
        emit(line)
def main():
    """Command-line entry point for the stand-alone generator.

    Builds an argument parser on top of ``lalr_argparser`` (adding
    ``-c/--compress``), prints help to stderr and exits with status 1 when
    called without arguments, and otherwise builds the parser via
    ``build_lalr`` and writes the generated module with ``gen_standalone``.
    The output and grammar files taken from the parsed namespace are closed
    afterwards, even if generation fails.
    """
    # NOTE(review): imported from lark.tools; presumably reroutes warnings so
    # they appear as comments in the generated output -- confirm.
    make_warnings_comments()
    parser = ArgumentParser(
        # The program name itself; the previous value embedded a stray
        # "prog='...'" wrapper that showed up verbatim in the usage line.
        prog='python -m lark.tools.standalone',
        description="Lark Stand-alone Generator Tool",
        parents=[lalr_argparser],
        epilog='Look at the Lark documentation for more info on the options',
    )
    parser.add_argument('-c', '--compress', action='store_true', default=False, help="Enable compression")
    if len(sys.argv) == 1:
        parser.print_help(sys.stderr)
        sys.exit(1)
    ns = parser.parse_args()
    try:
        lark_inst, out = build_lalr(ns)
        gen_standalone(lark_inst, out=out, compress=ns.compress)
    finally:
        # Release both files whether or not generation succeeded.
        ns.out.close()
        ns.grammar_file.close()
# Run the generator when this file is executed as a script.
if __name__ == '__main__':
    main()