# This module provides a LALR interactive parser, which is used for debugging and error handling
|
|
|
|
from typing import Iterator, List
|
|
from copy import copy
|
|
import warnings
|
|
|
|
from lark.exceptions import UnexpectedToken
|
|
from lark.lexer import Token, LexerThread
|
|
from .lalr_parser_state import ParserState
|
|
|
|
###{standalone
|
|
|
|
class InteractiveParser:
    """InteractiveParser gives you advanced control over parsing and error handling when parsing with LALR.

    For a simpler interface, see the ``on_error`` argument to ``Lark.parse()``.
    """
    def __init__(self, parser, parser_state: ParserState, lexer_thread: LexerThread):
        self.parser = parser                # LALR parser; provides parse_from_state() for resume_parse()
        self.parser_state = parser_state    # current LALR state (stack + position into the parse table)
        self.lexer_thread = lexer_thread    # lexer whose remaining input can still be consumed
        self.result = None                  # set by iter_parse() each time a token is fed

    @property
    def lexer_state(self) -> LexerThread:
        # Deprecated alias for ``lexer_thread``, kept for backward compatibility.
        warnings.warn("lexer_state will be removed in subsequent releases. Use lexer_thread instead.", DeprecationWarning)
        return self.lexer_thread

    def feed_token(self, token: Token):
        """Feed the parser with a token, and advance it to the next state, as if it received it from the lexer.

        Note that ``token`` has to be an instance of ``Token``.
        """
        # The second argument flags end-of-input so the state can finalize the parse.
        return self.parser_state.feed_token(token, token.type == '$END')

    def iter_parse(self) -> Iterator[Token]:
        """Step through the different stages of the parse, by reading tokens from the lexer
        and feeding them to the parser, one per iteration.

        Returns an iterator of the tokens it encounters.

        When the parse is over, the resulting tree can be found in ``InteractiveParser.result``.
        """
        for token in self.lexer_thread.lex(self.parser_state):
            # Yield BEFORE feeding, so callers may inspect/adjust state per token.
            yield token
            self.result = self.feed_token(token)

    def exhaust_lexer(self) -> List[Token]:
        """Try to feed the rest of the lexer state into the interactive parser.

        Note that this modifies the instance in place and does not feed an '$END' Token
        """
        return list(self.iter_parse())

    def feed_eof(self, last_token=None):
        """Feed a '$END' Token. Borrows from 'last_token' if given."""
        # Borrow position info from last_token when available; otherwise fabricate
        # an EOF token positioned at line 1, column 1.
        eof = Token.new_borrow_pos('$END', '', last_token) if last_token is not None else self.lexer_thread._Token('$END', '', 0, 1, 1)
        return self.feed_token(eof)

    def __copy__(self):
        """Create a new interactive parser with a separate state.

        Calls to feed_token() won't affect the old instance, and vice-versa.
        """
        return self.copy()

    def copy(self, deepcopy_values=True):
        # Duplicate the parser state (optionally deep-copying semantic values) and
        # shallow-copy the lexer thread; the parser object itself is shared.
        return type(self)(
            self.parser,
            self.parser_state.copy(deepcopy_values=deepcopy_values),
            copy(self.lexer_thread),
        )

    def __eq__(self, other):
        # Two interactive parsers are equal when both their parser state and
        # lexer thread compare equal.
        if not isinstance(other, InteractiveParser):
            return False

        return self.parser_state == other.parser_state and self.lexer_thread == other.lexer_thread

    def as_immutable(self):
        """Convert to an ``ImmutableInteractiveParser``."""
        # Copy first so the immutable view does not share mutable state with self.
        p = copy(self)
        return ImmutableInteractiveParser(p.parser, p.parser_state, p.lexer_thread)

    def pretty(self):
        """Print the output of ``choices()`` in a way that's easier to read."""
        out = ["Parser choices:"]
        for k, v in self.choices().items():
            out.append('\t- %s -> %r' % (k, v))
        out.append('stack size: %s' % len(self.parser_state.state_stack))
        return '\n'.join(out)

    def choices(self):
        """Returns a dictionary of token types, matched to their action in the parser.

        Only returns token types that are accepted by the current state.

        Updated by ``feed_token()``.
        """
        return self.parser_state.parse_conf.parse_table.states[self.parser_state.position]

    def accepts(self):
        """Returns the set of possible tokens that will advance the parser into a new valid state."""
        accepts = set()
        conf_no_callbacks = copy(self.parser_state.parse_conf)
        # We don't want to call callbacks here since those might have arbitrary side effects
        # and are unnecessarily slow.
        conf_no_callbacks.callbacks = {}
        for t in self.choices():
            if t.isupper(): # is terminal?
                # Probe with a throwaway copy so this instance's state is untouched.
                new_cursor = self.copy(deepcopy_values=False)
                new_cursor.parser_state.parse_conf = conf_no_callbacks
                try:
                    # Feed a dummy token of type t; rejection raises UnexpectedToken.
                    new_cursor.feed_token(self.lexer_thread._Token(t, ''))
                except UnexpectedToken:
                    pass
                else:
                    accepts.add(t)
        return accepts

    def resume_parse(self):
        """Resume automated parsing from the current state.
        """
        return self.parser.parse_from_state(self.parser_state, last_token=self.lexer_thread.state.last_token)
|
|
|
|
|
|
|
|
class ImmutableInteractiveParser(InteractiveParser):
    """Same as ``InteractiveParser``, but operations create a new instance instead
    of changing it in-place.
    """

    # Class-level default; feed_token() sets this on the returned copies.
    result = None

    def __hash__(self):
        # Hashable (unlike the mutable base class): identity is derived from
        # the parser state together with the lexer thread.
        return hash((self.parser_state, self.lexer_thread))

    def feed_token(self, token):
        """Feed a token and return a NEW parser advanced to the next state.

        ``self`` is left untouched; the result of the feed is stored on the
        returned instance's ``result`` attribute.
        """
        advanced = copy(self)
        advanced.result = InteractiveParser.feed_token(advanced, token)
        return advanced

    def exhaust_lexer(self):
        """Try to feed the rest of the lexer state into the parser.

        Note that this returns a new ImmutableInteractiveParser and does not feed an '$END' Token"""
        # Round-trip through a mutable copy: run the in-place version there,
        # then freeze the outcome back into an immutable instance.
        mutable = self.as_mutable()
        mutable.exhaust_lexer()
        return mutable.as_immutable()

    def as_mutable(self):
        """Convert to an ``InteractiveParser``."""
        clone = copy(self)
        return InteractiveParser(clone.parser, clone.parser_state, clone.lexer_thread)
|
|
|
|
###}
|