You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
	
	
		
			292 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			Python
		
	
			
		
		
	
	
			292 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			Python
		
	
from .utils import logger, NO_VALUE
 | 
						|
from typing import Mapping, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set, Optional, Collection, TYPE_CHECKING
 | 
						|
 | 
						|
if TYPE_CHECKING:
 | 
						|
    from .lexer import Token
 | 
						|
    from .parsers.lalr_interactive_parser import InteractiveParser
 | 
						|
    from .tree import Tree
 | 
						|
 | 
						|
###{standalone
 | 
						|
 | 
						|
class LarkError(Exception):
    """Root of this module's exception hierarchy.

    Every other exception class defined in this module derives from it.
    """
 | 
						|
 | 
						|
 | 
						|
class ConfigurationError(LarkError, ValueError):
    """Raised by ``assert_config`` when a value is not among the allowed options.

    Also derives from ``ValueError``, so it can be caught as either type.
    """
 | 
						|
 | 
						|
 | 
						|
def assert_config(value, options: Collection, msg='Got %r, expected one of %s') -> None:
    """Check that ``value`` is one of ``options``.

    Parameters:
        value: the value to validate
        options: the collection of accepted values
        msg: a ``%``-style template formatted with ``(value, options)``

    Raises:
        ConfigurationError: if ``value`` is not in ``options``
    """
    if value not in options:
        raise ConfigurationError(msg % (value, options))
 | 
						|
 | 
						|
 | 
						|
class GrammarError(LarkError):
    """Grammar-related error (a direct ``LarkError`` subclass)."""
 | 
						|
 | 
						|
 | 
						|
class ParseError(LarkError):
    """Base class for parser errors, such as ``UnexpectedToken`` and ``UnexpectedEOF``."""
 | 
						|
 | 
						|
 | 
						|
class LexError(LarkError):
    """Base class for lexer errors, such as ``UnexpectedCharacters``."""
 | 
						|
 | 
						|
# Generic type of the labels passed to (and returned by) UnexpectedInput.match_examples.
T = TypeVar('T')
 | 
						|
 | 
						|
class UnexpectedInput(LarkError):
    """UnexpectedInput Error.

    Used as a base class for the following exceptions:

    - ``UnexpectedCharacters``: The lexer encountered an unexpected string
    - ``UnexpectedToken``: The parser received an unexpected token
    - ``UnexpectedEOF``: The parser expected a token, but the input ended

    After catching one of these exceptions, you may call the following helper methods to create a nicer error message.
    """
    line: int
    column: int
    pos_in_stream = None
    state: Any
    _terminals_by_name = None
    interactive_parser: 'InteractiveParser'

    def get_context(self, text: str, span: int=40) -> str:
        """Returns a pretty string pinpointing the error in the text,
        with span amount of context characters around it.

        The result consists of the line fragment surrounding the error
        position, followed by a second line holding a ``^`` marker placed
        under the error position.

        Parameters:
            text: the text that was being parsed (``str`` or ``bytes``)
            span: maximum number of characters to show on each side of the error

        Note:
            The parser doesn't hold a copy of the text it has to parse,
            so you have to provide it again
        """
        # A missing (None) position falls back to the start of the text.
        pos = self.pos_in_stream or 0
        start = max(pos - span, 0)
        end = pos + span
        if not isinstance(text, bytes):
            # Keep only the part of the window that lies on the error's own line.
            before = text[start:pos].rsplit('\n', 1)[-1]
            after = text[pos:end].split('\n', 1)[0]
            # expandtabs() so the marker lines up when the prefix contains tabs.
            return before + after + '\n' + ' ' * len(before.expandtabs()) + '^\n'
        else:
            before = text[start:pos].rsplit(b'\n', 1)[-1]
            after = text[pos:end].split(b'\n', 1)[0]
            return (before + after + b'\n' + b' ' * len(before.expandtabs()) + b'^\n').decode("ascii", "backslashreplace")

    def match_examples(self, parse_fn: 'Callable[[str], Tree]',
                             examples: Union[Mapping[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]],
                             token_type_match_fallback: bool=False,
                             use_accepts: bool=True
                         ) -> Optional[T]:
        """Allows you to detect what's wrong in the input text by matching
        against example errors.

        Given a parser instance and a dictionary mapping some label with
        some malformed syntax examples, it'll return the label for the
        example that bests matches the current error. The function will
        iterate the dictionary until it finds a matching error, and
        return the corresponding value.

        For an example usage, see `examples/error_reporting_lalr.py`

        Parameters:
            parse_fn: parse function (usually ``lark_instance.parse``)
            examples: mapping of ``{label: [malformed_example, ...]}``, or an
                iterable of ``(label, [malformed_example, ...])`` pairs.
                Each label's examples must be a collection of strings, not a single string.
            token_type_match_fallback: if true, an example whose offending token
                has the same type as this error's token (but is not equal to it)
                is preferred over an example that merely fails in the same state.
            use_accepts: Recommended to keep this as ``use_accepts=True``.

        Returns:
            The label of the best matching example, or ``None`` if no example
            failed in the same parser state as this error.
        """
        assert self.state is not None, "Not supported for this exception"

        if isinstance(examples, Mapping):
            examples = examples.items()

        # (best label so far, whether it came from a token-type match)
        candidate = (None, False)
        for i, (label, example) in enumerate(examples):
            assert not isinstance(example, str), "Expecting a list"

            for j, malformed in enumerate(example):
                try:
                    parse_fn(malformed)
                except UnexpectedInput as ut:
                    if ut.state == self.state:
                        if (
                            use_accepts
                            and isinstance(self, UnexpectedToken)
                            and isinstance(ut, UnexpectedToken)
                            and ut.accepts != self.accepts
                        ):
                            # The state is formatted with %s rather than %d: its type is
                            # not guaranteed to be an integer, and %d would raise TypeError.
                            logger.debug("Different accepts with same state[%s]: %s != %s at example [%s][%s]" %
                                         (self.state, self.accepts, ut.accepts, i, j))
                            continue
                        if (
                            isinstance(self, (UnexpectedToken, UnexpectedEOF))
                            and isinstance(ut, (UnexpectedToken, UnexpectedEOF))
                        ):
                            if ut.token == self.token:  # Try exact match first
                                logger.debug("Exact Match at example [%s][%s]" % (i, j))
                                return label

                            if token_type_match_fallback:
                                # Fallback to token types match
                                if (ut.token.type == self.token.type) and not candidate[-1]:
                                    logger.debug("Token Type Fallback at example [%s][%s]" % (i, j))
                                    candidate = label, True

                        # Weakest match: same state only. The first one found is kept.
                        if candidate[0] is None:
                            logger.debug("Same State match at example [%s][%s]" % (i, j))
                            candidate = label, False

        return candidate[0]

    def _format_expected(self, expected):
        """Format ``expected`` terminal names as a bulleted "Expected one of" list.

        When ``_terminals_by_name`` is set, names found in it are replaced by
        the corresponding terminal's ``user_repr()``.
        """
        if self._terminals_by_name:
            d = self._terminals_by_name
            expected = [d[t_name].user_repr() if t_name in d else t_name for t_name in expected]
        return "Expected one of: \n\t* %s\n" % '\n\t* '.join(expected)
 | 
						|
 | 
						|
 | 
						|
class UnexpectedEOF(ParseError, UnexpectedInput):
    """An exception that is raised by the parser, when the input ends while it still expects a token.

    Parameters:
        expected: the terminals that were expected at the point where the input ended
        state: a value representing the parser state
        terminals_by_name: optional mapping of terminal names to terminal
            definitions, used to format the expected terminals in the message

    Note: ``line``, ``column`` and ``pos_in_stream`` are all set to ``-1``,
    and ``token`` is a placeholder ``<EOF>`` token.
    """
    expected: 'List[Token]'

    def __init__(self, expected, state=None, terminals_by_name=None):
        super().__init__()

        self.expected = expected
        self.state = state
        # Imported locally; at module level Token is only imported under TYPE_CHECKING.
        from .lexer import Token
        self.token = Token("<EOF>", "")  # , line=-1, column=-1, pos_in_stream=-1)
        self.pos_in_stream = -1
        self.line = -1
        self.column = -1
        self._terminals_by_name = terminals_by_name

    def __str__(self):
        message = "Unexpected end-of-input. "
        message += self._format_expected(self.expected)
        return message
 | 
						|
 | 
						|
 | 
						|
class UnexpectedCharacters(LexError, UnexpectedInput):
    """An exception that is raised by the lexer, when it cannot match the next
    string of characters to any of its terminals.

    Parameters:
        seq: the text being lexed (``str`` or ``bytes``)
        lex_pos: index in ``seq`` of the character that could not be matched
        line: line number of the error
        column: column number of the error
        allowed: names of the terminals that were acceptable at this point
        considered_tokens: stored as-is on the instance
        state: a value representing the parser state
        token_history: tokens preceding the error; listed in the message when truthy
        terminals_by_name: optional mapping of terminal names to terminal
            definitions, used to format ``allowed`` in the message
        considered_rules: stored as-is on the instance

    Note: These parameters (except ``seq``) are available as attributes of the
    instance; ``lex_pos`` is stored as ``pos_in_stream``, and the offending
    character as ``char``.
    """

    allowed: Set[str]
    considered_tokens: Set[Any]

    def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None, token_history=None,
                 terminals_by_name=None, considered_rules=None):
        super().__init__()

        # TODO considered_tokens and allowed can be figured out using state
        self.line = line
        self.column = column
        self.pos_in_stream = lex_pos
        self.state = state
        self._terminals_by_name = terminals_by_name

        self.allowed = allowed
        self.considered_tokens = considered_tokens
        self.considered_rules = considered_rules
        self.token_history = token_history

        if isinstance(seq, bytes):
            # Slice (instead of index) to get bytes rather than an int, then make it printable.
            self.char = seq[lex_pos:lex_pos + 1].decode("ascii", "backslashreplace")
        else:
            self.char = seq[lex_pos]
        # The context is rendered now, because the text is not kept on the exception.
        self._context = self.get_context(seq)

    def __str__(self):
        message = "No terminal matches '%s' in the current parser context, at line %d col %d" % (self.char, self.line, self.column)
        message += '\n\n' + self._context
        if self.allowed:
            message += self._format_expected(self.allowed)
        if self.token_history:
            message += '\nPrevious tokens: %s\n' % ', '.join(repr(t) for t in self.token_history)
        return message
 | 
						|
 | 
						|
 | 
						|
class UnexpectedToken(ParseError, UnexpectedInput):
    """An exception that is raised by the parser, when the token it received
    doesn't match any valid step forward.

    Parameters:
        token: The mismatched token
        expected: The set of expected tokens
        considered_rules: Which rules were considered, to deduce the expected tokens
        state: A value representing the parser state. Do not rely on its value or type.
        interactive_parser: An instance of ``InteractiveParser``, that is initialized to the point of failure,
                            and can be used for debugging and error handling.
        terminals_by_name: Optional mapping of terminal names to terminal definitions,
                           used to format the expected terminals in the message.
        token_history: Tokens preceding the error; included in the message when truthy.

    Note: These parameters are available as attributes of the instance.
    """

    expected: Set[str]
    considered_rules: Set[str]

    def __init__(self, token, expected, considered_rules=None, state=None, interactive_parser=None, terminals_by_name=None, token_history=None):
        super().__init__()

        # TODO considered_rules and expected can be figured out using state
        # Position attributes are read defensively, with placeholders when the token lacks them.
        self.line = getattr(token, 'line', '?')
        self.column = getattr(token, 'column', '?')
        self.pos_in_stream = getattr(token, 'start_pos', None)
        self.state = state

        self.token = token
        self.expected = expected  # XXX deprecate? `accepts` is better
        self._accepts = NO_VALUE  # computed lazily by the `accepts` property
        self.considered_rules = considered_rules
        self.interactive_parser = interactive_parser
        self._terminals_by_name = terminals_by_name
        self.token_history = token_history

    @property
    def accepts(self) -> Set[str]:
        """The result of ``interactive_parser.accepts()``, computed on first access and cached.

        If no interactive parser was provided, the (falsy) ``interactive_parser``
        value itself is returned instead.
        """
        if self._accepts is NO_VALUE:
            self._accepts = self.interactive_parser and self.interactive_parser.accepts()
        return self._accepts

    def __str__(self):
        # Prefer `accepts`; fall back to `expected` when it is empty or unavailable.
        message = ("Unexpected token %r at line %s, column %s.\n%s"
                   % (self.token, self.line, self.column, self._format_expected(self.accepts or self.expected)))
        if self.token_history:
            message += "Previous tokens: %r\n" % self.token_history

        return message
 | 
						|
 | 
						|
 | 
						|
 | 
						|
class VisitError(LarkError):
    """VisitError is raised when visitors are interrupted by an exception

    It provides the following attributes for inspection:

    Parameters:
        rule: the name of the visit rule that failed
        obj: the tree-node or token that was being processed
        orig_exc: the exception that caused it to fail

    Note: These parameters are available as attributes
    """

    obj: 'Union[Tree, Token]'
    orig_exc: Exception

    def __init__(self, rule, obj, orig_exc):
        message = 'Error trying to process rule "%s":\n\n%s' % (rule, orig_exc)
        super().__init__(message)

        self.rule = rule
        self.obj = obj
        self.orig_exc = orig_exc
 | 
						|
 | 
						|
 | 
						|
class MissingVariableError(LarkError):
    """A ``LarkError`` subclass; it is not raised anywhere in this module."""
 | 
						|
 | 
						|
###}
 |