You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
	
	
		
			145 lines
		
	
	
		
			4.2 KiB
		
	
	
	
		
			Python
		
	
			
		
		
	
	
			145 lines
		
	
	
		
			4.2 KiB
		
	
	
	
		
			Python
		
	
"Provides a post-lexer for implementing Python-style indentation."
 | 
						|
 | 
						|
from abc import ABC, abstractmethod
 | 
						|
from typing import List, Iterator
 | 
						|
 | 
						|
from .exceptions import LarkError
 | 
						|
from .lark import PostLex
 | 
						|
from .lexer import Token
 | 
						|
 | 
						|
###{standalone
 | 
						|
 | 
						|
class DedentError(LarkError):
    """Raised when a line dedents to a column that matches no open indentation level.

    ``Indenter.handle_NL`` raises this after popping every indentation level
    deeper than the new line, if the remaining level still differs from the
    new line's indentation.
    """
 | 
						|
 | 
						|
class Indenter(PostLex, ABC):
    """This is a postlexer that "injects" indent/dedent tokens based on indentation.

    It keeps track of the current indentation, as well as the current level of parentheses.
    Inside parentheses, the indentation is ignored, and no indent/dedent tokens get generated.

    Note: This is an abstract class. To use it, inherit and implement all its abstract methods:
        - tab_len
        - NL_type
        - OPEN_PAREN_types, CLOSE_PAREN_types
        - INDENT_type, DEDENT_type

    See also: the ``postlex`` option in `Lark`.
    """
    # Number of opening-paren tokens seen that have not been closed yet.
    paren_level: int
    # Stack of active indentation widths; the bottom entry is always 0.
    indent_level: List[int]

    def __init__(self) -> None:
        self.paren_level = 0
        self.indent_level = [0]
        assert self.tab_len > 0

    def handle_NL(self, token: Token) -> Iterator[Token]:
        """Process one newline token and yield the tokens that replace it.

        Inside parentheses nothing is yielded, so the newline is dropped.
        Otherwise the newline token itself is yielded, followed by either one
        INDENT token (deeper indentation) or one DEDENT token per indentation
        level that was closed.

        Raises:
            DedentError: if the new indentation is shallower than the previous
                one but does not match any level on the indentation stack.
        """
        if self.paren_level > 0:
            return

        yield token

        # Text after the last line break of the token: tabs and spaces.
        # NOTE(review): every space/tab in that text is counted, so this
        # assumes it holds nothing but indentation - confirm for grammars
        # whose newline token can also carry other text.
        indent_str = token.rsplit('\n', 1)[1]
        indent = indent_str.count(' ') + indent_str.count('\t') * self.tab_len

        if indent > self.indent_level[-1]:
            self.indent_level.append(indent)
            yield Token.new_borrow_pos(self.INDENT_type, indent_str, token)
        else:
            # Close every level deeper than the new line. The bottom entry (0)
            # is never popped because ``indent`` cannot be negative.
            while indent < self.indent_level[-1]:
                self.indent_level.pop()
                yield Token.new_borrow_pos(self.DEDENT_type, indent_str, token)

            if indent != self.indent_level[-1]:
                raise DedentError('Unexpected dedent to column %s. Expected dedent to %s' % (indent, self.indent_level[-1]))

    def _process(self, stream) -> Iterator[Token]:
        """Yield the tokens of ``stream`` with INDENT/DEDENT tokens injected.

        Newline tokens are routed through ``handle_NL``; all other tokens pass
        through unchanged. The parenthesis depth is updated after each token.
        Once the stream ends, one DEDENT is yielded for every indentation level
        that is still open.
        """
        token = None
        for token in stream:
            if token.type == self.NL_type:
                yield from self.handle_NL(token)
            else:
                yield token

            if token.type in self.OPEN_PAREN_types:
                self.paren_level += 1
            elif token.type in self.CLOSE_PAREN_types:
                self.paren_level -= 1
                assert self.paren_level >= 0

        while len(self.indent_level) > 1:
            self.indent_level.pop()
            # Compare against None explicitly: tokens are string-like (see the
            # ``rsplit`` call in handle_NL), so a final token with an empty
            # value is falsy and would otherwise lose its position info.
            if token is not None:
                yield Token.new_borrow_pos(self.DEDENT_type, '', token)
            else:
                yield Token(self.DEDENT_type, '', 0, 0, 0, 0, 0, 0)

        assert self.indent_level == [0], self.indent_level

    def process(self, stream) -> Iterator[Token]:
        """Reset the indentation/parenthesis state and post-process ``stream``.

        Resetting here lets the same instance be used for several inputs.
        """
        self.paren_level = 0
        self.indent_level = [0]
        return self._process(stream)

    # XXX Hack for ContextualLexer. Maybe there's a more elegant solution?
    @property
    def always_accept(self):
        """Token types reported as always acceptable: just the newline type."""
        return (self.NL_type,)

    @property
    @abstractmethod
    def NL_type(self) -> str:
        "The name of the newline token"
        raise NotImplementedError()

    @property
    @abstractmethod
    def OPEN_PAREN_types(self) -> List[str]:
        "The names of the tokens that open a parenthesis"
        raise NotImplementedError()

    @property
    @abstractmethod
    def CLOSE_PAREN_types(self) -> List[str]:
        """The names of the tokens that close a parenthesis
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def INDENT_type(self) -> str:
        """The name of the token that starts an indentation in the grammar.

        See also: %declare
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def DEDENT_type(self) -> str:
        """The name of the token that ends an indentation in the grammar.

        See also: %declare
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def tab_len(self) -> int:
        """How many spaces does a tab equal"""
        raise NotImplementedError()
 | 
						|
 | 
						|
 | 
						|
class PythonIndenter(Indenter):
    """Concrete ``Indenter`` preconfigured with Python-style token names.

    Injects ``_INDENT``/``_DEDENT`` tokens after ``_NEWLINE`` tokens, and
    ignores indentation between the listed opening and closing bracket tokens.

    See also: the ``postlex`` option in `Lark`.
    """

    # Names of the injected tokens and of the newline token that triggers them.
    INDENT_type = '_INDENT'
    DEDENT_type = '_DEDENT'
    NL_type = '_NEWLINE'

    # Bracket token names; indentation is not tracked while any is open.
    OPEN_PAREN_types = ['LPAR', 'LSQB', 'LBRACE']
    CLOSE_PAREN_types = ['RPAR', 'RSQB', 'RBRACE']

    # Each tab character counts as this many spaces of indentation.
    tab_len = 8
 | 
						|
 | 
						|
###}
 |