from parso.python import tree
from parso.python.token import PythonTokenTypes
from parso.parser import BaseParser


NAME = PythonTokenTypes.NAME
INDENT = PythonTokenTypes.INDENT
DEDENT = PythonTokenTypes.DEDENT


class Parser(BaseParser):
    """
    This class is used to parse a Python file and divide it into a class
    structure of different scopes.

    :param pgen_grammar: The grammar object of pgen2. Loaded by load_grammar.
    """
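
    # Maps a nonterminal name from the grammar to the tree class that is
    # created for it; any nonterminal missing here falls back to
    # ``default_node`` below.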
    node_map = {
        'expr_stmt': tree.ExprStmt,
        'classdef': tree.Class,
        'funcdef': tree.Function,
        'file_input': tree.Module,
        'import_name': tree.ImportName,
        'import_from': tree.ImportFrom,
        'break_stmt': tree.KeywordStatement,
        'continue_stmt': tree.KeywordStatement,
        'return_stmt': tree.ReturnStmt,
        'raise_stmt': tree.KeywordStatement,
        'yield_expr': tree.YieldExpr,
        'del_stmt': tree.KeywordStatement,
        'pass_stmt': tree.KeywordStatement,
        'global_stmt': tree.GlobalStmt,
        'nonlocal_stmt': tree.KeywordStatement,
        'print_stmt': tree.KeywordStatement,
        'assert_stmt': tree.AssertStmt,
        'if_stmt': tree.IfStmt,
        'with_stmt': tree.WithStmt,
        'for_stmt': tree.ForStmt,
        'while_stmt': tree.WhileStmt,
        'try_stmt': tree.TryStmt,
        'sync_comp_for': tree.SyncCompFor,
        # Not sure if this is the best idea, but IMO it's the easiest way to
        # avoid extreme amounts of work around the subtle difference of 2/3
        # grammar in list comprehensions.
        'decorator': tree.Decorator,
        'lambdef': tree.Lambda,
        'lambdef_nocond': tree.Lambda,
        'namedexpr_test': tree.NamedExpr,
    }
    default_node = tree.PythonNode

    # Names/Keywords are handled separately
    _leaf_map = {
        PythonTokenTypes.STRING: tree.String,
        PythonTokenTypes.NUMBER: tree.Number,
        PythonTokenTypes.NEWLINE: tree.Newline,
        PythonTokenTypes.ENDMARKER: tree.EndMarker,
        PythonTokenTypes.FSTRING_STRING: tree.FStringString,
        PythonTokenTypes.FSTRING_START: tree.FStringStart,
        PythonTokenTypes.FSTRING_END: tree.FStringEnd,
    }

    def __init__(self, pgen_grammar, error_recovery=True, start_nonterminal='file_input'):
        super().__init__(pgen_grammar, start_nonterminal,
                         error_recovery=error_recovery)

        self.syntax_errors = []
        self._omit_dedent_list = []
        self._indent_counter = 0

    def parse(self, tokens):
        if self._error_recovery:
            if self._start_nonterminal != 'file_input':
                raise NotImplementedError

            tokens = self._recovery_tokenize(tokens)

        return super().parse(tokens)
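
    # Illustrative usage sketch: this class is normally constructed and driven
    # by parso's top-level API rather than instantiated directly, e.g.:
    #
    #     import parso
    #     module = parso.parse('x = 1\n')  # builds a tree.Module via this Parser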

    def convert_node(self, nonterminal, children):
        """
        Convert raw node information to a PythonBaseNode instance.

        This is passed to the parser driver, which calls it whenever a
        reduction of a grammar rule produces a new complete node, so that the
        tree is built strictly bottom-up.
        """
        try:
            node = self.node_map[nonterminal](children)
        except KeyError:
            if nonterminal == 'suite':
                # We don't want the INDENT/DEDENT in our parser tree. Those
                # leaves are just cancer. They are virtual leaves and not real
                # ones and therefore have pseudo start/end positions and no
                # prefixes. Just ignore them.
                children = [children[0]] + children[2:-1]
            node = self.default_node(nonterminal, children)
        return node
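
    # For example, the suite of ``if x:\n    y\n`` arrives here as
    # [NEWLINE, INDENT, simple_stmt, DEDENT]; the branch above keeps only the
    # NEWLINE and the statement(s) and drops the virtual INDENT/DEDENT leaves.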

    def convert_leaf(self, type, value, prefix, start_pos):
        # print('leaf', repr(value), token.tok_name[type])
        if type == NAME:
            if value in self._pgen_grammar.reserved_syntax_strings:
                return tree.Keyword(value, start_pos, prefix)
            else:
                return tree.Name(value, start_pos, prefix)

        return self._leaf_map.get(type, tree.Operator)(value, start_pos, prefix)
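
    # E.g. a NAME token with the value ``if`` becomes a tree.Keyword, because
    # ``if`` is one of the grammar's reserved syntax strings, while ``foo``
    # becomes a tree.Name; token types not listed in _leaf_map end up as
    # tree.Operator leaves.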

    def error_recovery(self, token):
        tos_nodes = self.stack[-1].nodes
        if tos_nodes:
            last_leaf = tos_nodes[-1].get_last_leaf()
        else:
            last_leaf = None

        if self._start_nonterminal == 'file_input' and \
                (token.type == PythonTokenTypes.ENDMARKER
                 or token.type == DEDENT and not last_leaf.value.endswith('\n')
                 and not last_leaf.value.endswith('\r')):
            # In Python, statements need to end with a newline. But since it's
            # possible (and valid in Python) that there's no newline at the
            # end of a file, we have to recover even if the user doesn't want
            # error recovery.
            if self.stack[-1].dfa.from_rule == 'simple_stmt':
                try:
                    plan = self.stack[-1].dfa.transitions[PythonTokenTypes.NEWLINE]
                except KeyError:
                    pass
                else:
                    if plan.next_dfa.is_final and not plan.dfa_pushes:
                        # We are ignoring here that the newline would be
                        # required for a simple_stmt.
                        self.stack[-1].dfa = plan.next_dfa
                        self._add_token(token)
                        return

        if not self._error_recovery:
            return super().error_recovery(token)

        def current_suite(stack):
            # For now just discard everything that is not a suite or
            # file_input, if we detect an error.
            for until_index, stack_node in reversed(list(enumerate(stack))):
                # `suite` can sometimes be only simple_stmt, not stmt.
                if stack_node.nonterminal == 'file_input':
                    break
                elif stack_node.nonterminal == 'suite':
                    # In the case where we just have a newline we don't want to
                    # do error recovery here. In all other cases, we want to do
                    # error recovery.
                    if len(stack_node.nodes) != 1:
                        break
            return until_index

        until_index = current_suite(self.stack)

        if self._stack_removal(until_index + 1):
            self._add_token(token)
        else:
            typ, value, start_pos, prefix = token
            if typ == INDENT:
                # For every deleted INDENT we have to delete a DEDENT as well.
                # Otherwise the parser will get into trouble and DEDENT too early.
                self._omit_dedent_list.append(self._indent_counter)

            error_leaf = tree.PythonErrorLeaf(typ.name, value, start_pos, prefix)
            self.stack[-1].nodes.append(error_leaf)

        tos = self.stack[-1]
        if tos.nonterminal == 'suite':
            # Need at least one statement in the suite. This happened with the
            # error recovery above.
            try:
                tos.dfa = tos.dfa.arcs['stmt']
            except KeyError:
                # We're already in a final state.
                pass
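
    # Illustrative sketch: with error recovery enabled, syntactically broken
    # input still produces a complete tree in which the offending pieces show
    # up as PythonErrorNode/PythonErrorLeaf children, e.g.:
    #
    #     import parso
    #     module = parso.parse('def broken(:\n')  # no exception is raised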

    def _stack_removal(self, start_index):
        all_nodes = [node for stack_node in self.stack[start_index:] for node in stack_node.nodes]

        if all_nodes:
            node = tree.PythonErrorNode(all_nodes)
            self.stack[start_index - 1].nodes.append(node)

        self.stack[start_index:] = []
        return bool(all_nodes)
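
    # E.g. if error recovery discarded an INDENT at indentation level 2, the
    # DEDENT that would bring the parser back below level 2 must be skipped
    # here as well; otherwise the token stream would dedent one level too far.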

    def _recovery_tokenize(self, tokens):
        for token in tokens:
            typ = token[0]
            if typ == DEDENT:
                # We need to count indents, because if we just omit any DEDENT,
                # we might omit them in the wrong place.
                o = self._omit_dedent_list
                if o and o[-1] == self._indent_counter:
                    o.pop()
                    self._indent_counter -= 1
                    continue

                self._indent_counter -= 1
            elif typ == INDENT:
                self._indent_counter += 1
            yield token