"Provides for superficial grammar analysis."

from collections import Counter, defaultdict
from typing import List, Dict, Iterator, FrozenSet, Set

from ..utils import bfs, fzset, classify, OrderedSet
from ..exceptions import GrammarError
from ..grammar import Rule, Terminal, NonTerminal, Symbol
from ..common import ParserConf


class RulePtr:
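    """An LR(0) item: a grammar rule plus a "dot" position (``index``) marking
    how much of the rule's expansion has been matched so far."""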
    __slots__ = ('rule', 'index')
    rule: Rule
    index: int

    def __init__(self, rule: Rule, index: int):
        assert isinstance(rule, Rule)
        assert index <= len(rule.expansion)
        self.rule = rule
        self.index = index

    def __repr__(self):
        before = [x.name for x in self.rule.expansion[:self.index]]
        after = [x.name for x in self.rule.expansion[self.index:]]
        return '<%s : %s * %s>' % (self.rule.origin.name, ' '.join(before), ' '.join(after))

    @property
    def next(self) -> Symbol:
        return self.rule.expansion[self.index]

    def advance(self, sym: Symbol) -> 'RulePtr':
        assert self.next == sym
        return RulePtr(self.rule, self.index+1)

    @property
    def is_satisfied(self) -> bool:
        return self.index == len(self.rule.expansion)

    def __eq__(self, other) -> bool:
        if not isinstance(other, RulePtr):
            return NotImplemented
        return self.rule == other.rule and self.index == other.index

    def __hash__(self) -> int:
        return hash((self.rule, self.index))


State = FrozenSet[RulePtr]

# state generation ensures no duplicate LR0ItemSets
class LR0ItemSet:
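    """A set of LR(0) items (``RulePtr`` objects), split into the kernel items
    and their closure, plus ``transitions`` and ``lookaheads`` mappings that
    are created empty here and filled in by later analysis passes."""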
    __slots__ = ('kernel', 'closure', 'transitions', 'lookaheads')

    kernel: State
    closure: State
    transitions: Dict[Symbol, 'LR0ItemSet']
    lookaheads: Dict[Symbol, Set[Rule]]

    def __init__(self, kernel, closure):
        self.kernel = fzset(kernel)
        self.closure = fzset(closure)
        self.transitions = {}
        self.lookaheads = defaultdict(set)

    def __repr__(self):
        return '{%s | %s}' % (', '.join([repr(r) for r in self.kernel]), ', '.join([repr(r) for r in self.closure]))


def update_set(set1, set2):
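    """Add the items of ``set2`` to ``set1`` in place and report whether
    ``set1`` actually grew.

    For example, ``update_set({1}, {1, 2})`` extends the first set to
    ``{1, 2}`` and returns True; if ``set2`` is empty or already contained
    in ``set1``, nothing changes and False is returned."""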
    if not set2 or set1 > set2:
        return False

    copy = set(set1)
    set1 |= set2
    return set1 != copy

def calculate_sets(rules):
    """Calculate the FIRST, FOLLOW and NULLABLE sets for the given rules.

    Adapted from: http://lara.epfl.ch/w/cc09:algorithm_for_first_and_follow_sets"""
    symbols = {sym for rule in rules for sym in rule.expansion} | {rule.origin for rule in rules}

    # foreach grammar rule X ::= Y(1) ... Y(k)
    # if k=0 or {Y(1),...,Y(k)} subset of NULLABLE then
    #   NULLABLE = NULLABLE union {X}
    # for i = 1 to k
    #   if i=1 or {Y(1),...,Y(i-1)} subset of NULLABLE then
    #     FIRST(X) = FIRST(X) union FIRST(Y(i))
    #   for j = i+1 to k
    #     if i=k or {Y(i+1),...Y(k)} subset of NULLABLE then
    #       FOLLOW(Y(i)) = FOLLOW(Y(i)) union FOLLOW(X)
    #     if i+1=j or {Y(i+1),...,Y(j-1)} subset of NULLABLE then
    #       FOLLOW(Y(i)) = FOLLOW(Y(i)) union FIRST(Y(j))
    # until none of NULLABLE,FIRST,FOLLOW changed in last iteration
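    #
    # For example, given the toy grammar  S -> A b ;  A -> a | <empty>
    # the fixed point is  NULLABLE = {A},  FIRST(A) = {a},
    # FIRST(S) = {a, b},  FOLLOW(A) = {b}.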

    NULLABLE = set()
    FIRST = {}
    FOLLOW = {}
    for sym in symbols:
        FIRST[sym] = {sym} if sym.is_term else set()
        FOLLOW[sym] = set()

    # Calculate NULLABLE and FIRST
    changed = True
    while changed:
        changed = False

        for rule in rules:
            if set(rule.expansion) <= NULLABLE:
                if update_set(NULLABLE, {rule.origin}):
                    changed = True

            for i, sym in enumerate(rule.expansion):
                if set(rule.expansion[:i]) <= NULLABLE:
                    if update_set(FIRST[rule.origin], FIRST[sym]):
                        changed = True
                else:
                    break

    # Calculate FOLLOW
    changed = True
    while changed:
        changed = False

        for rule in rules:
            for i, sym in enumerate(rule.expansion):
                if i == len(rule.expansion)-1 or set(rule.expansion[i+1:]) <= NULLABLE:
                    if update_set(FOLLOW[sym], FOLLOW[rule.origin]):
                        changed = True

                for j in range(i+1, len(rule.expansion)):
                    if set(rule.expansion[i+1:j]) <= NULLABLE:
                        if update_set(FOLLOW[sym], FIRST[rule.expansion[j]]):
                            changed = True

    return FIRST, FOLLOW, NULLABLE


class GrammarAnalyzer:
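    """Analyzes a grammar (given as a ``ParserConf``) ahead of parser
    construction: builds augmented root rules for every start symbol, the
    LR(0) start and end states, and the FIRST, FOLLOW and NULLABLE sets."""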
    def __init__(self, parser_conf: ParserConf, debug: bool=False, strict: bool=False):
        self.debug = debug
        self.strict = strict

        root_rules = {start: Rule(NonTerminal('$root_' + start), [NonTerminal(start), Terminal('$END')])
                      for start in parser_conf.start}

        rules = parser_conf.rules + list(root_rules.values())
        self.rules_by_origin: Dict[NonTerminal, List[Rule]] = classify(rules, lambda r: r.origin)

        if len(rules) != len(set(rules)):
            duplicates = [item for item, count in Counter(rules).items() if count > 1]
            raise GrammarError("Rules defined twice: %s" % ', '.join(str(i) for i in duplicates))

        for r in rules:
            for sym in r.expansion:
                if not (sym.is_term or sym in self.rules_by_origin):
                    raise GrammarError("Using an undefined rule: %s" % sym)

        self.start_states = {start: self.expand_rule(root_rule.origin)
                             for start, root_rule in root_rules.items()}

        self.end_states = {start: fzset({RulePtr(root_rule, len(root_rule.expansion))})
                           for start, root_rule in root_rules.items()}

        lr0_root_rules = {start: Rule(NonTerminal('$root_' + start), [NonTerminal(start)])
                for start in parser_conf.start}

        lr0_rules = parser_conf.rules + list(lr0_root_rules.values())
        assert len(lr0_rules) == len(set(lr0_rules))

        self.lr0_rules_by_origin = classify(lr0_rules, lambda r: r.origin)

        # cache RulePtr(r, 0) in r (no duplicate RulePtr objects)
        self.lr0_start_states = {start: LR0ItemSet([RulePtr(root_rule, 0)], self.expand_rule(root_rule.origin, self.lr0_rules_by_origin))
                for start, root_rule in lr0_root_rules.items()}

        self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(rules)

    def expand_rule(self, source_rule: NonTerminal, rules_by_origin=None) -> OrderedSet[RulePtr]:
        "Return all init_ptrs (RulePtrs at index 0) reachable from source_rule, expanding non-terminals recursively."

        if rules_by_origin is None:
            rules_by_origin = self.rules_by_origin

        init_ptrs = OrderedSet[RulePtr]()
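        # _expand_rule yields the non-terminals that start each expansion;
        # bfs() below walks them transitively, so init_ptrs ends up holding
        # a RulePtr at index 0 for every rule reachable from source_rule.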
        def _expand_rule(rule: NonTerminal) -> Iterator[NonTerminal]:
            assert not rule.is_term, rule

            for r in rules_by_origin[rule]:
                init_ptr = RulePtr(r, 0)
                init_ptrs.add(init_ptr)

                if r.expansion: # if not empty rule
                    new_r = init_ptr.next
                    if not new_r.is_term:
                        assert isinstance(new_r, NonTerminal)
                        yield new_r

        for _ in bfs([source_rule], _expand_rule):
            pass

        return init_ptrs