from __future__ import annotations

import contextlib
import re
from dataclasses import dataclass
from typing import Iterator, NoReturn

from .specifiers import Specifier


@dataclass
class Token:
    name: str
    text: str
    position: int


class ParserSyntaxError(Exception):
    """The provided source text could not be parsed correctly."""

    def __init__(
        self,
        message: str,
        *,
        source: str,
        span: tuple[int, int],
    ) -> None:
        self.span = span
        self.message = message
        self.source = source

        super().__init__()

    def __str__(self) -> str:
        marker = " " * self.span[0] + "~" * (self.span[1] - self.span[0]) + "^"
        return "\n    ".join([self.message, self.source, marker])


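# For example, str(ParserSyntaxError("Expected matching RIGHT_BRACKET for
# LEFT_BRACKET, after extras", source="name[extra", span=(4, 10))) renders as:
#
#     Expected matching RIGHT_BRACKET for LEFT_BRACKET, after extras
#         name[extra
#             ~~~~~~^

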
DEFAULT_RULES: dict[str, str | re.Pattern[str]] = {
    "LEFT_PARENTHESIS": r"\(",
    "RIGHT_PARENTHESIS": r"\)",
    "LEFT_BRACKET": r"\[",
    "RIGHT_BRACKET": r"\]",
    "SEMICOLON": r";",
    "COMMA": r",",
    "QUOTED_STRING": re.compile(
        r"""
            (
                ('[^']*')
                |
                ("[^"]*")
            )
        """,
        re.VERBOSE,
    ),
    "OP": r"(===|==|~=|!=|<=|>=|<|>)",
    "BOOLOP": r"\b(or|and)\b",
    "IN": r"\bin\b",
    "NOT": r"\bnot\b",
    "VARIABLE": re.compile(
        r"""
            \b(
                python_version
                |python_full_version
                |os[._]name
                |sys[._]platform
                |platform_(release|system)
                |platform[._](version|machine|python_implementation)
                |python_implementation
                |implementation_(name|version)
                |extras?
                |dependency_groups
            )\b
        """,
        re.VERBOSE,
    ),
    "SPECIFIER": re.compile(
        Specifier._operator_regex_str + Specifier._version_regex_str,
        re.VERBOSE | re.IGNORECASE,
    ),
    "AT": r"\@",
    "URL": r"[^ \t]+",
    "IDENTIFIER": r"\b[a-zA-Z0-9][a-zA-Z0-9._-]*\b",
    "VERSION_PREFIX_TRAIL": r"\.\*",
    "VERSION_LOCAL_LABEL_TRAIL": r"\+[a-z0-9]+(?:[-_\.][a-z0-9]+)*",
    "WS": r"[ \t]+",
    "END": r"$",
}

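# Illustrative (hypothetical) walk-through: for the requirement string
# 'name[extra]; python_version < "3.8"', a parser driving these rules could
# produce IDENTIFIER("name"), LEFT_BRACKET, IDENTIFIER("extra"),
# RIGHT_BRACKET, SEMICOLON, WS, VARIABLE("python_version"), WS, OP("<"), WS,
# QUOTED_STRING('"3.8"'), and END. Which rule applies at a given position
# depends on which names the parser checks, since the patterns overlap.

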
class Tokenizer:
    """Context-sensitive token parsing.

    Provides methods to examine the input stream to check whether the next token
    matches.
    """

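    # A minimal usage sketch (hypothetical caller, not part of this module):
    #
    #     tokenizer = Tokenizer("name[extra]", rules=DEFAULT_RULES)
    #     name = tokenizer.expect("IDENTIFIER", expected="package name")
    #     with tokenizer.enclosing_tokens(
    #         "LEFT_BRACKET", "RIGHT_BRACKET", around="extras"
    #     ):
    #         ...  # parse the bracketed extras here
    #     tokenizer.expect("END", expected="end of input")
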
    def __init__(
        self,
        source: str,
        *,
        rules: dict[str, str | re.Pattern[str]],
    ) -> None:
        self.source = source
        self.rules: dict[str, re.Pattern[str]] = {
            name: re.compile(pattern) for name, pattern in rules.items()
        }
        self.next_token: Token | None = None
        self.position = 0

    def consume(self, name: str) -> None:
        """Move beyond provided token name, if at current position."""
        if self.check(name):
            self.read()

    def check(self, name: str, *, peek: bool = False) -> bool:
        """Check whether the next token has the provided name.

        By default, if the check succeeds, the token *must* be read before
        another check. If `peek` is set to `True`, the token is not loaded and
        would need to be checked again.
        """
        assert self.next_token is None, (
            f"Cannot check for {name!r}, already have {self.next_token!r}"
        )
        assert name in self.rules, f"Unknown token name: {name!r}"

        expression = self.rules[name]

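        # Pattern.match(string, pos) matches only at exactly `pos`, so this
        # tests whether the upcoming characters form the requested token
        # without scanning ahead.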
        match = expression.match(self.source, self.position)
        if match is None:
            return False
        if not peek:
            self.next_token = Token(name, match[0], self.position)
        return True

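    # Note the handshake above: check() stages the matched token in
    # `next_token`, and read() consumes it and advances `position`;
    # expect() below chains the two.
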
    def expect(self, name: str, *, expected: str) -> Token:
        """Expect a certain token name next, failing with a syntax error otherwise.

        The token is read and returned.
        """
        if not self.check(name):
            raise self.raise_syntax_error(f"Expected {expected}")
        return self.read()

    def read(self) -> Token:
        """Consume the next token and return it."""
        token = self.next_token
        assert token is not None

        self.position += len(token.text)
        self.next_token = None

        return token

    def raise_syntax_error(
        self,
        message: str,
        *,
        span_start: int | None = None,
        span_end: int | None = None,
    ) -> NoReturn:
        """Raise ParserSyntaxError at the given position."""
        span = (
            self.position if span_start is None else span_start,
            self.position if span_end is None else span_end,
        )
        raise ParserSyntaxError(
            message,
            source=self.source,
            span=span,
        )

    @contextlib.contextmanager
    def enclosing_tokens(
        self, open_token: str, close_token: str, *, around: str
    ) -> Iterator[None]:
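        """Parse an optionally enclosed region.

        If `open_token` is present it is consumed, the caller parses the
        enclosed content during the `yield`, and a matching `close_token`
        is then required; if it was absent, no close token is expected.
        """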
        if self.check(open_token):
            open_position = self.position
            self.read()
        else:
            open_position = None

        yield

        if open_position is None:
            return

        if not self.check(close_token):
            self.raise_syntax_error(
                f"Expected matching {close_token} for {open_token}, after {around}",
                span_start=open_position,
            )

        self.read()