"""Handwritten parser of dependency specifiers.
 | 
						|
 | 
						|
The docstring for each __parse_* function contains EBNF-inspired grammar representing
 | 
						|
the implementation.
 | 
						|
"""

from __future__ import annotations

import ast
from typing import NamedTuple, Sequence, Tuple, Union

from ._tokenizer import DEFAULT_RULES, Tokenizer


class Node:
    def __init__(self, value: str) -> None:
        self.value = value

    def __str__(self) -> str:
        return self.value

    def __repr__(self) -> str:
        return f"<{self.__class__.__name__}('{self}')>"

    def serialize(self) -> str:
        raise NotImplementedError


class Variable(Node):
    def serialize(self) -> str:
        return str(self)


class Value(Node):
    def serialize(self) -> str:
        return f'"{self}"'


class Op(Node):
    def serialize(self) -> str:
        return str(self)


MarkerVar = Union[Variable, Value]
MarkerItem = Tuple[MarkerVar, Op, MarkerVar]
MarkerAtom = Union[MarkerItem, Sequence["MarkerAtom"]]
MarkerList = Sequence[Union["MarkerList", MarkerAtom, str]]
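# Editor's note (illustrative, not part of the upstream module): the Marker* aliases
# above describe the nested structure built by _parse_marker below. For example, a
# marker such as
#     os_name == "nt" and python_version < "3.11"
# is expected to parse to roughly
#     [
#         (Variable("os_name"), Op("=="), Value("nt")),
#         "and",
#         (Variable("python_version"), Op("<"), Value("3.11")),
#     ]
# i.e. MarkerItem tuples joined by the BOOLOP token texts "and" / "or".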


class ParsedRequirement(NamedTuple):
    name: str
    url: str
    extras: list[str]
    specifier: str
    marker: MarkerList | None


# --------------------------------------------------------------------------------------
# Recursive descent parser for dependency specifier
# --------------------------------------------------------------------------------------
def parse_requirement(source: str) -> ParsedRequirement:
    return _parse_requirement(Tokenizer(source, rules=DEFAULT_RULES))
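# Illustrative usage (editor's sketch, not upstream code); assuming the default
# tokenizer rules, a call like
#     parse_requirement('mypkg[extra1,extra2] >=1.0 ; python_version < "3.11"')
# is expected to return roughly
#     ParsedRequirement(
#         name="mypkg",
#         url="",
#         extras=["extra1", "extra2"],
#         specifier=">=1.0",
#         marker=[(Variable("python_version"), Op("<"), Value("3.11"))],
#     )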


def _parse_requirement(tokenizer: Tokenizer) -> ParsedRequirement:
    """
    requirement = WS? IDENTIFIER WS? extras WS? requirement_details
    """
    tokenizer.consume("WS")

    name_token = tokenizer.expect(
        "IDENTIFIER", expected="package name at the start of dependency specifier"
    )
    name = name_token.text
    tokenizer.consume("WS")

    extras = _parse_extras(tokenizer)
    tokenizer.consume("WS")

    url, specifier, marker = _parse_requirement_details(tokenizer)
    tokenizer.expect("END", expected="end of dependency specifier")

    return ParsedRequirement(name, url, extras, specifier, marker)


def _parse_requirement_details(
    tokenizer: Tokenizer,
) -> tuple[str, str, MarkerList | None]:
    """
    requirement_details = AT URL (WS requirement_marker?)?
                        | specifier WS? (requirement_marker)?
    """

    specifier = ""
    url = ""
    marker = None

    if tokenizer.check("AT"):
        tokenizer.read()
        tokenizer.consume("WS")

        url_start = tokenizer.position
        url = tokenizer.expect("URL", expected="URL after @").text
        if tokenizer.check("END", peek=True):
            return (url, specifier, marker)

        tokenizer.expect("WS", expected="whitespace after URL")

        # The input might end after whitespace.
        if tokenizer.check("END", peek=True):
            return (url, specifier, marker)

        marker = _parse_requirement_marker(
            tokenizer, span_start=url_start, after="URL and whitespace"
        )
    else:
        specifier_start = tokenizer.position
        specifier = _parse_specifier(tokenizer)
        tokenizer.consume("WS")

        if tokenizer.check("END", peek=True):
            return (url, specifier, marker)

        marker = _parse_requirement_marker(
            tokenizer,
            span_start=specifier_start,
            after=(
                "version specifier"
                if specifier
                else "name and no valid version specifier"
            ),
        )

    return (url, specifier, marker)
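# Editor's note (illustrative): the two branches above correspond to the two forms of
# a requirement, e.g.
#     mypkg @ https://example.com/mypkg-1.0.tar.gz ; python_version >= "3.8"
# (URL form, where whitespace is required between the URL and the semicolon) and
#     mypkg >= 1.0 ; python_version >= "3.8"
# (version-specifier form). The example URL is hypothetical.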


def _parse_requirement_marker(
    tokenizer: Tokenizer, *, span_start: int, after: str
) -> MarkerList:
    """
    requirement_marker = SEMICOLON marker WS?
    """

    if not tokenizer.check("SEMICOLON"):
        tokenizer.raise_syntax_error(
            f"Expected end or semicolon (after {after})",
            span_start=span_start,
        )
    tokenizer.read()

    marker = _parse_marker(tokenizer)
    tokenizer.consume("WS")

    return marker


def _parse_extras(tokenizer: Tokenizer) -> list[str]:
    """
    extras = (LEFT_BRACKET wsp* extras_list? wsp* RIGHT_BRACKET)?
    """
    if not tokenizer.check("LEFT_BRACKET", peek=True):
        return []

    with tokenizer.enclosing_tokens(
        "LEFT_BRACKET",
        "RIGHT_BRACKET",
        around="extras",
    ):
        tokenizer.consume("WS")
        extras = _parse_extras_list(tokenizer)
        tokenizer.consume("WS")

    return extras


def _parse_extras_list(tokenizer: Tokenizer) -> list[str]:
    """
    extras_list = identifier (wsp* ',' wsp* identifier)*
    """
    extras: list[str] = []

    if not tokenizer.check("IDENTIFIER"):
        return extras

    extras.append(tokenizer.read().text)

    while True:
        tokenizer.consume("WS")
        if tokenizer.check("IDENTIFIER", peek=True):
            tokenizer.raise_syntax_error("Expected comma between extra names")
        elif not tokenizer.check("COMMA"):
            break

        tokenizer.read()
        tokenizer.consume("WS")

        extra_token = tokenizer.expect("IDENTIFIER", expected="extra name after comma")
        extras.append(extra_token.text)

    return extras
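# Editor's illustration (not upstream code): for an input such as
#     mypkg[extra1, extra2]
# _parse_extras is expected to return ["extra1", "extra2"], while a missing comma, as in
#     mypkg[extra1 extra2]
# raises a syntax error ("Expected comma between extra names").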


def _parse_specifier(tokenizer: Tokenizer) -> str:
    """
    specifier = LEFT_PARENTHESIS WS? version_many WS? RIGHT_PARENTHESIS
              | WS? version_many WS?
    """
    with tokenizer.enclosing_tokens(
        "LEFT_PARENTHESIS",
        "RIGHT_PARENTHESIS",
        around="version specifier",
    ):
        tokenizer.consume("WS")
        parsed_specifiers = _parse_version_many(tokenizer)
        tokenizer.consume("WS")

    return parsed_specifiers


def _parse_version_many(tokenizer: Tokenizer) -> str:
    """
    version_many = (SPECIFIER (WS? COMMA WS? SPECIFIER)*)?
    """
    parsed_specifiers = ""
    while tokenizer.check("SPECIFIER"):
        span_start = tokenizer.position
        parsed_specifiers += tokenizer.read().text
        if tokenizer.check("VERSION_PREFIX_TRAIL", peek=True):
            tokenizer.raise_syntax_error(
                ".* suffix can only be used with `==` or `!=` operators",
                span_start=span_start,
                span_end=tokenizer.position + 1,
            )
        if tokenizer.check("VERSION_LOCAL_LABEL_TRAIL", peek=True):
            tokenizer.raise_syntax_error(
                "Local version label can only be used with `==` or `!=` operators",
                span_start=span_start,
                span_end=tokenizer.position,
            )
        tokenizer.consume("WS")
        if not tokenizer.check("COMMA"):
            break
        parsed_specifiers += tokenizer.read().text
        tokenizer.consume("WS")

    return parsed_specifiers
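# Editor's note (illustrative, assuming the default tokenizer rules): the returned
# string is simply the concatenation of the SPECIFIER and COMMA token texts, with the
# whitespace between them dropped, so an input like
#     >=1.0 , <2.0
# is expected to come back as ">=1.0,<2.0".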


# --------------------------------------------------------------------------------------
# Recursive descent parser for marker expression
# --------------------------------------------------------------------------------------
def parse_marker(source: str) -> MarkerList:
    return _parse_full_marker(Tokenizer(source, rules=DEFAULT_RULES))
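# Illustrative usage (editor's sketch, not upstream code):
#     parse_marker('python_version < "3.11" or (os_name == "nt" and extra == "dev")')
# is expected to return roughly
#     [
#         (Variable("python_version"), Op("<"), Value("3.11")),
#         "or",
#         [
#             (Variable("os_name"), Op("=="), Value("nt")),
#             "and",
#             (Variable("extra"), Op("=="), Value("dev")),
#         ],
#     ]
# with parenthesised groups becoming nested lists.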


def _parse_full_marker(tokenizer: Tokenizer) -> MarkerList:
    retval = _parse_marker(tokenizer)
    tokenizer.expect("END", expected="end of marker expression")
    return retval


def _parse_marker(tokenizer: Tokenizer) -> MarkerList:
    """
    marker = marker_atom (BOOLOP marker_atom)*
    """
    expression = [_parse_marker_atom(tokenizer)]
    while tokenizer.check("BOOLOP"):
        token = tokenizer.read()
        expr_right = _parse_marker_atom(tokenizer)
        expression.extend((token.text, expr_right))
    return expression


def _parse_marker_atom(tokenizer: Tokenizer) -> MarkerAtom:
    """
    marker_atom = WS? LEFT_PARENTHESIS WS? marker WS? RIGHT_PARENTHESIS WS?
                | WS? marker_item WS?
    """

    tokenizer.consume("WS")
    if tokenizer.check("LEFT_PARENTHESIS", peek=True):
        with tokenizer.enclosing_tokens(
            "LEFT_PARENTHESIS",
            "RIGHT_PARENTHESIS",
            around="marker expression",
        ):
            tokenizer.consume("WS")
            marker: MarkerAtom = _parse_marker(tokenizer)
            tokenizer.consume("WS")
    else:
        marker = _parse_marker_item(tokenizer)
    tokenizer.consume("WS")
    return marker


def _parse_marker_item(tokenizer: Tokenizer) -> MarkerItem:
    """
    marker_item = WS? marker_var WS? marker_op WS? marker_var WS?
    """
    tokenizer.consume("WS")
    marker_var_left = _parse_marker_var(tokenizer)
    tokenizer.consume("WS")
    marker_op = _parse_marker_op(tokenizer)
    tokenizer.consume("WS")
    marker_var_right = _parse_marker_var(tokenizer)
    tokenizer.consume("WS")
    return (marker_var_left, marker_op, marker_var_right)


def _parse_marker_var(tokenizer: Tokenizer) -> MarkerVar:
    """
    marker_var = VARIABLE | QUOTED_STRING
    """
    if tokenizer.check("VARIABLE"):
        return process_env_var(tokenizer.read().text.replace(".", "_"))
    elif tokenizer.check("QUOTED_STRING"):
        return process_python_str(tokenizer.read().text)
    else:
        tokenizer.raise_syntax_error(
            message="Expected a marker variable or quoted string"
        )


def process_env_var(env_var: str) -> Variable:
    if env_var in ("platform_python_implementation", "python_implementation"):
        return Variable("platform_python_implementation")
    else:
        return Variable(env_var)


def process_python_str(python_str: str) -> Value:
    value = ast.literal_eval(python_str)
    return Value(str(value))
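# Editor's illustration (not upstream code): process_python_str('"win32"') evaluates
# the quoted literal with ast.literal_eval and returns Value("win32"); process_env_var
# normalises both "python_implementation" and "platform_python_implementation" to
# Variable("platform_python_implementation"), and dotted names such as os.name have
# already been rewritten to os_name by _parse_marker_var.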


def _parse_marker_op(tokenizer: Tokenizer) -> Op:
    """
    marker_op = IN | NOT IN | OP
    """
    if tokenizer.check("IN"):
        tokenizer.read()
        return Op("in")
    elif tokenizer.check("NOT"):
        tokenizer.read()
        tokenizer.expect("WS", expected="whitespace after 'not'")
        tokenizer.expect("IN", expected="'in' after 'not'")
        return Op("not in")
    elif tokenizer.check("OP"):
        return Op(tokenizer.read().text)
    else:
        return tokenizer.raise_syntax_error(
            "Expected marker operator, one of <=, <, !=, ==, >=, >, ~=, ===, in, not in"
        )