You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
	
	
		
			264 lines
		
	
	
		
			7.8 KiB
		
	
	
	
		
			Python
		
	
			
		
		
	
	
			264 lines
		
	
	
		
			7.8 KiB
		
	
	
	
		
			Python
		
	
# actions.py
 | 
						|
from __future__ import annotations
 | 
						|
 | 
						|
from typing import Union, Callable, Any
 | 
						|
 | 
						|
from .exceptions import ParseException
 | 
						|
from .util import col, replaced_by_pep8
 | 
						|
from .results import ParseResults
 | 
						|
 | 
						|
 | 
						|
ParseAction = Union[
 | 
						|
    Callable[[], Any],
 | 
						|
    Callable[[ParseResults], Any],
 | 
						|
    Callable[[int, ParseResults], Any],
 | 
						|
    Callable[[str, int, ParseResults], Any],
 | 
						|
]
 | 
						|
 | 
						|
 | 
						|
class OnlyOnce:
 | 
						|
    """
 | 
						|
    Wrapper for parse actions, to ensure they are only called once.
 | 
						|
    Note: parse action signature must include all 3 arguments.
 | 
						|
    """
 | 
						|
 | 
						|
    def __init__(self, method_call: Callable[[str, int, ParseResults], Any]) -> None:
 | 
						|
        from .core import _trim_arity
 | 
						|
 | 
						|
        self.callable = _trim_arity(method_call)
 | 
						|
        self.called = False
 | 
						|
 | 
						|
    def __call__(self, s: str, l: int, t: ParseResults) -> ParseResults:
 | 
						|
        if not self.called:
 | 
						|
            results = self.callable(s, l, t)
 | 
						|
            self.called = True
 | 
						|
            return results
 | 
						|
        raise ParseException(s, l, "OnlyOnce obj called multiple times w/out reset")
 | 
						|
 | 
						|
    def reset(self):
 | 
						|
        """
 | 
						|
        Allow the associated parse action to be called once more.
 | 
						|
        """
 | 
						|
 | 
						|
        self.called = False
 | 
						|
 | 
						|
 | 
						|
def match_only_at_col(n: int) -> ParseAction:
 | 
						|
    """
 | 
						|
    Helper method for defining parse actions that require matching at
 | 
						|
    a specific column in the input text.
 | 
						|
    """
 | 
						|
 | 
						|
    def verify_col(strg: str, locn: int, toks: ParseResults) -> None:
 | 
						|
        if col(locn, strg) != n:
 | 
						|
            raise ParseException(strg, locn, f"matched token not at column {n}")
 | 
						|
 | 
						|
    return verify_col
 | 
						|
 | 
						|
 | 
						|
def replace_with(repl_str: Any) -> ParseAction:
 | 
						|
    """
 | 
						|
    Helper method for common parse actions that simply return
 | 
						|
    a literal value.  Especially useful when used with
 | 
						|
    :meth:`~ParserElement.transform_string`.
 | 
						|
 | 
						|
    Example:
 | 
						|
 | 
						|
    .. doctest::
 | 
						|
 | 
						|
       >>> num = Word(nums).set_parse_action(lambda toks: int(toks[0]))
 | 
						|
       >>> na = one_of("N/A NA").set_parse_action(replace_with(math.nan))
 | 
						|
       >>> term = na | num
 | 
						|
 | 
						|
       >>> term[1, ...].parse_string("324 234 N/A 234")
 | 
						|
       ParseResults([324, 234, nan, 234], {})
 | 
						|
    """
 | 
						|
    return lambda s, l, t: [repl_str]
 | 
						|
 | 
						|
 | 
						|
def remove_quotes(s: str, l: int, t: ParseResults) -> Any:
 | 
						|
    r"""
 | 
						|
    Helper parse action for removing quotation marks from parsed
 | 
						|
    quoted strings, that use a single character for quoting. For parsing
 | 
						|
    strings that may have multiple characters, use the :class:`QuotedString`
 | 
						|
    class.
 | 
						|
 | 
						|
    Example:
 | 
						|
 | 
						|
    .. doctest::
 | 
						|
 | 
						|
       >>> # by default, quotation marks are included in parsed results
 | 
						|
       >>> quoted_string.parse_string("'Now is the Winter of our Discontent'")
 | 
						|
       ParseResults(["'Now is the Winter of our Discontent'"], {})
 | 
						|
 | 
						|
       >>> # use remove_quotes to strip quotation marks from parsed results
 | 
						|
       >>> dequoted = quoted_string().set_parse_action(remove_quotes)
 | 
						|
       >>> dequoted.parse_string("'Now is the Winter of our Discontent'")
 | 
						|
       ParseResults(['Now is the Winter of our Discontent'], {})
 | 
						|
    """
 | 
						|
    return t[0][1:-1]
 | 
						|
 | 
						|
 | 
						|
def with_attribute(*args: tuple[str, str], **attr_dict) -> ParseAction:
 | 
						|
    """
 | 
						|
    Helper to create a validating parse action to be used with start
 | 
						|
    tags created with :class:`make_xml_tags` or
 | 
						|
    :class:`make_html_tags`. Use ``with_attribute`` to qualify
 | 
						|
    a starting tag with a required attribute value, to avoid false
 | 
						|
    matches on common tags such as ``<TD>`` or ``<DIV>``.
 | 
						|
 | 
						|
    Call ``with_attribute`` with a series of attribute names and
 | 
						|
    values. Specify the list of filter attributes names and values as:
 | 
						|
 | 
						|
    - keyword arguments, as in ``(align="right")``, or
 | 
						|
    - as an explicit dict with ``**`` operator, when an attribute
 | 
						|
      name is also a Python reserved word, as in ``**{"class":"Customer", "align":"right"}``
 | 
						|
    - a list of name-value tuples, as in ``(("ns1:class", "Customer"), ("ns2:align", "right"))``
 | 
						|
 | 
						|
    For attribute names with a namespace prefix, you must use the second
 | 
						|
    form.  Attribute names are matched insensitive to upper/lower case.
 | 
						|
 | 
						|
    If just testing for ``class`` (with or without a namespace), use
 | 
						|
    :class:`with_class`.
 | 
						|
 | 
						|
    To verify that the attribute exists, but without specifying a value,
 | 
						|
    pass ``with_attribute.ANY_VALUE`` as the value.
 | 
						|
 | 
						|
    The next two examples use the following input data and tag parsers:
 | 
						|
 | 
						|
    .. testcode::
 | 
						|
 | 
						|
       html = '''
 | 
						|
           <div>
 | 
						|
           Some text
 | 
						|
           <div type="grid">1 4 0 1 0</div>
 | 
						|
           <div type="graph">1,3 2,3 1,1</div>
 | 
						|
           <div>this has no type</div>
 | 
						|
           </div>
 | 
						|
       '''
 | 
						|
       div,div_end = make_html_tags("div")
 | 
						|
 | 
						|
    Only match div tag having a type attribute with value "grid":
 | 
						|
 | 
						|
    .. testcode::
 | 
						|
 | 
						|
       div_grid = div().set_parse_action(with_attribute(type="grid"))
 | 
						|
       grid_expr = div_grid + SkipTo(div | div_end)("body")
 | 
						|
       for grid_header in grid_expr.search_string(html):
 | 
						|
           print(grid_header.body)
 | 
						|
 | 
						|
    prints:
 | 
						|
 | 
						|
    .. testoutput::
 | 
						|
 | 
						|
       1 4 0 1 0
 | 
						|
 | 
						|
    Construct a match with any div tag having a type attribute,
 | 
						|
    regardless of the value:
 | 
						|
 | 
						|
    .. testcode::
 | 
						|
 | 
						|
       div_any_type = div().set_parse_action(
 | 
						|
           with_attribute(type=with_attribute.ANY_VALUE)
 | 
						|
       )
 | 
						|
       div_expr = div_any_type + SkipTo(div | div_end)("body")
 | 
						|
       for div_header in div_expr.search_string(html):
 | 
						|
           print(div_header.body)
 | 
						|
 | 
						|
    prints:
 | 
						|
 | 
						|
    .. testoutput::
 | 
						|
 | 
						|
       1 4 0 1 0
 | 
						|
       1,3 2,3 1,1
 | 
						|
    """
 | 
						|
    attrs_list: list[tuple[str, str]] = []
 | 
						|
    if args:
 | 
						|
        attrs_list.extend(args)
 | 
						|
    else:
 | 
						|
        attrs_list.extend(attr_dict.items())
 | 
						|
 | 
						|
    def pa(s: str, l: int, tokens: ParseResults) -> None:
 | 
						|
        for attrName, attrValue in attrs_list:
 | 
						|
            if attrName not in tokens:
 | 
						|
                raise ParseException(s, l, "no matching attribute " + attrName)
 | 
						|
            if attrValue != with_attribute.ANY_VALUE and tokens[attrName] != attrValue:  # type: ignore [attr-defined]
 | 
						|
                raise ParseException(
 | 
						|
                    s,
 | 
						|
                    l,
 | 
						|
                    f"attribute {attrName!r} has value {tokens[attrName]!r}, must be {attrValue!r}",
 | 
						|
                )
 | 
						|
 | 
						|
    return pa
 | 
						|
 | 
						|
 | 
						|
with_attribute.ANY_VALUE = object()  # type: ignore [attr-defined]
 | 
						|
 | 
						|
 | 
						|
def with_class(classname: str, namespace: str = "") -> ParseAction:
 | 
						|
    """
 | 
						|
    Simplified version of :meth:`with_attribute` when
 | 
						|
    matching on a div class - made difficult because ``class`` is
 | 
						|
    a reserved word in Python.
 | 
						|
 | 
						|
    Using similar input data to the :meth:`with_attribute` examples:
 | 
						|
 | 
						|
    .. testcode::
 | 
						|
 | 
						|
       html = '''
 | 
						|
           <div>
 | 
						|
           Some text
 | 
						|
           <div class="grid">1 4 0 1 0</div>
 | 
						|
           <div class="graph">1,3 2,3 1,1</div>
 | 
						|
           <div>this <div> has no class</div>
 | 
						|
           </div>
 | 
						|
       '''
 | 
						|
       div,div_end = make_html_tags("div")
 | 
						|
 | 
						|
    Only match div tag having the "grid" class:
 | 
						|
 | 
						|
    .. testcode::
 | 
						|
 | 
						|
       div_grid = div().set_parse_action(with_class("grid"))
 | 
						|
       grid_expr = div_grid + SkipTo(div | div_end)("body")
 | 
						|
       for grid_header in grid_expr.search_string(html):
 | 
						|
           print(grid_header.body)
 | 
						|
 | 
						|
    prints:
 | 
						|
 | 
						|
    .. testoutput::
 | 
						|
 | 
						|
       1 4 0 1 0
 | 
						|
 | 
						|
    Construct a match with any div tag having a class attribute,
 | 
						|
    regardless of the value:
 | 
						|
 | 
						|
    .. testcode::
 | 
						|
 | 
						|
       div_any_type = div().set_parse_action(
 | 
						|
           with_class(withAttribute.ANY_VALUE)
 | 
						|
       )
 | 
						|
       div_expr = div_any_type + SkipTo(div | div_end)("body")
 | 
						|
       for div_header in div_expr.search_string(html):
 | 
						|
           print(div_header.body)
 | 
						|
 | 
						|
    prints:
 | 
						|
 | 
						|
    .. testoutput::
 | 
						|
 | 
						|
       1 4 0 1 0
 | 
						|
       1,3 2,3 1,1
 | 
						|
    """
 | 
						|
    classattr = f"{namespace}:class" if namespace else "class"
 | 
						|
    return with_attribute(**{classattr: classname})
 | 
						|
 | 
						|
 | 
						|
# Compatibility synonyms
 | 
						|
# fmt: off
 | 
						|
replaceWith = replaced_by_pep8("replaceWith", replace_with)
 | 
						|
removeQuotes = replaced_by_pep8("removeQuotes", remove_quotes)
 | 
						|
withAttribute = replaced_by_pep8("withAttribute", with_attribute)
 | 
						|
withClass = replaced_by_pep8("withClass", with_class)
 | 
						|
matchOnlyAtCol = replaced_by_pep8("matchOnlyAtCol", match_only_at_col)
 | 
						|
# fmt: on
 |