You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
	
	
		
			486 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			Python
		
	
			
		
		
	
	
			486 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			Python
		
	
# common.py
 | 
						|
from .core import *
 | 
						|
from .helpers import DelimitedList, any_open_tag, any_close_tag
 | 
						|
from datetime import datetime
 | 
						|
 | 
						|
 | 
						|
# some other useful expressions - using lower-case class name since we are really using this as a namespace
 | 
						|
class pyparsing_common:
    """Here are some common low-level expressions that may be useful in
    jump-starting parser development:

    - numeric forms (:class:`integers<integer>`, :class:`reals<real>`,
      :class:`scientific notation<sci_real>`)
    - common :class:`programming identifiers<identifier>`
    - network addresses (:class:`MAC<mac_address>`,
      :class:`IPv4<ipv4_address>`, :class:`IPv6<ipv6_address>`)
    - ISO8601 :class:`dates<iso8601_date>` and
      :class:`datetime<iso8601_datetime>`
    - :class:`UUID<uuid>`
    - :class:`comma-separated list<comma_separated_list>`
    - :class:`url`

    Parse actions:

    - :class:`convert_to_integer`
    - :class:`convert_to_float`
    - :class:`convert_to_date`
    - :class:`convert_to_datetime`
    - :class:`strip_html_tags`
    - :class:`upcase_tokens`
    - :class:`downcase_tokens`

    Examples:

    .. testcode::

        pyparsing_common.number.run_tests('''
            # any int or real number, returned as the appropriate type
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        # any int or real number, returned as the appropriate type
        100
        [100]

        -100
        [-100]

        +100
        [100]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

    .. testcode::

        pyparsing_common.fnumber.run_tests('''
            # any int or real number, returned as float
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        # any int or real number, returned as float
        100
        [100.0]

        -100
        [-100.0]

        +100
        [100.0]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

    .. testcode::

        pyparsing_common.hex_integer.run_tests('''
            # hex numbers
            100
            FF
            ''')

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        # hex numbers
        100
        [256]

        FF
        [255]

    .. testcode::

        pyparsing_common.fraction.run_tests('''
            # fractions
            1/2
            -3/4
            ''')

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        # fractions
        1/2
        [0.5]

        -3/4
        [-0.75]

    .. testcode::

        pyparsing_common.mixed_integer.run_tests('''
            # mixed fractions
            1
            1/2
            -3/4
            1-3/4
            ''')

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        # mixed fractions
        1
        [1]

        1/2
        [0.5]

        -3/4
        [-0.75]

        1-3/4
        [1.75]

    .. testcode::

        import uuid
        pyparsing_common.uuid.set_parse_action(token_map(uuid.UUID))
        pyparsing_common.uuid.run_tests('''
            # uuid
            12345678-1234-5678-1234-567812345678
            ''')

    .. testoutput::
        :options: +NORMALIZE_WHITESPACE


        # uuid
        12345678-1234-5678-1234-567812345678
        [UUID('12345678-1234-5678-1234-567812345678')]
    """

    # NOTE: every ParserElement bound as a class attribute here is picked up by
    # the module-level ``_builtin_exprs`` scan of ``vars(pyparsing_common)``,
    # so avoid introducing throwaway intermediate attributes in this body.

    convert_to_integer = token_map(int)
    """
    Parse action for converting parsed integers to Python int
    """

    convert_to_float = token_map(float)
    """
    Parse action for converting parsed numbers to Python float
    """

    integer = Word(nums).set_name("integer").set_parse_action(convert_to_integer)
    """expression that parses an unsigned integer, returns an int"""

    # the extra 16 is handed to token_map along with int - presumably forwarded
    # as the base argument of int(); confirm against token_map
    hex_integer = (
        Word(hexnums).set_name("hex integer").set_parse_action(token_map(int, 16))
    )
    """expression that parses a hexadecimal integer, returns an int"""

    signed_integer = (
        Regex(r"[+-]?\d+")
        .set_name("signed integer")
        .set_parse_action(convert_to_integer)
    )
    """expression that parses an integer with optional leading sign, returns an int"""

    # signed_integer() is called before attaching the float conversion -
    # presumably this yields a copy so signed_integer itself keeps returning
    # ints; confirm against ParserElement.__call__
    fraction = (
        signed_integer().set_parse_action(convert_to_float)
        + "/"
        + signed_integer().set_parse_action(convert_to_float)
    ).set_name("fraction")
    """fractional expression of an integer divided by an integer, returns a float"""
    # first token is the numerator, last token is the denominator
    fraction.add_parse_action(lambda tt: tt[0] / tt[-1])

    mixed_integer = (
        fraction | signed_integer + Opt(Opt("-").suppress() + fraction)
    ).set_name("fraction or mixed integer-fraction")
    """mixed integer of the form 'integer - fraction', with optional leading integer, returns float"""
    # add the integer part and the fractional part together
    mixed_integer.add_parse_action(sum)

    real = (
        Regex(r"[+-]?(?:\d+\.\d*|\.\d+)")
        .set_name("real number")
        .set_parse_action(convert_to_float)
    )
    """expression that parses a floating point number and returns a float"""

    sci_real = (
        Regex(r"[+-]?(?:\d+(?:[eE][+-]?\d+)|(?:\d+\.\d*|\.\d+)(?:[eE][+-]?\d+)?)")
        .set_name("real number with scientific notation")
        .set_parse_action(convert_to_float)
    )
    """expression that parses a floating point number with optional
    scientific notation and returns a float"""

    # streamlining this expression makes the docs nicer-looking
    number = (sci_real | real | signed_integer).set_name("number").streamline()
    """any numeric expression, returns the corresponding Python type"""

    fnumber = (
        Regex(r"[+-]?\d+\.?\d*(?:[eE][+-]?\d+)?")
        .set_name("fnumber")
        .set_parse_action(convert_to_float)
    )
    """any int or real number, returned as float"""

    ieee_float = (
        Regex(r"(?i:[+-]?(?:(?:\d+\.?\d*(?:e[+-]?\d+)?)|nan|inf(?:inity)?))")
        .set_name("ieee_float")
        .set_parse_action(convert_to_float)
    )
    """any floating-point literal (int, real number, infinity, or NaN), returned as float"""

    identifier = Word(identchars, identbodychars).set_name("identifier")
    """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')"""

    ipv4_address = Regex(
        r"(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}"
    ).set_name("IPv4 address")
    "IPv4 address (``0.0.0.0 - 255.255.255.255``)"

    _ipv6_part = Regex(r"[0-9a-fA-F]{1,4}").set_name("hex_integer")
    _full_ipv6_address = (_ipv6_part + (":" + _ipv6_part) * 7).set_name(
        "full IPv6 address"
    )
    _short_ipv6_address = (
        Opt(_ipv6_part + (":" + _ipv6_part) * (0, 6))
        + "::"
        + Opt(_ipv6_part + (":" + _ipv6_part) * (0, 6))
    ).set_name("short IPv6 address")
    # the two optional halves could match up to 14 groups between them; only
    # accept the short form when fewer than 8 hex groups were actually parsed
    _short_ipv6_address.add_condition(
        lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8
    )
    _mixed_ipv6_address = ("::ffff:" + ipv4_address).set_name("mixed IPv6 address")
    ipv6_address = Combine(
        (_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).set_name(
            "IPv6 address"
        )
    ).set_name("IPv6 address")
    "IPv6 address (long, short, or mixed form)"

    # the back-reference \1 forces the same delimiter between all six octets
    mac_address = Regex(
        r"[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}"
    ).set_name("MAC address")
    "MAC address xx:xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)"

    @staticmethod
    def convert_to_date(fmt: str = "%Y-%m-%d"):
        """
        Helper to create a parse action for converting parsed date string to Python datetime.date

        Params -
        - fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%d"``)

        The returned parse action raises :class:`ParseException` (chained
        from the underlying ``ValueError``) if the matched text does not
        conform to ``fmt``.

        Example:

        .. testcode::

            date_expr = pyparsing_common.iso8601_date.copy()
            date_expr.set_parse_action(pyparsing_common.convert_to_date())
            print(date_expr.parse_string("1999-12-31"))

        prints:

        .. testoutput::

            [datetime.date(1999, 12, 31)]
        """

        def cvt_fn(ss, ll, tt):
            try:
                return datetime.strptime(tt[0], fmt).date()
            except ValueError as ve:
                # report the strptime failure as a parse failure at this
                # location, keeping the original error as the explicit cause
                raise ParseException(ss, ll, str(ve)) from ve

        return cvt_fn

    @staticmethod
    def convert_to_datetime(fmt: str = "%Y-%m-%dT%H:%M:%S.%f"):
        """Helper to create a parse action for converting parsed
        datetime string to Python datetime.datetime

        Params -
        - fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%dT%H:%M:%S.%f"``)

        The returned parse action raises :class:`ParseException` (chained
        from the underlying ``ValueError``) if the matched text does not
        conform to ``fmt``.

        Example:

        .. testcode::

            dt_expr = pyparsing_common.iso8601_datetime.copy()
            dt_expr.set_parse_action(pyparsing_common.convert_to_datetime())
            print(dt_expr.parse_string("1999-12-31T23:59:59.999"))

        prints:

        .. testoutput::

            [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]
        """

        def cvt_fn(ss, ll, tt):
            try:
                return datetime.strptime(tt[0], fmt)
            except ValueError as ve:
                # report the strptime failure as a parse failure at this
                # location, keeping the original error as the explicit cause
                raise ParseException(ss, ll, str(ve)) from ve

        return cvt_fn

    iso8601_date = Regex(
        r"(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?"
    ).set_name("ISO8601 date")
    "ISO8601 date (``yyyy-mm-dd``)"

    iso8601_datetime = Regex(
        r"(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?"
    ).set_name("ISO8601 datetime")
    "ISO8601 datetime (``yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)``) - trailing seconds, milliseconds, and timezone optional; accepts separating ``'T'`` or ``' '``"

    uuid = Regex(r"[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}").set_name("UUID")
    "UUID (``xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx``)"

    # suppressed open/close tag matcher, used by strip_html_tags via transform_string
    _html_stripper = any_open_tag.suppress() | any_close_tag.suppress()

    @staticmethod
    def strip_html_tags(s: str, l: int, tokens: ParseResults):
        """Parse action to remove HTML tags from web page HTML source

        Only the first token (``tokens[0]``) is processed; the result of
        transforming it with the suppressed-tag expression is returned.

        Example:

        .. testcode::

            # strip HTML links from normal text
            text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
            td, td_end = make_html_tags("TD")
            table_text = td + SkipTo(td_end).set_parse_action(
                pyparsing_common.strip_html_tags)("body") + td_end
            print(table_text.parse_string(text).body)

        Prints:

        .. testoutput::

            More info at the pyparsing wiki page
        """
        return pyparsing_common._html_stripper.transform_string(tokens[0])

    _commasepitem = (
        Combine(
            OneOrMore(
                ~Literal(",")
                + ~LineEnd()
                + Word(printables, exclude_chars=",")
                + Opt(White(" \t") + ~FollowedBy(LineEnd() | ","))
            )
        )
        .streamline()
        .set_name("commaItem")
    )
    comma_separated_list = DelimitedList(
        Opt(quoted_string.copy() | _commasepitem, default="")
    ).set_name("comma separated list")
    """Predefined expression of 1 or more printable words or quoted strings, separated by commas."""

    upcase_tokens = staticmethod(token_map(lambda t: t.upper()))
    """Parse action to convert tokens to upper case."""

    downcase_tokens = staticmethod(token_map(lambda t: t.lower()))
    """Parse action to convert tokens to lower case."""

    # fmt: off
    url = Regex(
        # https://mathiasbynens.be/demo/url-regex
        # https://gist.github.com/dperini/729294
        r"(?P<url>" +
        # protocol identifier (optional)
        # short syntax // still required
        r"(?:(?:(?P<scheme>https?|ftp):)?\/\/)" +
        # user:pass BasicAuth (optional)
        r"(?:(?P<auth>\S+(?::\S*)?)@)?" +
        r"(?P<host>" +
        # IP address exclusion
        # private & local networks
        r"(?!(?:10|127)(?:\.\d{1,3}){3})" +
        r"(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})" +
        r"(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})" +
        # IP address dotted notation octets
        # excludes loopback network 0.0.0.0
        # excludes reserved space >= 224.0.0.0
        # excludes network & broadcast addresses
        # (first & last IP address of each class)
        r"(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])" +
        r"(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}" +
        r"(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))" +
        r"|" +
        # host & domain names, may end with dot
        # can be replaced by a shortest alternative
        # (?![-_])(?:[-\w\u00a1-\uffff]{0,63}[^-_]\.)+
        r"(?:" +
        r"(?:" +
        r"[a-z0-9\u00a1-\uffff]" +
        r"[a-z0-9\u00a1-\uffff_-]{0,62}" +
        r")?" +
        r"[a-z0-9\u00a1-\uffff]\." +
        r")+" +
        # TLD identifier name, may end with dot
        r"(?:[a-z\u00a1-\uffff]{2,}\.?)" +
        r")" +
        # port number (optional)
        r"(:(?P<port>\d{2,5}))?" +
        # resource path (optional)
        r"(?P<path>\/[^?# ]*)?" +
        # query string (optional)
        r"(\?(?P<query>[^#]*))?" +
        # fragment (optional)
        r"(#(?P<fragment>\S*))?" +
        r")"
    ).set_name("url")
    """
    URL (http/https/ftp scheme)

    .. versionchanged:: 3.1.0
       ``url`` named group added
    """
    # fmt: on

    # pre-PEP8 compatibility names
    # fmt: off
    convertToInteger = staticmethod(replaced_by_pep8("convertToInteger", convert_to_integer))
    convertToFloat = staticmethod(replaced_by_pep8("convertToFloat", convert_to_float))
    convertToDate = staticmethod(replaced_by_pep8("convertToDate", convert_to_date))
    convertToDatetime = staticmethod(replaced_by_pep8("convertToDatetime", convert_to_datetime))
    stripHTMLTags = staticmethod(replaced_by_pep8("stripHTMLTags", strip_html_tags))
    upcaseTokens = staticmethod(replaced_by_pep8("upcaseTokens", upcase_tokens))
    downcaseTokens = staticmethod(replaced_by_pep8("downcaseTokens", downcase_tokens))
    # fmt: on
 | 
						|
 | 
						|
 | 
						|
# Gather every parser expression that is bound as an attribute of the
# pyparsing_common namespace class (plain functions, staticmethod wrappers
# and strings are skipped by the isinstance filter).
_builtin_exprs = [
    member
    for member in vars(pyparsing_common).values()
    if isinstance(member, ParserElement)
]
 |