You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
	
	
		
			105 lines
		
	
	
		
			3.0 KiB
		
	
	
	
		
			Python
		
	
			
		
		
	
	
			105 lines
		
	
	
		
			3.0 KiB
		
	
	
	
		
			Python
		
	
"""
 | 
						|
    pygments.scanner
 | 
						|
    ~~~~~~~~~~~~~~~~
 | 
						|
 | 
						|
    This library implements a regex based scanner. Some languages
 | 
						|
    like Pascal are easy to parse but have some keywords that
 | 
						|
    depend on the context. Because of this it's impossible to lex
 | 
						|
    that just by using a regular expression lexer like the
 | 
						|
    `RegexLexer`.
 | 
						|
 | 
						|
    Have a look at the `DelphiLexer` to get an idea of how to use
 | 
						|
    this scanner.
 | 
						|
 | 
						|
    :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS.
 | 
						|
    :license: BSD, see LICENSE for details.
 | 
						|
"""
 | 
						|
import re
 | 
						|
 | 
						|
 | 
						|
class EndOfText(RuntimeError):
    """
    Signals that a match function was called although the
    scanner had already reached the end of the text.
    """
 | 
						|
 | 
						|
 | 
						|
class Scanner:
    """
    Simple regex based scanner.

    All method patterns are regular expression strings (not
    compiled expressions!). Each pattern is compiled once with the
    scanner's default flags and cached on the instance.
    """

    def __init__(self, text, flags=0):
        """
        :param text:    The text which should be scanned
        :param flags:   default regular expression flags
        """
        self.data = text
        self.data_length = len(text)
        # Start offset of the most recent successful ``scan``.
        self.start_pos = 0
        # Current offset into ``data``; every match is anchored here.
        self.pos = 0
        self.flags = flags
        # ``match`` holds the text of the most recent successful scan,
        # ``last`` the value ``match`` had when ``scan`` was last called.
        self.last = None
        self.match = None
        # Maps pattern string -> pattern compiled with ``self.flags``.
        self._re_cache = {}

    @property
    def eos(self):
        """`True` if the scanner reached the end of text."""
        # A decorator is used on purpose: ``property(eos, eos.__doc__)``
        # would hand the docstring to ``property`` as the *setter*.
        return self.pos >= self.data_length

    def _compile(self, pattern):
        """Return the compiled form of `pattern`, compiling it on first use."""
        try:
            return self._re_cache[pattern]
        except KeyError:
            compiled = re.compile(pattern, self.flags)
            self._re_cache[pattern] = compiled
            return compiled

    def check(self, pattern):
        """
        Apply `pattern` on the current position and return
        the match object (or `None` if it does not match).
        Doesn't touch pos. Use this for lookahead.

        :raises EndOfText: if the scanner is already at the end of text
        """
        if self.eos:
            raise EndOfText()
        return self._compile(pattern).match(self.data, self.pos)

    def test(self, pattern):
        """Apply a pattern on the current position and check
        if it matches. Doesn't touch pos.

        :raises EndOfText: if the scanner is already at the end of text
        """
        return self.check(pattern) is not None

    def scan(self, pattern):
        """
        Scan the text for the given pattern and update pos/match
        and related fields. The return value is a boolean that
        indicates if the pattern matched. The matched value is
        stored on the instance as ``match``, the last value is
        stored as ``last``. ``start_pos`` is the position of the
        pointer before the pattern was matched, ``pos`` is the
        end position.

        :raises EndOfText: if the scanner is already at the end of text
        """
        if self.eos:
            raise EndOfText()
        regex = self._compile(pattern)
        # ``last`` is refreshed on every attempt, even when the pattern
        # fails to match and ``match`` therefore keeps its old value.
        self.last = self.match
        m = regex.match(self.data, self.pos)
        if m is None:
            return False
        self.start_pos = m.start()
        self.pos = m.end()
        self.match = m.group()
        return True

    def get_char(self):
        """Scan exactly one char.

        The pattern used is ``'.'``, so a newline is only consumed when
        the scanner's flags include ``re.DOTALL``.
        """
        self.scan('.')

    def __repr__(self):
        return '<%s %d/%d>' % (
            self.__class__.__name__,
            self.pos,
            self.data_length
        )
 |