"""
    pygments.formatters.other
    ~~~~~~~~~~~~~~~~~~~~~~~~~

    Other formatters: NullFormatter, RawTokenFormatter.

    :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

from pygments.formatter import Formatter
from pygments.util import get_choice_opt
from pygments.token import Token
from pygments.console import colorize

__all__ = ['NullFormatter', 'RawTokenFormatter', 'TestcaseFormatter']


class NullFormatter(Formatter):
    """
    Output the text unchanged without any formatting.
    """
    name = 'Text only'
    aliases = ['text', 'null']
    filenames = ['*.txt']

    def format(self, tokensource, outfile):
        enc = self.encoding
        for ttype, value in tokensource:
            if enc:
                outfile.write(value.encode(enc))
            else:
                outfile.write(value)
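

# A minimal usage sketch (not part of the original module; the helper name
# is hypothetical): because NullFormatter writes token text unchanged,
# highlighting any source with it returns that source verbatim.
def _demo_null_formatter():
    from pygments import highlight
    from pygments.lexers.python import PythonLexer

    source = "print('hello')\n"
    # No markup is added, so the round trip is the identity.
    assert highlight(source, PythonLexer(), NullFormatter()) == source

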
class RawTokenFormatter(Formatter):
    r"""
    Format tokens as a raw representation for storing token streams.

    The format is ``tokentype<TAB>repr(tokenstring)\n``. The output can later
    be converted to a token stream with the `RawTokenLexer`, described in the
    :doc:`lexer list <lexers>`.

    Only two options are accepted:

    `compress`
        If set to ``'gz'`` or ``'bz2'``, compress the output with the given
        compression algorithm after encoding (default: ``''``).
    `error_color`
        If set to a color name, highlight error tokens using that color.  If
        set but with no value, defaults to ``'red'``.

        .. versionadded:: 0.11

    """
    name = 'Raw tokens'
    aliases = ['raw', 'tokens']
    filenames = ['*.raw']

    unicodeoutput = False

    def __init__(self, **options):
        Formatter.__init__(self, **options)
        # We ignore self.encoding if it is set, since it gets set for lexer
        # and formatter if given with -Oencoding on the command line.
        # The RawTokenFormatter outputs only ASCII. Override here.
        self.encoding = 'ascii'  # let pygments.format() do the right thing
        self.compress = get_choice_opt(options, 'compress',
                                       ['', 'none', 'gz', 'bz2'], '')
        self.error_color = options.get('error_color', None)
        if self.error_color is True:
            self.error_color = 'red'
        if self.error_color is not None:
            try:
                colorize(self.error_color, '')
            except KeyError:
                raise ValueError(f"Invalid color {self.error_color!r} specified")

    def format(self, tokensource, outfile):
        try:
            outfile.write(b'')
        except TypeError:
            raise TypeError('The raw tokens formatter needs a binary '
                            'output file')
        if self.compress == 'gz':
            import gzip
            outfile = gzip.GzipFile('', 'wb', 9, outfile)

            write = outfile.write
            flush = outfile.close
        elif self.compress == 'bz2':
            import bz2
            compressor = bz2.BZ2Compressor(9)

            def write(text):
                outfile.write(compressor.compress(text))

            def flush():
                outfile.write(compressor.flush())
                outfile.flush()
        else:
            write = outfile.write
            flush = outfile.flush

        if self.error_color:
            for ttype, value in tokensource:
                line = b"%r\t%r\n" % (ttype, value)
                if ttype is Token.Error:
                    write(colorize(self.error_color, line))
                else:
                    write(line)
        else:
            for ttype, value in tokensource:
                write(b"%r\t%r\n" % (ttype, value))
        flush()
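

# A minimal usage sketch (not part of the original module; the helper name
# is hypothetical): shows the ``tokentype<TAB>repr(tokenstring)`` line
# format and the ``compress='gz'`` option described in the docstring above.
def _demo_raw_token_formatter():
    import gzip
    from pygments import format as format_tokens
    from pygments.lexers.python import PythonLexer

    tokens = list(PythonLexer().get_tokens("x = 1\n"))

    # With no outfile, pygments.format() returns bytes here, because this
    # formatter forces an 'ascii' encoding; one token per line.
    raw = format_tokens(tokens, RawTokenFormatter())
    assert raw.splitlines()[0] == b"Token.Name\t'x'"

    # compress='gz' wraps the same byte stream in gzip.
    packed = format_tokens(tokens, RawTokenFormatter(compress='gz'))
    assert gzip.decompress(packed) == raw

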
TESTCASE_BEFORE = '''\
    def testNeedsName(lexer):
        fragment = %r
        tokens = [
'''
TESTCASE_AFTER = '''\
        ]
        assert list(lexer.get_tokens(fragment)) == tokens
'''


class TestcaseFormatter(Formatter):
    """
    Format tokens as appropriate for a new testcase.

    .. versionadded:: 2.0
    """
    name = 'Testcase'
    aliases = ['testcase']

    def __init__(self, **options):
        Formatter.__init__(self, **options)
        if self.encoding is not None and self.encoding != 'utf-8':
            raise ValueError("Only None and utf-8 are allowed encodings.")

    def format(self, tokensource, outfile):
        indentation = ' ' * 12
        rawbuf = []
        outbuf = []
        for ttype, value in tokensource:
            rawbuf.append(value)
            outbuf.append(f'{indentation}({ttype}, {value!r}),\n')

        before = TESTCASE_BEFORE % (''.join(rawbuf),)
        during = ''.join(outbuf)
        after = TESTCASE_AFTER
        if self.encoding is None:
            outfile.write(before + during + after)
        else:
            outfile.write(before.encode('utf-8'))
            outfile.write(during.encode('utf-8'))
            outfile.write(after.encode('utf-8'))
        outfile.flush()
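

# A minimal usage sketch (not part of the original module; the helper name
# is hypothetical): render a small fragment as a ready-to-paste test
# function built from the TESTCASE_BEFORE/TESTCASE_AFTER templates.
def _demo_testcase_formatter():
    from pygments import format as format_tokens
    from pygments.lexers.python import PythonLexer

    fragment = "x = 1\n"
    testcase = format_tokens(PythonLexer().get_tokens(fragment),
                             TestcaseFormatter())
    # testcase is a string shaped like:
    #     def testNeedsName(lexer):
    #         fragment = 'x = 1\n'
    #         tokens = [
    #             (Token.Name, 'x'),
    #             ...
    #         ]
    #         assert list(lexer.get_tokens(fragment)) == tokens
    print(testcase)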