"""
 | 
						|
    pygments.lexers.textfmts
 | 
						|
    ~~~~~~~~~~~~~~~~~~~~~~~~
 | 
						|
 | 
						|
    Lexers for various text formats.
 | 
						|
 | 
						|
    :copyright: Copyright 2006-2025 by the Pygments team, see AUTHORS.
 | 
						|
    :license: BSD, see LICENSE for details.
 | 
						|
"""
 | 
						|
 | 
						|
import re
 | 
						|
 | 
						|
from pygments.lexers import guess_lexer, get_lexer_by_name
 | 
						|
from pygments.lexer import RegexLexer, bygroups, default, include
 | 
						|
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
 | 
						|
    Number, Generic, Literal, Punctuation
 | 
						|
from pygments.util import ClassNotFound
 | 
						|
 | 
						|
__all__ = ['IrcLogsLexer', 'TodotxtLexer', 'HttpLexer', 'GettextLexer',
 | 
						|
           'NotmuchLexer', 'KernelLogLexer']
 | 
						|
 | 
						|
 | 
						|
class IrcLogsLexer(RegexLexer):
    """
    Lexer for IRC logs in *irssi*, *xchat* or *weechat* style.
    """

    name = 'IRC logs'
    aliases = ['irc']
    filenames = ['*.weechatlog']
    mimetypes = ['text/x-irclog']
    url = 'https://en.wikipedia.org/wiki/Internet_Relay_Chat'
    version_added = ''

    flags = re.VERBOSE | re.MULTILINE
    timestamp = r"""
        (
          # irssi / xchat and others
          (?: \[|\()?                  # Opening bracket or paren for the timestamp
            (?:                        # Timestamp
                (?: (?:\d{1,4} [-/])*  # Date as - or /-separated groups of digits
                    (?:\d{1,4})
                 [T ])?                # Date/time separator: T or space
                (?: \d?\d [:.])*       # Time as :/.-separated groups of 1 or 2 digits
                    (?: \d?\d)
            )
          (?: \]|\))?\s+               # Closing bracket or paren for the timestamp
        |
          # weechat
          \d{4}\s\w{3}\s\d{2}\s        # Date
          \d{2}:\d{2}:\d{2}\s+         # Time + Whitespace
        |
          # xchat
          \w{3}\s\d{2}\s               # Date
          \d{2}:\d{2}:\d{2}\s+         # Time + Whitespace
        )?
    """
    tokens = {
        'root': [
            # log start/end
            (r'^\*\*\*\*(.*)\*\*\*\*$', Comment),
            # hack
            ("^" + timestamp + r'(\s*<[^>]*>\s*)$', bygroups(Comment.Preproc, Name.Tag)),
            # normal msgs
            ("^" + timestamp + r"""
                (\s*<.*?>\s*)          # Nick """,
             bygroups(Comment.Preproc, Name.Tag), 'msg'),
            # /me msgs
            ("^" + timestamp + r"""
                (\s*[*]\s+)            # Star
                (\S+\s+.*?\n)          # Nick + rest of message """,
             bygroups(Comment.Preproc, Keyword, Generic.Inserted)),
            # join/part msgs
            ("^" + timestamp + r"""
                (\s*(?:\*{3}|<?-[!@=P]?->?)\s*)  # Star(s) or symbols
                (\S+\s+)                         # Nick + Space
                (.*?\n)                          # Rest of message """,
             bygroups(Comment.Preproc, Keyword, String, Comment)),
            (r"^.*?\n", Text),
        ],
        'msg': [
            (r"\S+:(?!//)", Name.Attribute),  # Prefix
            (r".*\n", Text, '#pop'),
        ],
    }
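

# Illustrative input for IrcLogsLexer (hypothetical log lines, one per
# supported timestamp style: irssi-like, weechat, xchat):
#
#     [20:11] <alice> hi bob: did you push the fix?
#     2024 Jan 01 20:11:02  * bob waves
#     Jan 01 20:11:05 --> carol has joined #pygments
#
# A minimal usage sketch (``log_text`` is a hypothetical variable holding
# such lines):
#
#     from pygments import highlight
#     from pygments.formatters import TerminalFormatter
#     print(highlight(log_text, IrcLogsLexer(), TerminalFormatter()))

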
class GettextLexer(RegexLexer):
    """
    Lexer for Gettext catalog files.
    """
    name = 'Gettext Catalog'
    aliases = ['pot', 'po']
    filenames = ['*.pot', '*.po']
    mimetypes = ['application/x-gettext', 'text/x-gettext', 'text/gettext']
    url = 'https://www.gnu.org/software/gettext'
    version_added = '0.9'

    tokens = {
        'root': [
            (r'^#,\s.*?$', Keyword.Type),
            (r'^#:\s.*?$', Keyword.Declaration),
            # (r'^#$', Comment),
            (r'^(#|#\.\s|#\|\s|#~\s|#\s).*$', Comment.Single),
            (r'^(")([A-Za-z-]+:)(.*")$',
             bygroups(String, Name.Property, String)),
            (r'^".*"$', String),
            (r'^(msgid|msgid_plural|msgstr|msgctxt)(\s+)(".*")$',
             bygroups(Name.Variable, Text, String)),
            (r'^(msgstr\[)(\d)(\])(\s+)(".*")$',
             bygroups(Name.Variable, Number.Integer, Name.Variable, Text, String)),
        ]
    }
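

# Illustrative catalog fragment (hypothetical entry) exercising the
# reference, flag, comment, and message rules above:
#
#     #: src/main.c:42
#     #, fuzzy
#     # translator note
#     msgid "Hello, world"
#     msgstr "Bonjour, monde"

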
class HttpLexer(RegexLexer):
    """
    Lexer for HTTP sessions.
    """

    name = 'HTTP'
    aliases = ['http']
    url = 'https://httpwg.org/specs'
    version_added = '1.5'

    flags = re.DOTALL

    def get_tokens_unprocessed(self, text, stack=('root',)):
        """Reset the content-type state."""
        self.content_type = None
        return RegexLexer.get_tokens_unprocessed(self, text, stack)

    def header_callback(self, match):
        if match.group(1).lower() == 'content-type':
            content_type = match.group(5).strip()
            if ';' in content_type:
                content_type = content_type[:content_type.find(';')].strip()
            self.content_type = content_type
        yield match.start(1), Name.Attribute, match.group(1)
        yield match.start(2), Text, match.group(2)
        yield match.start(3), Operator, match.group(3)
        yield match.start(4), Text, match.group(4)
        yield match.start(5), Literal, match.group(5)
        yield match.start(6), Text, match.group(6)

    def continuous_header_callback(self, match):
        yield match.start(1), Text, match.group(1)
        yield match.start(2), Literal, match.group(2)
        yield match.start(3), Text, match.group(3)

    def content_callback(self, match):
        content_type = getattr(self, 'content_type', None)
        content = match.group()
        offset = match.start()
        if content_type:
            from pygments.lexers import get_lexer_for_mimetype
            possible_lexer_mimetypes = [content_type]
            if '+' in content_type:
                # application/calendar+xml can be treated as application/xml
                # if there's not a better match.
                general_type = re.sub(r'^(.*)/.*\+(.*)$', r'\1/\2',
                                      content_type)
                possible_lexer_mimetypes.append(general_type)

            for i in possible_lexer_mimetypes:
                try:
                    lexer = get_lexer_for_mimetype(i)
                except ClassNotFound:
                    pass
                else:
                    for idx, token, value in lexer.get_tokens_unprocessed(content):
                        yield offset + idx, token, value
                    return
        yield offset, Text, content

    tokens = {
        'root': [
            (r'([a-zA-Z][-_a-zA-Z]+)( +)([^ ]+)( +)'
             r'(HTTP)(/)(1\.[01]|2(?:\.0)?|3)(\r?\n|\Z)',
             bygroups(Name.Function, Text, Name.Namespace, Text,
                      Keyword.Reserved, Operator, Number, Text),
             'headers'),
            (r'(HTTP)(/)(1\.[01]|2(?:\.0)?|3)( +)(\d{3})(?:( +)([^\r\n]*))?(\r?\n|\Z)',
             bygroups(Keyword.Reserved, Operator, Number, Text, Number, Text,
                      Name.Exception, Text),
             'headers'),
        ],
        'headers': [
            (r'([^\s:]+)( *)(:)( *)([^\r\n]*)(\r?\n|\Z)', header_callback),
            (r'([\t ]+)([^\r\n]+)(\r?\n|\Z)', continuous_header_callback),
            (r'\r?\n', Text, 'content')
        ],
        'content': [
            (r'.+', content_callback)
        ]
    }

    def analyse_text(text):
        return any(
            re.search(pattern, text) is not None
            for pattern in (
                r'^([a-zA-Z][-_a-zA-Z]+)( +)([^ ]+)( +)(HTTP)(/)(1\.[01]|2(?:\.0)?|3)(\r?\n|\Z)',
                r'^(HTTP)(/)(1\.[01]|2(?:\.0)?|3)( +)(\d{3})(?:( +)([^\r\n]*))?(\r?\n|\Z)',
            )
        )
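

# A minimal usage sketch (illustrative; the request below is hypothetical).
# header_callback records the Content-Type, so content_callback hands the
# JSON body to the lexer registered for that mimetype:
#
#     from pygments import highlight
#     from pygments.formatters import TerminalFormatter
#
#     session = ('POST /api/items HTTP/1.1\r\n'
#                'Host: example.org\r\n'
#                'Content-Type: application/json\r\n'
#                '\r\n'
#                '{"name": "widget", "qty": 2}')
#     print(highlight(session, HttpLexer(), TerminalFormatter()))

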
class TodotxtLexer(RegexLexer):
    """
    Lexer for Todo.txt todo list format.
    """

    name = 'Todotxt'
    url = 'http://todotxt.com/'
    aliases = ['todotxt']
    version_added = '2.0'
    # *.todotxt is not a standard extension for Todo.txt files; including it
    # makes testing easier, and also makes autodetecting file type easier.
    filenames = ['todo.txt', '*.todotxt']
    mimetypes = ['text/x-todo']

    # Aliases mapping Todo.txt format concepts to standard token types
    CompleteTaskText = Operator  # Chosen to de-emphasize complete tasks
    IncompleteTaskText = Text    # Incomplete tasks should look like plain text

    # Priority should have most emphasis to indicate importance of tasks
    Priority = Generic.Heading
    # Dates should have next most emphasis because time is important
    Date = Generic.Subheading

    # Project and context should have equal weight, and be in different colors
    Project = Generic.Error
    Context = String

    # If tag functionality is added, it should have the same weight as Project
    # and Context, and a different color. Generic.Traceback would work well.

    # Regex patterns for building up rules; dates, priorities, projects, and
    # contexts are all atomic
    # TODO: Make date regex more ISO 8601 compliant
    date_regex = r'\d{4,}-\d{2}-\d{2}'
    priority_regex = r'\([A-Z]\)'
    project_regex = r'\+\S+'
    context_regex = r'@\S+'

    # Compound regex expressions
    complete_one_date_regex = r'(x )(' + date_regex + r')'
    complete_two_date_regex = (complete_one_date_regex + r'( )(' +
                               date_regex + r')')
    priority_date_regex = r'(' + priority_regex + r')( )(' + date_regex + r')'

    tokens = {
        # Should parse starting at beginning of line; each line is a task
        'root': [
            # Complete task entry points: two total:
            # 1. Complete task with two dates
            (complete_two_date_regex, bygroups(CompleteTaskText, Date,
                                               CompleteTaskText, Date),
             'complete'),
            # 2. Complete task with one date
            (complete_one_date_regex, bygroups(CompleteTaskText, Date),
             'complete'),

            # Incomplete task entry points: six total:
            # 1. Priority plus date
            (priority_date_regex, bygroups(Priority, IncompleteTaskText, Date),
             'incomplete'),
            # 2. Priority only
            (priority_regex, Priority, 'incomplete'),
            # 3. Leading date
            (date_regex, Date, 'incomplete'),
            # 4. Leading context
            (context_regex, Context, 'incomplete'),
            # 5. Leading project
            (project_regex, Project, 'incomplete'),
            # 6. Non-whitespace catch-all
            (r'\S+', IncompleteTaskText, 'incomplete'),
        ],

        # Parse a complete task
        'complete': [
            # Newline indicates end of task, should return to root
            (r'\s*\n', CompleteTaskText, '#pop'),
            # Tokenize contexts and projects
            (context_regex, Context),
            (project_regex, Project),
            # Tokenize non-whitespace text
            (r'\S+', CompleteTaskText),
            # Tokenize whitespace not containing a newline
            (r'\s+', CompleteTaskText),
        ],

        # Parse an incomplete task
        'incomplete': [
            # Newline indicates end of task, should return to root
            (r'\s*\n', IncompleteTaskText, '#pop'),
            # Tokenize contexts and projects
            (context_regex, Context),
            (project_regex, Project),
            # Tokenize non-whitespace text
            (r'\S+', IncompleteTaskText),
            # Tokenize whitespace not containing a newline
            (r'\s+', IncompleteTaskText),
        ],
    }
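

# Illustrative Todo.txt input (hypothetical tasks) hitting both entry points:
#
#     x 2024-01-02 2024-01-01 Pay bills +household @home
#     (A) 2024-01-03 Call the bank @phone
#
# The first line is a complete task with completion and creation dates; the
# second is an incomplete task with a priority and a creation date.

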
class NotmuchLexer(RegexLexer):
    """
    For Notmuch email text format.

    Additional options accepted:

    `body_lexer`
        If given, highlight the contents of the message body with the specified
        lexer, else guess it according to the body content (default: ``None``).
    """

    name = 'Notmuch'
    url = 'https://notmuchmail.org/'
    aliases = ['notmuch']
    version_added = '2.5'

    def _highlight_code(self, match):
        code = match.group(1)

        try:
            if self.body_lexer:
                lexer = get_lexer_by_name(self.body_lexer)
            else:
                lexer = guess_lexer(code.strip())
        except ClassNotFound:
            lexer = get_lexer_by_name('text')

        yield from lexer.get_tokens_unprocessed(code)

    tokens = {
        'root': [
            (r'\fmessage\{\s*', Keyword, ('message', 'message-attr')),
        ],
        'message-attr': [
            (r'(\s*id:\s*)(\S+)', bygroups(Name.Attribute, String)),
            (r'(\s*(?:depth|match|excluded):\s*)(\d+)',
             bygroups(Name.Attribute, Number.Integer)),
            (r'(\s*filename:\s*)(.+\n)',
             bygroups(Name.Attribute, String)),
            default('#pop'),
        ],
        'message': [
            (r'\fmessage\}\n', Keyword, '#pop'),
            (r'\fheader\{\n', Keyword, 'header'),
            (r'\fbody\{\n', Keyword, 'body'),
        ],
        'header': [
            (r'\fheader\}\n', Keyword, '#pop'),
            (r'((?:Subject|From|To|Cc|Date):\s*)(.*\n)',
             bygroups(Name.Attribute, String)),
            (r'(.*)(\s*\(.*\))(\s*\(.*\)\n)',
             bygroups(Generic.Strong, Literal, Name.Tag)),
        ],
        'body': [
            (r'\fpart\{\n', Keyword, 'part'),
            (r'\f(part|attachment)\{\s*', Keyword, ('part', 'part-attr')),
            (r'\fbody\}\n', Keyword, '#pop'),
        ],
        'part-attr': [
            (r'(ID:\s*)(\d+)', bygroups(Name.Attribute, Number.Integer)),
            (r'(,\s*)((?:Filename|Content-id):\s*)([^,]+)',
             bygroups(Punctuation, Name.Attribute, String)),
            (r'(,\s*)(Content-type:\s*)(.+\n)',
             bygroups(Punctuation, Name.Attribute, String)),
            default('#pop'),
        ],
        'part': [
            (r'\f(?:part|attachment)\}\n', Keyword, '#pop'),
            (r'\f(?:part|attachment)\{\s*', Keyword, ('#push', 'part-attr')),
            (r'^Non-text part: .*\n', Comment),
            (r'(?s)(.*?(?=\f(?:part|attachment)\}\n))', _highlight_code),
        ],
    }

    def analyse_text(text):
        return 1.0 if text.startswith('\fmessage{') else 0.0

    def __init__(self, **options):
        self.body_lexer = options.get('body_lexer', None)
        RegexLexer.__init__(self, **options)
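

# A minimal usage sketch (illustrative): this \f-delimited format corresponds
# to the plain-text output of ``notmuch show``; passing ``body_lexer`` forces
# plain-text bodies instead of per-body guessing. ``show_output`` is a
# hypothetical variable holding that output:
#
#     from pygments import highlight
#     from pygments.formatters import TerminalFormatter
#     print(highlight(show_output, NotmuchLexer(body_lexer='text'),
#                     TerminalFormatter()))

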
class KernelLogLexer(RegexLexer):
    """
    For Linux Kernel log ("dmesg") output.
    """
    name = 'Kernel log'
    aliases = ['kmsg', 'dmesg']
    filenames = ['*.kmsg', '*.dmesg']
    url = 'https://fr.wikipedia.org/wiki/Dmesg'
    version_added = '2.6'

    tokens = {
        'root': [
            (r'^[^:]+:debug : (?=\[)', Text, 'debug'),
            (r'^[^:]+:info  : (?=\[)', Text, 'info'),
            (r'^[^:]+:warn  : (?=\[)', Text, 'warn'),
            (r'^[^:]+:notice: (?=\[)', Text, 'warn'),
            (r'^[^:]+:err   : (?=\[)', Text, 'error'),
            (r'^[^:]+:crit  : (?=\[)', Text, 'error'),
            (r'^(?=\[)', Text, 'unknown'),
        ],
        'unknown': [
            (r'^(?=.+(warning|notice|audit|deprecated))', Text, 'warn'),
            (r'^(?=.+(error|critical|fail|Bug))', Text, 'error'),
            default('info'),
        ],
        'base': [
            (r'\[[0-9. ]+\] ', Number),
            (r'(?<=\] ).+?:', Keyword),
            (r'\n', Text, '#pop'),
        ],
        'debug': [
            include('base'),
            (r'.+\n', Comment, '#pop')
        ],
        'info': [
            include('base'),
            (r'.+\n', Text, '#pop')
        ],
        'warn': [
            include('base'),
            (r'.+\n', Generic.Strong, '#pop')
        ],
        'error': [
            include('base'),
            (r'.+\n', Generic.Error, '#pop')
        ]
    }
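

# Illustrative dmesg lines (hypothetical): the bracketed timestamp is lexed
# as Number and the prefix up to the first ':' after it as Keyword:
#
#     [    0.000000] Booting Linux on physical CPU 0x0
#     [    1.234567] usb 1-1: new high-speed USB device number 2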