You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
	
	
		
			371 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			Python
		
	
			
		
		
	
	
			371 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			Python
		
	
import io as StringIO
 | 
						|
import re
 | 
						|
import string
 | 
						|
from typing import Dict, Iterable, List, Match, Optional, TextIO, Tuple
 | 
						|
 | 
						|
from .metrics_core import Metric
 | 
						|
from .samples import Sample
 | 
						|
from .validation import (
 | 
						|
    _is_valid_legacy_metric_name, _validate_labelname, _validate_metric_name,
 | 
						|
)
 | 
						|
 | 
						|
 | 
						|
def text_string_to_metric_families(text: str) -> Iterable[Metric]:
    """Yield the metric families parsed from a Prometheus text-format string.

    The string is wrapped in an in-memory text stream and handed to
    text_fd_to_metric_families, which performs the actual parsing; see that
    function for the parsing rules.
    """
    stream = StringIO.StringIO(text)
    yield from text_fd_to_metric_families(stream)
 | 
						|
 | 
						|
 | 
						|
# Maps each two-character escape sequence (a backslash followed by one
# character) to the single character it stands for.
ESCAPE_SEQUENCES = {
    r'\\': '\\',   # backslash-backslash -> backslash
    r'\n': '\n',   # backslash-n -> newline
    r'\"': '"',    # backslash-quote -> double quote
}
 | 
						|
 | 
						|
 | 
						|
def replace_escape_sequence(match: Match[str]) -> str:
    """Return the character that the matched escape sequence stands for.

    The whole matched text is looked up in ESCAPE_SEQUENCES, so a match whose
    text is not a key of that table raises KeyError.
    """
    escaped = match.group(0)
    return ESCAPE_SEQUENCES[escaped]
 | 
						|
 | 
						|
 | 
						|
# Matches a backslash followed by a backslash or the letter "n"; used by
# _replace_help_escaping to unescape HELP text.
HELP_ESCAPING_RE = re.compile(r'\\[\\n]')
 | 
						|
# Matches a backslash followed by a backslash, the letter "n", or a double
# quote; used by _replace_escaping.
ESCAPING_RE = re.compile(r'\\[\\n"]')
 | 
						|
 | 
						|
 | 
						|
def _replace_help_escaping(s: str) -> str:
    """Return ``s`` with every HELP_ESCAPING_RE match replaced by the
    character that replace_escape_sequence maps it to.
    """
    return re.sub(HELP_ESCAPING_RE, replace_escape_sequence, s)
 | 
						|
 | 
						|
 | 
						|
def _replace_escaping(s: str) -> str:
    """Return ``s`` with every ESCAPING_RE match replaced by the character
    that replace_escape_sequence maps it to.
    """
    return re.sub(ESCAPING_RE, replace_escape_sequence, s)
 | 
						|
 | 
						|
 | 
						|
def _is_character_escaped(s: str, charpos: int) -> bool:
 | 
						|
    num_bslashes = 0
 | 
						|
    while (charpos > num_bslashes
 | 
						|
           and s[charpos - 1 - num_bslashes] == '\\'):
 | 
						|
        num_bslashes += 1
 | 
						|
    return num_bslashes % 2 == 1
 | 
						|
 | 
						|
 | 
						|
def parse_labels(labels_string: str, openmetrics: bool = False) -> Dict[str, str]:
    """Parse a comma-separated list of label terms into a dict.

    Each term is split by _next_term into a name and a value. A term without
    an ``=`` is given the name ``__name__`` by _next_term.

    Args:
        labels_string: The label text to parse.
        openmetrics: When true, a leading comma and empty terms are errors
            instead of being skipped.

    Returns:
        A dict mapping each label name to its unquoted, unescaped value.

    Raises:
        ValueError: On a leading comma in openmetrics mode
            ("leading comma: ..."), or for any other ValueError raised while
            parsing, which is re-raised as "Invalid labels: ...".
    """
    parsed: Dict[str, str] = {}

    remaining = labels_string.strip()
    if openmetrics and remaining.startswith(','):
        raise ValueError("leading comma: " + labels_string)
    try:
        # Consume one term per iteration until nothing is left.
        while remaining:
            raw_name, raw_value, remaining = _next_term(remaining, openmetrics)
            if not raw_value:
                if openmetrics:
                    raise ValueError("empty term in line: " + labels_string)
                continue

            label_name, name_was_quoted = _unquote_unescape(raw_name)
            # Names outside the legacy character set are only accepted quoted.
            if not (name_was_quoted or _is_valid_legacy_metric_name(label_name)):
                raise ValueError("unquoted UTF-8 metric name")

            # The value has to open with a double quote.
            if not raw_value.startswith('"'):
                raise ValueError

            # Locate the first unescaped quote after the opening one; it must
            # be the very last character of the value term. str.index raises
            # ValueError when no further quote exists.
            pos = 1
            while pos < len(raw_value):
                pos = raw_value.index('"', pos)
                if not _is_character_escaped(raw_value[:pos], pos):
                    break
                pos += 1
            if pos + 1 != len(raw_value):
                raise ValueError("unexpected text after quote: " + labels_string)

            label_value, _ = _unquote_unescape(raw_value)
            validate = _validate_metric_name if label_name == '__name__' else _validate_labelname
            validate(label_name)
            if label_name in parsed:
                raise ValueError("invalid line, duplicate label name: " + labels_string)
            parsed[label_name] = label_value
        return parsed
    except ValueError:
        raise ValueError("Invalid labels: " + labels_string)
 | 
						|
    
 | 
						|
 | 
						|
def _next_term(text: str, openmetrics: bool) -> Tuple[str, str, str]:
    """Split the next label term off the front of ``text``.

    A single leading comma is skipped. The term is then split at the first
    unquoted ``=``; when there is none, the label name is ``__name__`` and
    the whole term is the value.

    Returns:
        A ``(label name, value, rest)`` tuple. The value and the rest are
        stripped of surrounding whitespace; the label name is returned as
        sliced. The rest starts at the unquoted ``,`` or ``}`` that ended
        the term, if any. ``("", "", "")`` is returned when ``text`` was
        only a comma.

    Raises:
        ValueError: If two commas follow each other, or if the value is
            empty and ``openmetrics`` is true.
    """
    # There may be a leading comma, which is fine here.
    if text[0] == ',':
        text = text[1:]
        if text == "":
            return "", "", ""
        if text[0] == ',':
            raise ValueError("multiple commas")

    cut = _next_unquoted_char(text, '=,}')
    if cut >= 0 and text[cut] == "=":
        labelname, text = text[:cut], text[cut + 1:]
        cut = _next_unquoted_char(text, ',}')
    else:
        labelname = "__name__"

    # No terminator found: the term runs to the end of the text.
    end = len(text) if cut == -1 else cut
    term, rest = text[:end], text[end:]
    if openmetrics and not term:
        raise ValueError("empty term:", term)

    return labelname, term.strip(), rest.strip()
 | 
						|
 | 
						|
 | 
						|
def _next_unquoted_char(text: str, chs: Optional[str], startidx: int = 0) -> int:
 | 
						|
    """Return position of next unquoted character in tuple, or -1 if not found.
 | 
						|
    
 | 
						|
    It is always assumed that the first character being checked is not already
 | 
						|
    inside quotes.
 | 
						|
    """
 | 
						|
    in_quotes = False
 | 
						|
    if chs is None:
 | 
						|
        chs = string.whitespace
 | 
						|
 | 
						|
    for i, c in enumerate(text[startidx:]):
 | 
						|
        if c == '"' and not _is_character_escaped(text, startidx + i):
 | 
						|
            in_quotes = not in_quotes
 | 
						|
        if not in_quotes:
 | 
						|
            if c in chs:
 | 
						|
                return startidx + i
 | 
						|
    return -1
 | 
						|
 | 
						|
 | 
						|
def _last_unquoted_char(text: str, chs: Optional[str]) -> int:
 | 
						|
    """Return position of last unquoted character in list, or -1 if not found."""
 | 
						|
    i = len(text) - 1
 | 
						|
    in_quotes = False
 | 
						|
    if chs is None:
 | 
						|
        chs = string.whitespace
 | 
						|
    while i > 0:
 | 
						|
        if text[i] == '"' and not _is_character_escaped(text, i):
 | 
						|
            in_quotes = not in_quotes
 | 
						|
            
 | 
						|
        if not in_quotes:
 | 
						|
            if text[i] in chs:
 | 
						|
                return i
 | 
						|
        i -= 1
 | 
						|
    return -1
 | 
						|
 | 
						|
 | 
						|
def _split_quoted(text, separator, maxsplit=0):
 | 
						|
    """Splits on split_ch similarly to strings.split, skipping separators if
 | 
						|
    they are inside quotes.
 | 
						|
    """
 | 
						|
 | 
						|
    tokens = ['']
 | 
						|
    x = 0
 | 
						|
    while x < len(text):
 | 
						|
        split_pos = _next_unquoted_char(text, separator, x)
 | 
						|
        if split_pos == -1:
 | 
						|
            tokens[-1] = text[x:]
 | 
						|
            x = len(text)
 | 
						|
            continue
 | 
						|
        # If the first character is the separator keep going. This happens when
 | 
						|
        # there are double whitespace characters separating symbols.
 | 
						|
        if split_pos == x:
 | 
						|
            x += 1
 | 
						|
            continue
 | 
						|
 | 
						|
        if maxsplit > 0 and len(tokens) > maxsplit:
 | 
						|
            tokens[-1] = text[x:]
 | 
						|
            break
 | 
						|
        tokens[-1] = text[x:split_pos]
 | 
						|
        x = split_pos + 1
 | 
						|
        tokens.append('')
 | 
						|
    return tokens
 | 
						|
 | 
						|
 | 
						|
def _unquote_unescape(text):
 | 
						|
    """Returns the string, and true if it was quoted."""
 | 
						|
    if not text:
 | 
						|
        return text, False
 | 
						|
    quoted = False
 | 
						|
    text = text.strip()
 | 
						|
    if text[0] == '"':
 | 
						|
        if len(text) == 1 or text[-1] != '"':
 | 
						|
            raise ValueError("missing close quote")
 | 
						|
        text = text[1:-1]
 | 
						|
        quoted = True
 | 
						|
    if "\\" in text:
 | 
						|
        text = _replace_escaping(text)
 | 
						|
    return text, quoted
 | 
						|
 | 
						|
 | 
						|
# If we have multiple values only consider the first
 | 
						|
def _parse_value_and_timestamp(s: str) -> Tuple[float, Optional[float]]:
 | 
						|
    s = s.lstrip()
 | 
						|
    separator = " "
 | 
						|
    if separator not in s:
 | 
						|
        separator = "\t"
 | 
						|
    values = [value.strip() for value in s.split(separator) if value.strip()]
 | 
						|
    if not values:
 | 
						|
        return float(s), None
 | 
						|
    value = _parse_value(values[0])
 | 
						|
    timestamp = (_parse_value(values[-1]) / 1000) if len(values) > 1 else None
 | 
						|
    return value, timestamp
 | 
						|
 | 
						|
 | 
						|
def _parse_value(value):
 | 
						|
    value = ''.join(value)
 | 
						|
    if value != value.strip() or '_' in value:
 | 
						|
        raise ValueError(f"Invalid value: {value!r}")
 | 
						|
    try:
 | 
						|
        return int(value)
 | 
						|
    except ValueError:
 | 
						|
        return float(value)
 | 
						|
    
 | 
						|
 | 
						|
def _parse_sample(text):
    """Parse one sample line into a ``Sample``.

    Two shapes are handled:

    * ``name value [timestamp]`` -- used when the line has no unquoted ``{``,
      or when `` # `` occurs before the first one.
    * ``name{labels} value [timestamp]`` -- the name may be omitted before
      the brace, in which case it is taken from the ``__name__`` label.

    Returns:
        ``Sample(name, labels, value, timestamp)``.

    Raises:
        ValueError: If the line has no whitespace separating name and value,
            the metric name is invalid, the closing brace is missing, the
            name is missing or given twice, or the labels or value cannot be
            parsed.
    """
    separator = " # "
    # Detect the labels in the text
    label_start = _next_unquoted_char(text, '{')
    if label_start == -1 or separator in text[:label_start]:
        # No label section: the name runs up to the first space or tab.
        name_end = _next_unquoted_char(text, ' \t')
        if name_end == -1:
            # Without this check -1 would be used as a slice index, chopping
            # the last character off the "name" and re-reading the whole line
            # as the value.
            raise ValueError("missing value: " + text)
        name = text[:name_end].strip()
        if not _is_valid_legacy_metric_name(name):
            raise ValueError("invalid metric name:" + text)
        value, timestamp = _parse_value_and_timestamp(text[name_end + 1:])
        return Sample(name, {}, value, timestamp)

    name = text[:label_start].strip()
    brace_offset = _next_unquoted_char(text[label_start:], '}')
    if brace_offset == -1:
        # Fail here rather than computing slice bounds from the -1 sentinel.
        raise ValueError("missing closing brace: " + text)
    label_end = brace_offset + label_start
    labels = parse_labels(text[label_start + 1:label_end], False)
    if not name:
        # Name might be in the labels
        if '__name__' not in labels:
            raise ValueError("missing metric name: " + text)
        name = labels.pop('__name__')
    elif '__name__' in labels:
        raise ValueError("metric name specified more than once")
    # Parsing labels succeeded, continue parsing the remaining text
    value, timestamp = _parse_value_and_timestamp(text[label_end + 1:])
    return Sample(name, labels, value, timestamp)
 | 
						|
 | 
						|
 | 
						|
def text_fd_to_metric_families(fd: TextIO) -> Iterable[Metric]:
    """Parse Prometheus text format from a file descriptor.

    This is a laxer parser than the main Go parser,
    so successful parsing does not imply that the parsed
    text meets the specification.

    Lines are read one at a time. ``# HELP`` and ``# TYPE`` lines start or
    update the current metric family, other ``#`` lines and blank lines are
    ignored, and every other line is parsed as a sample. A sample whose name
    is not allowed for the current family is yielded immediately as its own
    untyped family.

    Args:
        fd: An iterable of text lines, e.g. an open text file.

    Yields Metric's.

    Raises:
        ValueError: If a HELP/TYPE line carries an invalid unquoted metric
            name, a TYPE line has no type, or a sample line cannot be parsed.
    """
    name = ''
    documentation = ''
    typ = 'untyped'
    samples: List[Sample] = []
    allowed_names = []

    # Suffixes a sample name may carry, per declared metric type. Types not
    # listed here allow only the bare family name.
    suffixes_by_type = {
        'counter': [''],
        'gauge': [''],
        'summary': ['_count', '_sum', ''],
        'histogram': ['_count', '_sum', '_bucket'],
    }

    def build_metric(name: str, documentation: str, typ: str, samples: List[Sample]) -> Metric:
        # Munge counters into OpenMetrics representation
        # used internally: the family name loses a trailing '_total', or
        # else every sample name gains one.
        if typ == 'counter':
            if name.endswith('_total'):
                name = name[:-6]
            else:
                samples = [Sample(s[0] + '_total', *s[1:]) for s in samples]
        metric = Metric(name, documentation, typ)
        metric.samples = samples
        return metric

    for line in fd:
        line = line.strip()

        if line.startswith('#'):
            parts = _split_quoted(line, None, 3)
            if len(parts) < 2:
                continue
            candidate_name, quoted = '', False
            if len(parts) > 2:
                # Ignore comment tokens
                if parts[1] not in ('TYPE', 'HELP'):
                    continue
                candidate_name, quoted = _unquote_unescape(parts[2])
                if not quoted and not _is_valid_legacy_metric_name(candidate_name):
                    raise ValueError("invalid metric name: " + line)
            if parts[1] == 'HELP':
                if candidate_name != name:
                    if name != '':
                        yield build_metric(name, documentation, typ, samples)
                    # New metric
                    name = candidate_name
                    typ = 'untyped'
                    samples = []
                    allowed_names = [candidate_name]
                if len(parts) == 4:
                    documentation = _replace_help_escaping(parts[3])
                else:
                    documentation = ''
            elif parts[1] == 'TYPE':
                if len(parts) < 4:
                    raise ValueError("invalid TYPE line: " + line)
                if candidate_name != name:
                    if name != '':
                        yield build_metric(name, documentation, typ, samples)
                    # New metric
                    name = candidate_name
                    documentation = ''
                    samples = []
                typ = parts[3]
                allowed_names = [name + n for n in suffixes_by_type.get(typ, [''])]
        elif line == '':
            # Ignore blank lines
            pass
        else:
            sample = _parse_sample(line)
            if sample.name not in allowed_names:
                if name != '':
                    yield build_metric(name, documentation, typ, samples)
                # New metric, yield immediately as untyped singleton
                name = ''
                documentation = ''
                typ = 'untyped'
                samples = []
                allowed_names = []
                yield build_metric(sample[0], documentation, typ, [sample])
            else:
                samples.append(sample)

    # Flush the family that was still being collected at end of input.
    if name != '':
        yield build_metric(name, documentation, typ, samples)
 |