import io
import re
import string
from typing import Dict, Iterable, List, Match, Optional, TextIO, Tuple

from .metrics_core import Metric
from .samples import Sample
from .validation import (
    _is_valid_legacy_metric_name, _validate_labelname, _validate_metric_name,
)


def text_string_to_metric_families(text: str) -> Iterable[Metric]:
    """Parse Prometheus text format from a unicode string.

    See text_fd_to_metric_families.
    """
    yield from text_fd_to_metric_families(io.StringIO(text))
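
# Illustrative usage, not part of the module itself (hypothetical metric and
# label names): this helper simply wraps the string in io.StringIO and defers
# to text_fd_to_metric_families below.
#
#     for family in text_string_to_metric_families('up{job="api"} 1\n'):
#         for sample in family.samples:
#             print(sample.name, sample.labels, sample.value)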


ESCAPE_SEQUENCES = {
    '\\\\': '\\',
    '\\n': '\n',
    '\\"': '"',
}


def replace_escape_sequence(match: Match[str]) -> str:
    return ESCAPE_SEQUENCES[match.group(0)]


HELP_ESCAPING_RE = re.compile(r'\\[\\n]')
ESCAPING_RE = re.compile(r'\\[\\n"]')


def _replace_help_escaping(s: str) -> str:
    return HELP_ESCAPING_RE.sub(replace_escape_sequence, s)


def _replace_escaping(s: str) -> str:
    return ESCAPING_RE.sub(replace_escape_sequence, s)


def _is_character_escaped(s: str, charpos: int) -> bool:
    num_bslashes = 0
    while (charpos > num_bslashes
           and s[charpos - 1 - num_bslashes] == '\\'):
        num_bslashes += 1
    return num_bslashes % 2 == 1
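
# Worked example (hypothetical inputs): in the string 'a\\"b' (a backslash
# followed by a quote), the quote at index 2 is escaped, so
# _is_character_escaped('a\\"b', 2) returns True. With two backslashes before
# the quote ('a\\\\"b', quote at index 3) the first backslash escapes the
# second, so the quote itself is not escaped and the result is False.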


def parse_labels(labels_string: str, openmetrics: bool = False) -> Dict[str, str]:
    labels: Dict[str, str] = {}

    # Copy original labels
    sub_labels = labels_string.strip()
    if openmetrics and sub_labels and sub_labels[0] == ',':
        raise ValueError("leading comma: " + labels_string)
    try:
        # Process one label at a time
        while sub_labels:
            # The label name is before the equal, or if there's no equal, that's the
            # metric name.

            name_term, value_term, sub_labels = _next_term(sub_labels, openmetrics)
            if not value_term:
                if openmetrics:
                    raise ValueError("empty term in line: " + labels_string)
                continue

            label_name, quoted_name = _unquote_unescape(name_term)

            if not quoted_name and not _is_valid_legacy_metric_name(label_name):
                raise ValueError("unquoted UTF-8 metric name")

            # Check for missing quotes
            if not value_term or value_term[0] != '"':
                raise ValueError

            # The first quote is guaranteed to be after the equal.
            # Make sure that the next unescaped quote is the last character.
            i = 1
            while i < len(value_term):
                i = value_term.index('"', i)
                if not _is_character_escaped(value_term[:i], i):
                    break
                i += 1
            # The label value is between the first and last quote
            quote_end = i + 1
            if quote_end != len(value_term):
                raise ValueError("unexpected text after quote: " + labels_string)

            label_value, _ = _unquote_unescape(value_term)
            if label_name == '__name__':
                _validate_metric_name(label_name)
            else:
                _validate_labelname(label_name)
            if label_name in labels:
                raise ValueError("invalid line, duplicate label name: " + labels_string)
            labels[label_name] = label_value
        return labels
    except ValueError:
        raise ValueError("Invalid labels: " + labels_string)


def _next_term(text: str, openmetrics: bool) -> Tuple[str, str, str]:
    """Extract the next comma-separated label term from the text.

    The results are stripped terms for the label name and label value, and
    then the remainder of the string including the final , or }.

    Raises ValueError if the term is empty and we're in openmetrics mode.
    """

    # There may be a leading comma, which is fine here.
    if text[0] == ',':
        text = text[1:]
        if not text:
            return "", "", ""
        if text[0] == ',':
            raise ValueError("multiple commas")

    splitpos = _next_unquoted_char(text, '=,}')
    if splitpos >= 0 and text[splitpos] == "=":
        labelname = text[:splitpos]
        text = text[splitpos + 1:]
        splitpos = _next_unquoted_char(text, ',}')
    else:
        labelname = "__name__"

    if splitpos == -1:
        splitpos = len(text)
    term = text[:splitpos]
    if not term and openmetrics:
        raise ValueError("empty term in line: " + text)

    rest = text[splitpos:]
    return labelname, term.strip(), rest.strip()
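
# Expected shape of a single step (hypothetical labels): on
# 'foo="bar",baz="qux"' the helper returns the name term, the still-quoted
# value term, and the unconsumed remainder, i.e.
# _next_term('foo="bar",baz="qux"', False) == ('foo', '"bar"', ',baz="qux"').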


def _next_unquoted_char(text: str, chs: Optional[str], startidx: int = 0) -> int:
    """Return position of the next unquoted occurrence of any character in
    chs, or -1 if not found.

    It is always assumed that the first character being checked is not already
    inside quotes.
    """
    in_quotes = False
    if chs is None:
        chs = string.whitespace

    for i, c in enumerate(text[startidx:]):
        if c == '"' and not _is_character_escaped(text, startidx + i):
            in_quotes = not in_quotes
        if not in_quotes:
            if c in chs:
                return startidx + i
    return -1
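
# Example of quote-aware scanning (hypothetical input): in 'a="b,c",d' the
# comma inside the quoted value is skipped, so
# _next_unquoted_char('a="b,c",d', ',') == 7 (the comma after the closing quote).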


def _last_unquoted_char(text: str, chs: Optional[str]) -> int:
    """Return position of the last unquoted occurrence of any character in
    chs, or -1 if not found."""
    i = len(text) - 1
    in_quotes = False
    if chs is None:
        chs = string.whitespace
    while i > 0:
        if text[i] == '"' and not _is_character_escaped(text, i):
            in_quotes = not in_quotes

        if not in_quotes:
            if text[i] in chs:
                return i
        i -= 1
    return -1


def _split_quoted(text, separator, maxsplit=0):
    """Splits on separator, similarly to str.split, skipping separators if
    they are inside quotes.
    """

    tokens = ['']
    x = 0
    while x < len(text):
        split_pos = _next_unquoted_char(text, separator, x)
        if split_pos == -1:
            tokens[-1] = text[x:]
            x = len(text)
            continue
        # If the first character is the separator keep going. This happens when
        # there are double whitespace characters separating symbols.
        if split_pos == x:
            x += 1
            continue

        if maxsplit > 0 and len(tokens) > maxsplit:
            tokens[-1] = text[x:]
            break
        tokens[-1] = text[x:split_pos]
        x = split_pos + 1
        tokens.append('')
    return tokens
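
# Behavior sketch for comment lines (hypothetical HELP text): with a
# whitespace separator and maxsplit=3, quoted names survive as single tokens:
# _split_quoted('# HELP "a.b" some help', None, 3)
#     == ['#', 'HELP', '"a.b"', 'some help']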


def _unquote_unescape(text):
    """Returns the string, and True if it was quoted."""
    if not text:
        return text, False
    quoted = False
    text = text.strip()
    if text[0] == '"':
        if len(text) == 1 or text[-1] != '"':
            raise ValueError("missing close quote")
        text = text[1:-1]
        quoted = True
    if "\\" in text:
        text = _replace_escaping(text)
    return text, quoted
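
# For instance (hypothetical values): _unquote_unescape('"prod"') == ('prod', True)
# while _unquote_unescape('prod') == ('prod', False).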


# If we have multiple values only consider the first
def _parse_value_and_timestamp(s: str) -> Tuple[float, Optional[float]]:
    s = s.lstrip()
    separator = " "
    if separator not in s:
        separator = "\t"
    values = [value.strip() for value in s.split(separator) if value.strip()]
    if not values:
        return float(s), None
    value = _parse_value(values[0])
    timestamp = (_parse_value(values[-1]) / 1000) if len(values) > 1 else None
    return value, timestamp
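
# Timestamps in the Prometheus text format are in milliseconds, hence the
# division by 1000 above. For example (hypothetical sample):
# _parse_value_and_timestamp('1027 1395066363000') == (1027, 1395066363.0).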


def _parse_value(value):
    value = ''.join(value)
    if value != value.strip() or '_' in value:
        raise ValueError(f"Invalid value: {value!r}")
    try:
        return int(value)
    except ValueError:
        return float(value)


def _parse_sample(text):
    separator = " # "
    # Detect the labels in the text
    label_start = _next_unquoted_char(text, '{')
    if label_start == -1 or separator in text[:label_start]:
        # We don't have labels, but there could be an exemplar.
        name_end = _next_unquoted_char(text, ' \t')
        name = text[:name_end].strip()
        if not _is_valid_legacy_metric_name(name):
            raise ValueError("invalid metric name: " + text)
        # Parse the remaining text after the name
        remaining_text = text[name_end + 1:]
        value, timestamp = _parse_value_and_timestamp(remaining_text)
        return Sample(name, {}, value, timestamp)
    name = text[:label_start].strip()
    label_end = _next_unquoted_char(text[label_start:], '}') + label_start
    labels = parse_labels(text[label_start + 1:label_end], False)
    if not name:
        # Name might be in the labels
        if '__name__' not in labels:
            raise ValueError
        name = labels['__name__']
        del labels['__name__']
    elif '__name__' in labels:
        raise ValueError("metric name specified more than once")
    # Parsing labels succeeded, continue parsing the remaining text
    remaining_text = text[label_end + 1:]
    value, timestamp = _parse_value_and_timestamp(remaining_text)
    return Sample(name, labels, value, timestamp)
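
# End-to-end sketch for one exposition line (metric taken from the standard
# Prometheus text-format examples):
# _parse_sample('http_requests_total{method="post"} 1027 1395066363000')
# returns Sample('http_requests_total', {'method': 'post'}, 1027, 1395066363.0).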


def text_fd_to_metric_families(fd: TextIO) -> Iterable[Metric]:
    """Parse Prometheus text format from a file descriptor.

    This is a laxer parser than the main Go parser,
    so successful parsing does not imply that the parsed
    text meets the specification.

    Yields Metric's.
    """
    name = ''
    documentation = ''
    typ = 'untyped'
    samples: List[Sample] = []
    allowed_names = []

    def build_metric(name: str, documentation: str, typ: str, samples: List[Sample]) -> Metric:
        # Munge counters into OpenMetrics representation
        # used internally.
        if typ == 'counter':
            if name.endswith('_total'):
                name = name[:-6]
            else:
                new_samples = []
                for s in samples:
                    new_samples.append(Sample(s[0] + '_total', *s[1:]))
                samples = new_samples
        metric = Metric(name, documentation, typ)
        metric.samples = samples
        return metric

    for line in fd:
        line = line.strip()

        if line.startswith('#'):
            parts = _split_quoted(line, None, 3)
            if len(parts) < 2:
                continue
            candidate_name, quoted = '', False
            if len(parts) > 2:
                # Ignore comment tokens
                if parts[1] != 'TYPE' and parts[1] != 'HELP':
                    continue
                candidate_name, quoted = _unquote_unescape(parts[2])
                if not quoted and not _is_valid_legacy_metric_name(candidate_name):
                    raise ValueError
            if parts[1] == 'HELP':
                if candidate_name != name:
                    if name != '':
                        yield build_metric(name, documentation, typ, samples)
                    # New metric
                    name = candidate_name
                    typ = 'untyped'
                    samples = []
                    allowed_names = [candidate_name]
                if len(parts) == 4:
                    documentation = _replace_help_escaping(parts[3])
                else:
                    documentation = ''
            elif parts[1] == 'TYPE':
                if len(parts) < 4:
                    raise ValueError
                if candidate_name != name:
                    if name != '':
                        yield build_metric(name, documentation, typ, samples)
                    # New metric
                    name = candidate_name
                    documentation = ''
                    samples = []
                typ = parts[3]
                allowed_names = {
                    'counter': [''],
                    'gauge': [''],
                    'summary': ['_count', '_sum', ''],
                    'histogram': ['_count', '_sum', '_bucket'],
                }.get(typ, [''])
                allowed_names = [name + n for n in allowed_names]
        elif line == '':
            # Ignore blank lines
            pass
        else:
            sample = _parse_sample(line)
            if sample.name not in allowed_names:
                if name != '':
                    yield build_metric(name, documentation, typ, samples)
                # New metric, yield immediately as untyped singleton
                name = ''
                documentation = ''
                typ = 'untyped'
                samples = []
                allowed_names = []
                yield build_metric(sample[0], documentation, typ, [sample])
            else:
                samples.append(sample)

    if name != '':
        yield build_metric(name, documentation, typ, samples)
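
# Illustrative end-to-end sketch (hypothetical metric names): feeding a small
# exposition through text_fd_to_metric_families. Because of the counter
# munging in build_metric, the family is reported as 'my_counter' while its
# sample keeps the '_total' suffix.
#
#     import io
#     exposition = (
#         '# HELP my_counter_total A hypothetical counter.\n'
#         '# TYPE my_counter_total counter\n'
#         'my_counter_total{env="prod"} 7\n'
#     )
#     for family in text_fd_to_metric_families(io.StringIO(exposition)):
#         print(family.name, family.type, family.samples)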