You cannot select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
	
	
		
			654 lines
		
	
	
		
			24 KiB
		
	
	
	
		
			Python
		
	
			
		
		
	
	
			654 lines
		
	
	
		
			24 KiB
		
	
	
	
		
			Python
		
	
#!/usr/bin/env python
 | 
						|
 | 
						|
 | 
						|
import io as StringIO
 | 
						|
import math
 | 
						|
import re
 | 
						|
 | 
						|
from ..metrics_core import Metric
 | 
						|
from ..parser import (
 | 
						|
    _last_unquoted_char, _next_unquoted_char, _parse_value, _split_quoted,
 | 
						|
    _unquote_unescape, parse_labels,
 | 
						|
)
 | 
						|
from ..samples import BucketSpan, Exemplar, NativeHistogram, Sample, Timestamp
 | 
						|
from ..utils import floatToGoString
 | 
						|
from ..validation import _is_valid_legacy_metric_name, _validate_metric_name
 | 
						|
 | 
						|
 | 
						|
def text_string_to_metric_families(text):
    """Parse OpenMetrics text exposition held in a unicode string.

    The string is wrapped in an in-memory text stream and handed to
    text_fd_to_metric_families; whatever that generator yields is
    yielded from here unchanged.
    """
    stream = StringIO.StringIO(text)
    yield from text_fd_to_metric_families(stream)
 | 
						|
 | 
						|
 | 
						|
_CANONICAL_NUMBERS = {float("inf")}
 | 
						|
 | 
						|
 | 
						|
def _isUncanonicalNumber(s):
    """Tell whether ``s`` is a non-canonical spelling of a canonical number.

    ``s`` is converted with float(); a string that is not a valid float
    therefore raises ValueError. Numbers outside _CANONICAL_NUMBERS are
    never reported, whatever their spelling. For the others the result is
    True when ``s`` differs from floatToGoString's rendering of the value.
    """
    number = float(s)
    # Only the canonical numbers are required to be canonical.
    return number in _CANONICAL_NUMBERS and s != floatToGoString(number)
 | 
						|
 | 
						|
 | 
						|
# Two-character escape sequence -> the single character it stands for.
ESCAPE_SEQUENCES = dict((
    ('\\\\', '\\'),
    ('\\n', '\n'),
    ('\\"', '"'),
))
 | 
						|
 | 
						|
 | 
						|
def _replace_escape_sequence(match):
    """Substitution callback: look up the matched text in ESCAPE_SEQUENCES.

    ``match`` is a regex match object; its whole matched text is used as
    the dictionary key, so an unknown sequence raises KeyError.
    """
    escaped = match.group(0)
    return ESCAPE_SEQUENCES[escaped]
 | 
						|
 | 
						|
 | 
						|
# Matches a backslash followed by a backslash, the letter "n", or a double
# quote -- the same three sequences that are the keys of ESCAPE_SEQUENCES.
ESCAPING_RE = re.compile(r'\\[\\n"]')
 | 
						|
 | 
						|
 | 
						|
def _replace_escaping(s):
    """Return ``s`` with every sequence matched by ESCAPING_RE unescaped.

    Each match is replaced by the value _replace_escape_sequence returns
    for it; text that does not match is left untouched.
    """
    unescaped = ESCAPING_RE.sub(_replace_escape_sequence, s)
    return unescaped
 | 
						|
 | 
						|
 | 
						|
def _unescape_help(text):
 | 
						|
    result = []
 | 
						|
    slash = False
 | 
						|
 | 
						|
    for char in text:
 | 
						|
        if slash:
 | 
						|
            if char == '\\':
 | 
						|
                result.append('\\')
 | 
						|
            elif char == '"':
 | 
						|
                result.append('"')
 | 
						|
            elif char == 'n':
 | 
						|
                result.append('\n')
 | 
						|
            else:
 | 
						|
                result.append('\\' + char)
 | 
						|
            slash = False
 | 
						|
        else:
 | 
						|
            if char == '\\':
 | 
						|
                slash = True
 | 
						|
            else:
 | 
						|
                result.append(char)
 | 
						|
 | 
						|
    if slash:
 | 
						|
        result.append('\\')
 | 
						|
 | 
						|
    return ''.join(result)
 | 
						|
 | 
						|
 | 
						|
def _parse_timestamp(timestamp):
 | 
						|
    timestamp = ''.join(timestamp)
 | 
						|
    if not timestamp:
 | 
						|
        return None
 | 
						|
    if timestamp != timestamp.strip() or '_' in timestamp:
 | 
						|
        raise ValueError(f"Invalid timestamp: {timestamp!r}")
 | 
						|
    try:
 | 
						|
        # Simple int.
 | 
						|
        return Timestamp(int(timestamp), 0)
 | 
						|
    except ValueError:
 | 
						|
        try:
 | 
						|
            # aaaa.bbbb. Nanosecond resolution supported.
 | 
						|
            parts = timestamp.split('.', 1)
 | 
						|
            return Timestamp(int(parts[0]), int(parts[1][:9].ljust(9, "0")))
 | 
						|
        except ValueError:
 | 
						|
            # Float.
 | 
						|
            ts = float(timestamp)
 | 
						|
            if math.isnan(ts) or math.isinf(ts):
 | 
						|
                raise ValueError(f"Invalid timestamp: {timestamp!r}")
 | 
						|
            return ts
 | 
						|
 | 
						|
 | 
						|
def _is_character_escaped(s, charpos):
 | 
						|
    num_bslashes = 0
 | 
						|
    while (charpos > num_bslashes
 | 
						|
           and s[charpos - 1 - num_bslashes] == '\\'):
 | 
						|
        num_bslashes += 1
 | 
						|
    return num_bslashes % 2 == 1
 | 
						|
 | 
						|
 | 
						|
def _parse_sample(text):
    """Parse one sample line into a Sample.

    The line is treated as label-less when it has no unquoted ``{`` or
    when `` # `` occurs before the first one (the brace then belongs to an
    exemplar). Otherwise the text between the first unquoted ``{`` and the
    first unquoted ``}`` is parsed as the label set; if nothing precedes
    the brace, the metric name is taken from the ``__name__`` label.

    Raises ValueError for an invalid label-less metric name, for a missing
    name, and for a name given both before the braces and as ``__name__``.
    """
    separator = " # "
    brace_open = _next_unquoted_char(text, '{')
    has_labels = brace_open != -1 and separator not in text[:brace_open]

    if not has_labels:
        # We don't have labels, but there could be an exemplar.
        name_stop = _next_unquoted_char(text, ' ')
        name = text[:name_stop]
        if not _is_valid_legacy_metric_name(name):
            raise ValueError("invalid metric name:" + text)
        value, timestamp, exemplar = _parse_remaining_text(text[name_stop + 1:])
        return Sample(name, {}, value, timestamp, exemplar)

    name = text[:brace_open]
    brace_close = _next_unquoted_char(text, '}')
    labels = parse_labels(text[brace_open + 1:brace_close], True)
    if name:
        if '__name__' in labels:
            raise ValueError("metric name specified more than once")
    else:
        # Name might be in the labels
        if '__name__' not in labels:
            raise ValueError
        name = labels.pop('__name__')

    # Skip the closing brace and the single character that follows it.
    value, timestamp, exemplar = _parse_remaining_text(text[brace_close + 2:])
    return Sample(name, labels, value, timestamp, exemplar)
 | 
						|
 | 
						|
 | 
						|
def _parse_remaining_text(text):
    """Parse the part of a sample line that follows the name and labels.

    ``text`` starts with the sample value, optionally followed by a space
    and a timestamp and/or an exemplar of the form
    ``# {labels} value [timestamp]``.

    Returns a ``(value, timestamp, exemplar)`` tuple; timestamp and
    exemplar are None when absent.

    Raises ValueError when the text does not follow that layout, when a
    number cannot be parsed, or when the combined length of the exemplar
    label names and values exceeds 128.
    """
    split_text = text.split(" ", 1)
    val = _parse_value(split_text[0])
    if len(split_text) == 1:
        # We don't have timestamp or exemplar
        return val, None, None

    timestamp = []
    exemplar_value = []
    exemplar_timestamp = []
    exemplar_labels = None

    state = 'timestamp'
    text = split_text[1]

    in_quotes = False
    for pos, char in enumerate(text):
        # A backslash-escaped quote is part of a label value and must not
        # open or close a quoted section.
        if char == '"' and not _is_character_escaped(text, pos):
            in_quotes = not in_quotes
        if in_quotes:
            # Quoted label content never drives the state machine.
            continue
        if state == 'timestamp':
            if char == '#' and not timestamp:
                state = 'exemplarspace'
            elif char == ' ':
                state = 'exemplarhash'
            else:
                timestamp.append(char)
        elif state == 'exemplarhash':
            if char == '#':
                state = 'exemplarspace'
            else:
                raise ValueError("Invalid line: " + text)
        elif state == 'exemplarspace':
            if char == ' ':
                state = 'exemplarstartoflabels'
            else:
                raise ValueError("Invalid line: " + text)
        elif state == 'exemplarstartoflabels':
            if char == '{':
                # The labels are parsed in one go here; the loop then only
                # has to skip ahead to the closing brace.
                label_start = _next_unquoted_char(text, '{')
                label_end = _last_unquoted_char(text, '}')
                exemplar_labels = parse_labels(text[label_start + 1:label_end], True)
                state = 'exemplarparsedlabels'
            else:
                raise ValueError("Invalid line: " + text)
        elif state == 'exemplarparsedlabels':
            if char == '}':
                state = 'exemplarvaluespace'
        elif state == 'exemplarvaluespace':
            if char == ' ':
                state = 'exemplarvalue'
            else:
                raise ValueError("Invalid line: " + text)
        elif state == 'exemplarvalue':
            if char == ' ' and not exemplar_value:
                raise ValueError("Invalid line: " + text)
            elif char == ' ':
                state = 'exemplartimestamp'
            else:
                exemplar_value.append(char)
        elif state == 'exemplartimestamp':
            exemplar_timestamp.append(char)

    # Trailing space after value.
    if state == 'timestamp' and not timestamp:
        raise ValueError("Invalid line: " + text)

    # Trailing space after exemplar value.
    if state == 'exemplartimestamp' and not exemplar_timestamp:
        raise ValueError("Invalid line: " + text)

    # Incomplete exemplar.
    if state in ('exemplarhash', 'exemplarspace', 'exemplarstartoflabels', 'exemplarparsedlabels'):
        raise ValueError("Invalid line: " + text)

    ts = _parse_timestamp(timestamp)
    exemplar = None
    if exemplar_labels is not None:
        exemplar_length = sum(len(k) + len(v) for k, v in exemplar_labels.items())
        if exemplar_length > 128:
            raise ValueError("Exemplar labels are too long: " + text)
        exemplar = Exemplar(
            exemplar_labels,
            _parse_value(exemplar_value),
            _parse_timestamp(exemplar_timestamp),
        )

    return val, ts, exemplar
 | 
						|
 | 
						|
 | 
						|
def _parse_nh_sample(text, suffixes):
    """Determines if the line has a native histogram sample, and parses it if so.

    ``suffixes`` is handed to str.endswith, so it must be a string or a
    tuple of strings; a sample name ending in one of them is rejected.

    Returns a Sample carrying the parsed native histogram, or None when
    the line is not a native histogram sample. Raises ValueError for an
    unterminated brace section, a suffixed name, or a missing name.
    """
    labels_start = _next_unquoted_char(text, '{')
    labels_end = -1

    # Finding a native histogram sample requires careful parsing of
    # possibly-quoted text, which can appear in metric names, label names, and
    # values.
    #
    # First, we need to determine if there are metric labels. Find the space
    # between the metric definition and the rest of the line. Look for unquoted
    # space or {.
    cursor = _next_unquoted_char(text, ' {')
    if cursor == -1:
        return None

    # If the first unquoted char was a {, then that is the metric labels (which
    # could contain a UTF-8 metric name).
    has_metric_labels = text[cursor] == '{'
    if has_metric_labels:
        # Consume the labels -- jump ahead to the close bracket.
        labels_end = cursor = _next_unquoted_char(text, '}', cursor)
        if labels_end == -1:
            raise ValueError

    # If there is no subsequent unquoted {, then it's definitely not a nh.
    nh_value_start = _next_unquoted_char(text, '{', cursor + 1)
    if nh_value_start == -1:
        return None

    # Edge case: if there is an unquoted # between the metric definition and the {,
    # then this is actually an exemplar
    hash_pos = _next_unquoted_char(text, '#', cursor + 1)
    if hash_pos != -1 and hash_pos < nh_value_start:
        return None

    if _next_unquoted_char(text, '}', nh_value_start) == -1:
        raise ValueError

    nh_text = text[nh_value_start:]

    if not has_metric_labels:
        # The name ends one character before the histogram's opening brace.
        name = text[:nh_value_start - 1]
        if name.endswith(suffixes):
            raise ValueError("the sample name of a native histogram should have no suffixes", name)
        # Not possible for UTF-8 name here, that would have been caught as having a labelset.
        return Sample(name, None, None, None, None, _parse_nh_struct(nh_text))

    labels = parse_labels(text[labels_start + 1:labels_end], True)
    name = text[:labels_start]
    if name.endswith(suffixes):
        raise ValueError("the sample name of a native histogram with labels should have no suffixes", name)
    if not name:
        # Name might be in the labels
        if '__name__' not in labels:
            raise ValueError
        name = labels.pop('__name__')
        # Edge case: the only "label" is the name definition.
        if not labels:
            labels = None

    return Sample(name, labels, None, None, None, _parse_nh_struct(nh_text))
 | 
						|
 | 
						|
 | 
						|
# Patterns used by _parse_nh_struct, compiled once at import time.
_NH_FIELD_RE = re.compile(r'(\w+):\s*([^,}]+)')
_NH_SPANS_RE = re.compile(r'(positive_spans|negative_spans):\[(\d+:\d+(,\d+:\d+)*)\]')
_NH_DELTAS_RE = re.compile(r'(positive_deltas|negative_deltas):\[(-?\d+(?:,-?\d+)*)\]')


def _parse_nh_struct(text):
    """Parse the ``{...}`` body of a native histogram sample.

    ``text`` is scanned for ``key:value`` fields and for the bracketed
    ``positive_spans``/``negative_spans`` and
    ``positive_deltas``/``negative_deltas`` lists.

    Returns a NativeHistogram. ``sum`` is kept as an int when it is written
    as one and parsed as a float otherwise; spans and deltas that are not
    present are passed on as None.

    Raises KeyError when ``count``, ``sum``, ``schema``, ``zero_threshold``
    or ``zero_count`` is missing, and ValueError when one of them is not a
    valid number.
    """
    items = dict(_NH_FIELD_RE.findall(text))
    span_matches = _NH_SPANS_RE.findall(text)
    deltas = dict(_NH_DELTAS_RE.findall(text))

    count_value = int(items['count'])
    # The sum of observations need not be integral: fall back to float
    # instead of rejecting a fractional sum.
    raw_sum = items['sum']
    try:
        sum_value = int(raw_sum)
    except ValueError:
        sum_value = float(raw_sum)
    schema = int(items['schema'])
    zero_threshold = float(items['zero_threshold'])
    zero_count = int(items['zero_count'])

    pos_spans = _compose_spans(span_matches, 'positive_spans')
    neg_spans = _compose_spans(span_matches, 'negative_spans')
    pos_deltas = _compose_deltas(deltas, 'positive_deltas')
    neg_deltas = _compose_deltas(deltas, 'negative_deltas')

    return NativeHistogram(
        count_value=count_value,
        sum_value=sum_value,
        schema=schema,
        zero_threshold=zero_threshold,
        zero_count=zero_count,
        pos_spans=pos_spans,
        neg_spans=neg_spans,
        pos_deltas=pos_deltas,
        neg_deltas=neg_deltas,
    )
 | 
						|
  
 | 
						|
 | 
						|
def _compose_spans(span_matches, spans_name):
 | 
						|
    """Takes a list of span matches (expected to be a list of tuples) and a string 
 | 
						|
    (the expected span list name) and processes the list so that the values extracted 
 | 
						|
    from the span matches can be used to compose a tuple of BucketSpan objects"""
 | 
						|
    spans = {}
 | 
						|
    for match in span_matches:
 | 
						|
        # Extract the key from the match (first element of the tuple).
 | 
						|
        key = match[0]
 | 
						|
        # Extract the value from the match (second element of the tuple).
 | 
						|
        # Split the value string by commas to get individual pairs, 
 | 
						|
        # split each pair by ':' to get start and end, and convert them to integers.
 | 
						|
        value = [tuple(map(int, pair.split(':'))) for pair in match[1].split(',')]
 | 
						|
        # Store the processed value in the spans dictionary with the key.
 | 
						|
        spans[key] = value
 | 
						|
    if spans_name not in spans:
 | 
						|
        return None
 | 
						|
    out_spans = []
 | 
						|
    # Iterate over each start and end tuple in the list of tuples for the specified spans_name.
 | 
						|
    for start, end in spans[spans_name]:
 | 
						|
        # Compose a BucketSpan object with the start and end values 
 | 
						|
        # and append it to the out_spans list.
 | 
						|
        out_spans.append(BucketSpan(start, end))
 | 
						|
        # Convert to tuple
 | 
						|
    out_spans_tuple = tuple(out_spans)
 | 
						|
    return out_spans_tuple
 | 
						|
 | 
						|
 | 
						|
def _compose_deltas(deltas, deltas_name):
 | 
						|
    """Takes a list of deltas matches (a dictionary) and a string (the expected delta list name),
 | 
						|
    and processes its elements to compose a tuple of integers representing the deltas"""
 | 
						|
    if deltas_name not in deltas:
 | 
						|
        return None
 | 
						|
    out_deltas = deltas.get(deltas_name)
 | 
						|
    if out_deltas is not None and out_deltas.strip():
 | 
						|
        elems = out_deltas.split(',')
 | 
						|
    # Convert each element in the list elems to an integer 
 | 
						|
    # after stripping whitespace and create a tuple from these integers.
 | 
						|
    out_deltas_tuple = tuple(int(x.strip()) for x in elems)
 | 
						|
    return out_deltas_tuple
 | 
						|
        
 | 
						|
 | 
						|
def _group_for_sample(sample, name, typ):
 | 
						|
    if typ == 'info':
 | 
						|
        # We can't distinguish between groups for info metrics.
 | 
						|
        return {}
 | 
						|
    if typ == 'summary' and sample.name == name:
 | 
						|
        d = sample.labels.copy()
 | 
						|
        del d['quantile']
 | 
						|
        return d
 | 
						|
    if typ == 'stateset':
 | 
						|
        d = sample.labels.copy()
 | 
						|
        del d[name]
 | 
						|
        return d
 | 
						|
    if typ in ['histogram', 'gaugehistogram'] and sample.name == name + '_bucket':
 | 
						|
        d = sample.labels.copy()
 | 
						|
        del d['le']
 | 
						|
        return d
 | 
						|
    return sample.labels
 | 
						|
 | 
						|
 | 
						|
def _check_histogram(samples, name):
    """Validate the samples of a histogram or gaugehistogram family.

    Samples are walked in order; a new group starts whenever the group
    labels (see _group_for_sample) or the timestamp change. Samples whose
    name equals ``name`` exactly are skipped. Within a group, bucket
    bounds must strictly increase and bucket values must not decrease;
    when a group ends, the consistency of its +Inf bucket, count and sum
    samples is checked.

    Raises ValueError on the first violation.
    """
    current_group = None
    current_timestamp = None

    def verify_group():
        # Reads the per-group accumulators that the loop below maintains.
        if upper_bound != float('+Inf'):
            raise ValueError("+Inf bucket missing: " + name)
        if total is not None and cumulative != total:
            raise ValueError("Count does not match +Inf value: " + name)
        if saw_sum and total is None:
            raise ValueError("_count must be present if _sum is present: " + name)
        if saw_gsum and total is None:
            raise ValueError("_gcount must be present if _gsum is present: " + name)
        if not (saw_sum or saw_gsum) and total is not None:
            raise ValueError("_sum/_gsum must be present if _count is present: " + name)
        if negative_buckets and saw_sum:
            raise ValueError("Cannot have _sum with negative buckets: " + name)
        if not negative_buckets and negative_gsum:
            raise ValueError("Cannot have negative _gsum with non-negative buckets: " + name)

    for sample in samples:
        suffix = sample.name[len(name):]
        sample_group = _group_for_sample(sample, name, 'histogram')
        if not suffix:
            continue
        if sample_group != current_group or sample.timestamp != current_timestamp:
            # A new group begins: close the previous one, reset the state.
            if current_group is not None:
                verify_group()
            total = None
            upper_bound = None
            negative_buckets = False
            saw_sum = False
            saw_gsum = False
            negative_gsum = False
            cumulative = 0
        current_group = sample_group
        current_timestamp = sample.timestamp

        if suffix == '_bucket':
            bound = float(sample.labels['le'])
            if bound < 0:
                negative_buckets = True
            if upper_bound is not None and bound <= upper_bound:
                raise ValueError("Buckets out of order: " + name)
            if sample.value < cumulative:
                raise ValueError("Bucket values out of order: " + name)
            upper_bound = bound
            cumulative = sample.value
        elif suffix in ('_count', '_gcount'):
            total = sample.value
        elif suffix == '_sum':
            saw_sum = True
        elif suffix == '_gsum':
            saw_gsum = True
            if sample.value < 0:
                negative_gsum = True

    if current_group is not None:
        verify_group()
 | 
						|
 | 
						|
 | 
						|
def text_fd_to_metric_families(fd):
    """Parse OpenMetrics text format from a file descriptor.

    This is a laxer parser than the main Go parser,
    so successful parsing does not imply that the parsed
    text meets the specification.

    ``fd`` is iterated line by line and must produce strings; one trailing
    newline per line is stripped. The input has to end with ``# EOF``.

    Yields Metric's, one per metric family, each as soon as the next
    family starts (or the input ends).

    Raises ValueError for blank lines, content after ``# EOF``, a missing
    ``# EOF``, malformed metadata or sample lines, and for samples that
    violate the checks made for their metric type.
    """
    name = None
    allowed_names = []
    eof = False

    seen_names = set()
    type_suffixes = {
        'counter': ['_total', '_created'],
        'summary': ['', '_count', '_sum', '_created'],
        'histogram': ['_count', '_sum', '_bucket', '_created'],
        'gaugehistogram': ['_gcount', '_gsum', '_bucket'],
        'info': ['_info'],
    }
    # str.endswith needs a tuple; build it once instead of once per line.
    histogram_suffixes = tuple(type_suffixes['histogram'])

    def build_metric(name, documentation, typ, unit, samples):
        # Finalise one family: fill in defaults, reject clashing names and
        # mismatching units, run the histogram checks, build the Metric.
        if typ is None:
            typ = 'unknown'
        for suffix in set(type_suffixes.get(typ, []) + [""]):
            if name + suffix in seen_names:
                raise ValueError("Clashing name: " + name + suffix)
            seen_names.add(name + suffix)
        if documentation is None:
            documentation = ''
        if unit is None:
            unit = ''
        if unit and not name.endswith("_" + unit):
            raise ValueError("Unit does not match metric name: " + name)
        if unit and typ in ['info', 'stateset']:
            raise ValueError("Units not allowed for this metric type: " + name)
        if typ in ['histogram', 'gaugehistogram']:
            _check_histogram(samples, name)
        _validate_metric_name(name)
        metric = Metric(name, documentation, typ, unit)
        # TODO: check labelvalues are valid utf8
        metric.samples = samples
        return metric

    is_nh = False
    typ = None
    for line in fd:
        # endswith (rather than line[-1]) lets an empty line reach the
        # "Received blank line" check below instead of raising IndexError.
        if line.endswith('\n'):
            line = line[:-1]

        if eof:
            raise ValueError("Received line after # EOF: " + line)

        if not line:
            raise ValueError("Received blank line")

        if line == '# EOF':
            eof = True
        elif line.startswith('#'):
            parts = _split_quoted(line, ' ', 3)
            if len(parts) < 4:
                raise ValueError("Invalid line: " + line)
            candidate_name, quoted = _unquote_unescape(parts[2])
            if not quoted and not _is_valid_legacy_metric_name(candidate_name):
                raise ValueError("Invalid metric name: " + line)
            if candidate_name == name and samples:
                raise ValueError("Received metadata after samples: " + line)
            if candidate_name != name:
                if name is not None:
                    yield build_metric(name, documentation, typ, unit, samples)
                # New metric
                name = candidate_name
                unit = None
                typ = None
                documentation = None
                group = None
                seen_groups = set()
                group_timestamp = None
                group_timestamp_samples = set()
                samples = []
                allowed_names = [candidate_name]

            if parts[1] == 'HELP':
                if documentation is not None:
                    raise ValueError("More than one HELP for metric: " + line)
                documentation = _unescape_help(parts[3])
            elif parts[1] == 'TYPE':
                if typ is not None:
                    raise ValueError("More than one TYPE for metric: " + line)
                typ = parts[3]
                if typ == 'untyped':
                    raise ValueError("Invalid TYPE for metric: " + line)
                allowed_names = [name + n for n in type_suffixes.get(typ, [''])]
            elif parts[1] == 'UNIT':
                if unit is not None:
                    raise ValueError("More than one UNIT for metric: " + line)
                unit = parts[3]
            else:
                raise ValueError("Invalid line: " + line)
        else:
            if typ == 'histogram':
                # set to true to account for native histograms naming exceptions/sanitizing differences
                is_nh = True
                sample = _parse_nh_sample(line, histogram_suffixes)
                # It's not a native histogram
                if sample is None:
                    is_nh = False
                    sample = _parse_sample(line)
            else:
                is_nh = False
                sample = _parse_sample(line)
            if sample.name not in allowed_names and not is_nh:
                if name is not None:
                    yield build_metric(name, documentation, typ, unit, samples)
                # Start an unknown metric.
                candidate_name, quoted = _unquote_unescape(sample.name)
                if not quoted and not _is_valid_legacy_metric_name(candidate_name):
                    raise ValueError("Invalid metric name: " + line)
                name = candidate_name
                documentation = None
                unit = None
                typ = 'unknown'
                samples = []
                group = None
                group_timestamp = None
                group_timestamp_samples = set()
                seen_groups = set()
                allowed_names = [sample.name]

            if typ == 'stateset' and name not in sample.labels:
                raise ValueError("Stateset missing label: " + line)
            if (name + '_bucket' == sample.name
                    and (sample.labels.get('le', "NaN") == "NaN"
                         or _isUncanonicalNumber(sample.labels['le']))):
                raise ValueError("Invalid le label: " + line)
            if (name + '_bucket' == sample.name
                    and (not isinstance(sample.value, int) and not sample.value.is_integer())):
                raise ValueError("Bucket value must be an integer: " + line)
            if ((name + '_count' == sample.name or name + '_gcount' == sample.name)
                    and (not isinstance(sample.value, int) and not sample.value.is_integer())):
                raise ValueError("Count value must be an integer: " + line)
            if (typ == 'summary' and name == sample.name
                    and (not (0 <= float(sample.labels.get('quantile', -1)) <= 1)
                         or _isUncanonicalNumber(sample.labels['quantile']))):
                raise ValueError("Invalid quantile label: " + line)

            if not is_nh:
                g = tuple(sorted(_group_for_sample(sample, name, typ).items()))
                # A group may not reappear once another group has started.
                if group is not None and g != group and g in seen_groups:
                    raise ValueError("Invalid metric grouping: " + line)
                if group is not None and g == group:
                    if (sample.timestamp is None) != (group_timestamp is None):
                        raise ValueError("Mix of timestamp presence within a group: " + line)
                    if group_timestamp is not None and group_timestamp > sample.timestamp and typ != 'info':
                        raise ValueError("Timestamps went backwards within a group: " + line)
                else:
                    group_timestamp_samples = set()

                series_id = (sample.name, tuple(sorted(sample.labels.items())))
                if sample.timestamp != group_timestamp or series_id not in group_timestamp_samples:
                    # Not a duplicate due to timestamp truncation.
                    samples.append(sample)
                group_timestamp_samples.add(series_id)

                group = g
                group_timestamp = sample.timestamp
                seen_groups.add(g)
            else:
                # Native histogram samples bypass the grouping checks.
                samples.append(sample)

            if typ == 'stateset' and sample.value not in [0, 1]:
                raise ValueError("Stateset samples can only have values zero and one: " + line)
            if typ == 'info' and sample.value != 1:
                raise ValueError("Info samples can only have value one: " + line)
            if typ == 'summary' and name == sample.name and sample.value < 0:
                raise ValueError("Quantile values cannot be negative: " + line)
            if sample.name[len(name):] in ['_total', '_sum', '_count', '_bucket', '_gcount', '_gsum'] and math.isnan(
                    sample.value):
                raise ValueError("Counter-like samples cannot be NaN: " + line)
            if sample.name[len(name):] in ['_total', '_sum', '_count', '_bucket', '_gcount'] and sample.value < 0:
                raise ValueError("Counter-like samples cannot be negative: " + line)
            if sample.exemplar and not (
                    (typ in ['histogram', 'gaugehistogram'] and sample.name.endswith('_bucket'))
                    or (typ in ['counter'] and sample.name.endswith('_total'))):
                raise ValueError("Invalid line only histogram/gaugehistogram buckets and counters can have exemplars: " + line)

    if name is not None:
        yield build_metric(name, documentation, typ, unit, samples)

    if not eof:
        raise ValueError("Missing # EOF at end")
 |