Avoid unnecessary repeated iteration over the same term

csmarchbanks · csmarchbanks · commit 2a2ca5276fff · 2025-07-03T08:36:15.000-06:00
Split the term into its label name and label value portions in a single
pass, rather than scanning again from the beginning for an = character
after already walking the full term. This saves ~5% on the benchmark.

Signed-off-by: Chris Marchbanks <csmarchbanks@gmail.com>
diff --git a/prometheus_client/parser.py b/prometheus_client/parser.py
@@ -62,44 +62,35 @@ def parse_labels(labels_string: str, openmetrics: bool = False) -> Dict[str, str
             # The label name is before the equal, or if there's no equal, that's the
             # metric name.
             
-            term, sub_labels = _next_term(sub_labels, openmetrics)
-            if not term:
+            name_term, value_term, sub_labels = _next_term(sub_labels, openmetrics)
+            if not value_term:
                 if openmetrics:
                     raise ValueError("empty term in line: " + labels_string)
                 continue
             
-            quoted_name = False
-            operator_pos = _next_unquoted_char(term, '=')
-            if operator_pos == -1:
-                quoted_name = True
-                label_name = "__name__"
-            else:
-                value_start = _next_unquoted_char(term, '=')
-                label_name, quoted_name = _unquote_unescape(term[:value_start])
-                term = term[value_start + 1:]
+            label_name, quoted_name = _unquote_unescape(name_term)
                 
             if not quoted_name and not _is_valid_legacy_metric_name(label_name):
                 raise ValueError("unquoted UTF-8 metric name")
                 
             # Check for missing quotes 
-            term = term.strip()
-            if not term or term[0] != '"':
+            if not value_term or value_term[0] != '"':
                 raise ValueError
 
             # The first quote is guaranteed to be after the equal.
-            # Find the last unescaped quote.
+            # Make sure that the next unescaped quote is the last character.
             i = 1
-            while i < len(term):
-                i = term.index('"', i)
-                if not _is_character_escaped(term[:i], i):
+            while i < len(value_term):
+                i = value_term.index('"', i)
+                if not _is_character_escaped(value_term[:i], i):
                     break
                 i += 1
-
             # The label value is between the first and last quote
             quote_end = i + 1
-            if quote_end != len(term):
+            if quote_end != len(value_term):
                 raise ValueError("unexpected text after quote: " + labels_string)
-            label_value, _ = _unquote_unescape(term[:quote_end])
+
+            label_value, _ = _unquote_unescape(value_term)
             if label_name == '__name__':
                 _validate_metric_name(label_name)
             else:
@@ -112,11 +103,10 @@ def parse_labels(labels_string: str, openmetrics: bool = False) -> Dict[str, str
         raise ValueError("Invalid labels: " + labels_string)
     
 
-def _next_term(text: str, openmetrics: bool) -> Tuple[str, str]:
-    """Extract the next comma-separated label term from the text.
-    
-    Returns the stripped term and the stripped remainder of the string, 
-    including the comma.
+def _next_term(text: str, openmetrics: bool) -> Tuple[str, str, str]:
+    """Extract the next comma-separated label term from the text. The results
+    are stripped terms for the label name, label value, and then the remainder
+    of the string including the final , or }.
     
     Raises ValueError if the term is empty and we're in openmetrics mode.
     """
@@ -125,18 +115,26 @@ def _next_term(text: str, openmetrics: bool) -> Tuple[str, str]:
     if text[0] == ',':
         text = text[1:]
         if not text:
-            return "", ""
+            return "", "", ""
         if text[0] == ',':
             raise ValueError("multiple commas")
-    splitpos = _next_unquoted_char(text, ',}')
+
+    splitpos = _next_unquoted_char(text, '=,}')
+    if splitpos >= 0 and text[splitpos] == "=":
+        labelname = text[:splitpos]
+        text = text[splitpos + 1:]
+        splitpos = _next_unquoted_char(text, ',}')
+    else:
+        labelname = "__name__"
+
     if splitpos == -1:
         splitpos = len(text)
     term = text[:splitpos]
     if not term and openmetrics:
         raise ValueError("empty term:", term)
     
-    sublabels = text[splitpos:]
-    return term.strip(), sublabels.strip()
+    rest = text[splitpos:]
+    return labelname, term.strip(), rest.strip()
 
 
 def _next_unquoted_char(text: str, chs: Optional[str], startidx: int = 0) -> int: