diff --git a/tools/perf/check-canary-metrics.py b/tools/perf/check-canary-metrics.py old mode 100755 new mode 100644 index d119bafc..4879ebef --- a/tools/perf/check-canary-metrics.py +++ b/tools/perf/check-canary-metrics.py @@ -36,11 +36,23 @@ def exit_success(message: str) -> None: + """ + Prints a message to standard output and exits the process with status code 0. + + Parameters: + message (str): Text to print before exiting. + """ print(message) sys.exit(0) def exit_failure(message: str) -> None: + """ + Prints an error message to standard error and terminates the process with exit code 1. + + Parameters: + message (str): Error message to emit to stderr before exiting. + """ print(message, file=sys.stderr) sys.exit(1) @@ -56,6 +68,15 @@ def exit_failure(message: str) -> None: def query_prometheus(url: str, query: str) -> Optional[float]: + """ + Query a Prometheus HTTP API for an instant vector and extract the first numeric value. + + Returns: + The numeric value from the first result row as a float, or `None` if Prometheus returned no results. + + Raises: + RuntimeError: If the HTTP request fails, Prometheus responds with a non-"success" status, or the response payload has an unexpected format. + """ encoded_query = urllib.parse.urlencode({'query': query}) endpoint = f"{url.rstrip('/')}/api/v1/query?{encoded_query}" try: @@ -77,6 +98,16 @@ def query_prometheus(url: str, query: str) -> Optional[float]: def render_query(template: str, build: str) -> str: + """ + Substitutes the `$BUILD` placeholder in a query template with the provided build identifier. + + Parameters: + template (str): Prometheus query template containing the `$BUILD` placeholder. + build (str): Build identifier to substitute into the template. + + Returns: + rendered (str): The query string with `$BUILD` replaced by `build`. 
+ """ return template.replace('$BUILD', build) @@ -104,6 +135,20 @@ def evaluate_metric( unit: str, description: str, ) -> MetricResult: + """ + Evaluate a single canary metric against an absolute threshold and an optional regression allowance. + + Parameters: + name (str): Human-readable metric identifier used in messages and the resulting MetricResult.name. + template (str): Prometheus query template; the placeholder `$BUILD` will be replaced with the build SHA. + threshold (float): Absolute budget value the current metric must be less than or equal to. + regression_pct (float): Allowed relative increase over the previous build (e.g., 0.1 for 10%); ignored when previous baseline is near zero or unavailable. + unit (str): Unit string appended to numeric values in human-readable messages (e.g., "ms", or empty). + description (str): Short textual description stored on the MetricResult.description for reporting. + + Returns: + MetricResult: Populated result containing current and optional previous values, threshold and regression parameters, a pass/fail flag, and a human-readable message explaining the outcome. + """ current_query = render_query(template, CURRENT_BUILD) current_value = query_prometheus(PROMETHEUS_URL, current_query) previous_value = None @@ -161,6 +206,15 @@ def evaluate_metric( def maybe_check_tempo() -> Optional[MetricResult]: + """ + Check Tempo for traces slower than the configured threshold for the canary service. + + If TEMPO_URL is unset the function prints a skip message and returns None. Otherwise it queries Tempo for any trace with duration at or above TEMPO_SLOW_TRACE_THRESHOLD_MS within TEMPO_LOOKBACK_SECONDS for TEMPO_SERVICE and produces a MetricResult summarizing whether slow traces were found. + + Returns: + MetricResult: a result named 'tempo_slow_traces' where `passed` is `False` if a slow trace was found and `current_value` is the slow-threshold in milliseconds (or `0.0` if none was found). + None: if TEMPO_URL is not configured. 
+ """ if not TEMPO_URL: print('TEMPO_URL not provided; skipping trace regression checks.') return None @@ -237,11 +291,28 @@ def maybe_check_tempo() -> Optional[MetricResult]: def write_junit(results_list: list[MetricResult]) -> None: + """ + Write a JUnit-format XML report summarizing the provided MetricResult entries and save it to the configured artifacts location. + + Each MetricResult becomes a `<testcase>` element; passing results include a `<system-out>` block with metric details, and failing results include a `<failure>` element and the same details. The file is written to RESULT_DIR/JUNIT_FILENAME and a message with the written path is printed. + + Parameters: + results_list (list[MetricResult]): List of metric results to include in the JUnit report. + """ tests = len(results_list) failures_count = len([result for result in results_list if not result.passed]) suite_name = 'canary-budget' def xml_escape(value: str) -> str: + """ + Escape characters in a string so it is safe to include in XML content. + + Parameters: + value (str): The raw string to escape. + + Returns: + str: The input string with the XML special characters (`&`, `"`, `'`, `<`, `>`) replaced by their entity equivalents. + """ return ( value.replace('&', '&') .replace('"', '"') @@ -292,4 +363,4 @@ def xml_escape(value: str) -> str: if failures: exit_failure('Canary metrics exceeded budgets; see log for details.') -exit_success('Canary metrics are within budgets.') +exit_success('Canary metrics are within budgets.') \ No newline at end of file