Skip to content

Commit 978e01e

Browse files
authored
Improve output of performance tests (#96)
* Improve output of performance tests Both in CLI and in GitHub JUnit report * Don't spit out package path * Revert workflow triggers for tests-performance
1 parent d1be7c8 commit 978e01e

File tree

5 files changed

+301
-168
lines changed

5 files changed

+301
-168
lines changed

.github/workflows/tests-performance.yml

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -38,26 +38,16 @@ jobs:
3838
3939
- name: Run performance tests
4040
run: |
41-
pytest --junitxml="test-results/test-performance.xml" tests/performance
42-
43-
- name: Report measurements
44-
uses: mikepenz/action-junit-report@v5
45-
if: always()
46-
with:
47-
check_name: 'Performance measurements'
48-
report_paths: "./test-results/test-performance.xml"
49-
detailed_summary: true
50-
include_passed: true
51-
include_time_in_summary: true
52-
resolve_ignore_classname: true
41+
pytest --junitxml=test-results/test-performance.xml tests/performance
5342
5443
- name: Performance validation
5544
uses: mikepenz/action-junit-report@v5
5645
if: always()
5746
with:
58-
check_name: 'Performance validation'
59-
report_paths: "./test-results/test-performance__validation.xml"
47+
check_name: Performance validation
48+
report_paths: ./test-results/test-performance.xml
6049
detailed_summary: true
6150
include_passed: true
6251
include_time_in_summary: true
6352
resolve_ignore_classname: true
53+
fail_on_failure: true

dev_requirements.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
-e .
22
behave
3+
junit-xml
34
mock
45
pre-commit
56
pytest
@@ -9,6 +10,7 @@ pytest-retry
910
pytest-timeout
1011
pytest-xdist
1112
icecream
13+
tabulate
1214
git+https://github.com/neptune-ai/neptune-client-scale.git@main#egg=neptune-scale
1315
fastapi == 0.116.2
1416
uvicorn == 0.35.0

tests/performance/conftest.py

Lines changed: 45 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,66 +1,66 @@
11
import json
22
import os
3-
import subprocess
4-
import sys
53
import tempfile
64
from io import BytesIO
75
from pathlib import Path
86

9-
if not os.getenv("BENCHMARK_VALIDATE_FILE"):
10-
# Create a temp dir for the benchmark results:
11-
tmp_dir = tempfile.mkdtemp(prefix="neptune-query-benchmark-")
12-
report_path = Path(tmp_dir) / "benchmark.json"
7+
from .validation import (
8+
generate_junit_report,
9+
generate_text_report,
10+
)
11+
12+
# Create a temp dir for the benchmark results:
13+
tmp_dir = tempfile.mkdtemp(prefix="neptune-query-benchmark-")
14+
benchmark_json_path = Path(tmp_dir) / "benchmark.json"
15+
16+
17+
def cleanup():
18+
try:
19+
os.unlink(benchmark_json_path)
20+
except Exception:
21+
pass
22+
try:
23+
os.rmdir(tmp_dir)
24+
except Exception:
25+
pass
1326

1427

1528
def pytest_configure(config):
16-
if not os.getenv("BENCHMARK_VALIDATE_FILE"):
17-
# Perform at least 15 rounds per test
18-
# Testing at least for 10 seconds per test
19-
config.option.benchmark_min_rounds = 15
20-
config.option.benchmark_max_time = 10.0
21-
config.option.benchmark_disable_gc = True
22-
config.option.benchmark_time_unit = "ms"
23-
config.option.benchmark_sort = "name"
24-
config.option.benchmark_json = BytesIO()
25-
config.option.junitxml = "benchmark_measurement.xml"
26-
else:
27-
if config.option.xmlpath:
28-
# For --junitxml = /path/abc.xml, create /path/abc__validation.xml
29-
path = Path(config.option.xmlpath)
30-
config.option.xmlpath = str(path.with_stem(path.stem + "__validation"))
29+
# Perform at least 15 rounds per test
30+
# Testing at least for 10 seconds per test
31+
config.option.benchmark_min_rounds = 15
32+
config.option.benchmark_max_time = 10.0
33+
config.option.benchmark_disable_gc = True
34+
config.option.benchmark_time_unit = "ms"
35+
config.option.benchmark_sort = "name"
36+
config.option.benchmark_json = BytesIO()
37+
config.option.benchmark_quiet = True
38+
39+
config.option.original_xmlpath = config.option.xmlpath
40+
config.option.xmlpath = None
3141

3242

3343
def pytest_benchmark_update_json(config, benchmarks, output_json):
34-
with open(report_path, "w") as f:
44+
with open(benchmark_json_path, "w") as f:
3545
json.dump(output_json, f, indent=2)
46+
3647
with open("benchmark_results.json", "w") as f:
3748
json.dump(output_json, f, indent=2)
3849

3950

40-
def pytest_sessionfinish(session, exitstatus):
51+
def pytest_terminal_summary(terminalreporter, exitstatus, config):
4152
try:
42-
if exitstatus != 0:
43-
return
44-
45-
if os.getenv("BENCHMARK_VALIDATE_FILE"):
46-
return
47-
48-
if os.getenv("BENCHMARK_NO_VALIDATION") == "1":
49-
return
53+
# Print a report to the terminal
54+
msg = generate_text_report(benchmark_json_path)
55+
terminalreporter.ensure_newline()
56+
terminalreporter.write(msg)
57+
terminalreporter.ensure_newline()
5058

51-
# Rerun the tests in validation mode
52-
os.environ["BENCHMARK_VALIDATE_FILE"] = str(report_path)
53-
cp = subprocess.run(
54-
[sys.executable] + sys.argv + ["-W", "ignore::pytest_benchmark.logger.PytestBenchmarkWarning"]
55-
)
56-
session.exitstatus = cp.returncode
59+
# And save a nice JUnit XML
60+
if config.option.original_xmlpath:
61+
path = Path(config.option.original_xmlpath)
62+
path.parent.mkdir(parents=True, exist_ok=True)
63+
generate_junit_report(benchmark_json_path, path)
5764

5865
finally:
59-
try:
60-
os.unlink(report_path)
61-
except Exception:
62-
pass
63-
try:
64-
os.rmdir(tmp_dir)
65-
except Exception:
66-
pass
66+
cleanup()

tests/performance/decorator.py

Lines changed: 9 additions & 109 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,10 @@
11
import json
2-
import os
3-
import warnings
42
from dataclasses import dataclass
5-
from functools import (
6-
cache,
7-
wraps,
8-
)
93
from typing import Any
104

115
import pytest
126

137

14-
@cache
15-
def _get_benchmark_data() -> dict[tuple[str, str], dict[str, Any]]:
16-
benchmark_output_file = os.getenv("BENCHMARK_VALIDATE_FILE")
17-
if benchmark_output_file is None:
18-
raise RuntimeError("Environment variable BENCHMARK_VALIDATE_FILE is not set.")
19-
20-
stats = {}
21-
with open(benchmark_output_file) as f:
22-
data = json.load(f)
23-
for benchmark in data["benchmarks"]:
24-
name = benchmark["name"].split("[")[0] # Remove params from the name
25-
params = json.dumps(benchmark["params"], sort_keys=True)
26-
stats[name, params] = benchmark["stats"]
27-
28-
return stats
29-
30-
318
@dataclass
329
class PerformanceTestCaseSpec:
3310
fn_name: str
@@ -44,6 +21,11 @@ def get_params_for_parametrize(self):
4421
def get_params_json(self):
4522
return json.dumps(self.params, sort_keys=True)
4623

24+
def get_params_human(self):
25+
if all(type(value) in [float, int] for value in self.params.values()):
26+
return ", ".join(f"{key}={value}" for key, value in sorted(self.params.items()))
27+
return self.get_params_json()
28+
4729

4830
def expected_benchmark(*multiple_cases: dict, **single_case: dict):
4931
def wrapper(fn):
@@ -59,7 +41,7 @@ def wrapper(fn):
5941

6042
if case_param_keys != param_keys:
6143
raise ValueError(
62-
"All expected_benchmark decorators must have the same parameter keys."
44+
"All listed cases in expected_benchmark must have the same parameter keys."
6345
f"Expected {param_keys}, got {case_param_keys}"
6446
)
6547

@@ -73,93 +55,11 @@ def wrapper(fn):
7355
)
7456
)
7557

76-
if not os.getenv("BENCHMARK_VALIDATE_FILE"):
77-
pytest.mark.parametrize(
78-
",".join(param_keys),
79-
[spec.get_params_for_parametrize() for spec in specs],
80-
)(fn)
81-
return fn
82-
83-
performance_factor = float(os.getenv("BENCHMARK_PERFORMANCE_FACTOR", "1.0"))
84-
85-
@wraps(fn)
86-
def validation(*args, **kwargs):
87-
# Find the matching spec
88-
spec: PerformanceTestCaseSpec | None = None
89-
for case in specs:
90-
if all(kwargs.get(k) == v for k, v in case.params.items()):
91-
spec = case
92-
break
93-
94-
assert spec is not None, "No matching performance case found for the given parameters."
95-
96-
# Extract the actual parameters used in this test run
97-
if spec.min_p0 is None or spec.max_p80 is None or spec.max_p100 is None:
98-
warnings.warn("Benchmark thresholds not set, skipping validation.", category=UserWarning)
99-
return
100-
101-
perf_data = _get_benchmark_data()
102-
103-
assert spec.fn_name, spec.get_params_json() in perf_data
104-
stats = perf_data[spec.fn_name, spec.get_params_json()]
105-
106-
times = sorted(stats["data"])
107-
p0 = times[0]
108-
p80 = times[int(len(times) * 0.8)]
109-
p100 = times[-1]
110-
111-
adjusted_min_p0 = spec.min_p0 * performance_factor
112-
adjusted_max_p80 = spec.max_p80 * performance_factor
113-
adjusted_max_p100 = spec.max_p100 * performance_factor
114-
115-
p0_marker = "✓" if p0 >= adjusted_min_p0 else "✗"
116-
p80_marker = "✓" if p80 <= adjusted_max_p80 else "✗"
117-
p100_marker = "✓" if p100 <= adjusted_max_p100 else "✗"
118-
119-
params_human = ", ".join(f"{k}={v!r}" for k, v in spec.params.items())
120-
detailed_msg = f"""
121-
122-
Benchmark '{spec.fn_name}' with params {params_human} results:
123-
124-
{p0_marker} 0th percentile: {p0:.3f} s
125-
Unadjusted min_p0: {spec.min_p0:.3f} s
126-
Adjusted (*) min_p0: {adjusted_min_p0:.3f} s
127-
128-
{p80_marker} 80th percentile: {p80:.3f} s
129-
Unadjusted max_p80: {spec.max_p80:.3f} s
130-
Adjusted (*) max_p80: {adjusted_max_p80:.3f} s
131-
132-
{p100_marker} 100th percentile: {p100:.3f} s
133-
Unadjusted max_p100: {spec.max_p100:.3f} s
134-
Adjusted (*) max_p100: {adjusted_max_p100:.3f} s
135-
136-
(*) Use the environment variable "BENCHMARK_PERFORMANCE_FACTOR" to adjust the thresholds.
137-
138-
BENCHMARK_PERFORMANCE_FACTOR=1.0 (default) is meant to represent GitHub Actions performance.
139-
Decrease this factor if your local machine is faster than GitHub Actions.
140-
141-
"""
142-
143-
if performance_factor == 1.0:
144-
adjusted_min_p0_str = f"{adjusted_min_p0:.3f}"
145-
adjusted_max_p80_str = f"{adjusted_max_p80:.3f}"
146-
adjusted_max_p100_str = f"{adjusted_max_p100:.3f}"
147-
else:
148-
adjusted_min_p0_str = f"{adjusted_min_p0:.3f} (= {spec.min_p0:.3f} * {performance_factor})"
149-
adjusted_max_p80_str = f"{adjusted_max_p80:.3f} (= {spec.max_p80:.3f} * {performance_factor})"
150-
adjusted_max_p100_str = f"{adjusted_max_p100:.3f} (= {spec.max_p100:.3f} * {performance_factor})"
151-
152-
assert p0 >= adjusted_min_p0, f"p0 {p0:.3f} is less than expected {adjusted_min_p0_str}" + detailed_msg
153-
assert p80 <= adjusted_max_p80, f"p80 {p80:.3f} is more than expected {adjusted_max_p80_str}" + detailed_msg
154-
assert p100 <= adjusted_max_p100, (
155-
f"p100 {p100:.3f} is more than expected {adjusted_max_p100_str}" + detailed_msg
156-
)
157-
15858
pytest.mark.parametrize(
15959
",".join(param_keys),
16060
[spec.get_params_for_parametrize() for spec in specs],
161-
)(validation)
162-
163-
return validation
61+
)(fn)
62+
fn.__expected_benchmark_specs = specs
63+
return fn
16464

16565
return wrapper

0 commit comments

Comments
 (0)