Skip to content

Commit 182df3f

Browse files
committed
WIP: Manually create JUnit report
1 parent 6795660 commit 182df3f

File tree

6 files changed

+196
-140
lines changed

6 files changed

+196
-140
lines changed

.github/workflows/tests-performance.yml

Lines changed: 3 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,6 @@ on:
88
- cron: '0 8 * * *' # Run at 8:00 daily
99
workflow_dispatch:
1010
push:
11-
branches:
12-
- main
13-
- dev/.*
14-
# pull_request:
15-
# paths:
16-
# - 'src/**'
17-
# - 'tests/**'
18-
# - 'dev_requirements.txt'
19-
# - 'pyproject.toml'
20-
# - '.github/workflows/tests-performance.yml'
2111

2212
jobs:
2313
test-performance:
@@ -38,25 +28,14 @@ jobs:
3828
3929
- name: Run performance tests
4030
run: |
41-
pytest --junitxml="test-results/test-performance.xml" tests/performance
42-
43-
- name: Report measurements
44-
uses: mikepenz/action-junit-report@v5
45-
if: always()
46-
with:
47-
check_name: 'Performance measurements'
48-
report_paths: "./test-results/test-performance.xml"
49-
detailed_summary: true
50-
include_passed: true
51-
include_time_in_summary: true
52-
resolve_ignore_classname: true
31+
pytest --junitxml=test-results/test-performance.xml tests/performance
5332
5433
- name: Performance validation
5534
uses: mikepenz/action-junit-report@v5
5635
if: always()
5736
with:
58-
check_name: 'Performance validation'
59-
report_paths: "./test-results/test-performance__validation.xml"
37+
check_name: Performance validation
38+
report_paths: ./test-results/test-performance__validation.xml
6039
detailed_summary: true
6140
include_passed: true
6241
include_time_in_summary: true

dev_requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
-e .
22
behave
3+
junit-xml
34
mock
45
pre-commit
56
pytest

test-performance__validation.xml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
<?xml version="1.0" ?>
2+
<testsuites disabled="0" errors="0" failures="2" tests="2" time="2.9327545302003273">
3+
<testsuite disabled="0" errors="0" failures="2" name="BenchmarkResults" skipped="0" tests="2" time="2.9327545302003273">
4+
<testcase name="test_perf_output_format.test_perf_create_series_dataframe[200-50-100]" time="1.468609">
5+
<failure type="failure" message="max too big"/>
6+
</testcase>
7+
<testcase name="test_perf_output_format.test_perf_create_series_dataframe[50-200-100]" time="1.464145">
8+
<failure type="failure" message="max too big"/>
9+
</testcase>
10+
</testsuite>
11+
</testsuites>

tests/performance/conftest.py

Lines changed: 39 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
import json
22
import os
3-
import subprocess
4-
import sys
53
import tempfile
64
from io import BytesIO
75
from pathlib import Path
86

7+
8+
from .validation import generate_junit_report
9+
10+
911
if not os.getenv("BENCHMARK_VALIDATE_FILE"):
1012
# Create a temp dir for the benchmark results:
1113
tmp_dir = tempfile.mkdtemp(prefix="neptune-query-benchmark-")
@@ -31,36 +33,43 @@ def pytest_configure(config):
3133

3234

3335
def pytest_benchmark_update_json(config, benchmarks, output_json):
34-
with open(report_path, "w") as f:
35-
json.dump(output_json, f, indent=2)
3636
with open("benchmark_results.json", "w") as f:
3737
json.dump(output_json, f, indent=2)
3838

39+
if config.option.xmlpath:
40+
# For --junitxml = /path/abc.xml, create /path/abc__validation.xml
41+
path = Path(config.option.xmlpath)
42+
validation_report = path.with_stem(path.stem + "__validation")
43+
generate_junit_report("benchmark_results.json", validation_report)
3944

40-
def pytest_sessionfinish(session, exitstatus):
41-
try:
42-
if exitstatus != 0:
43-
return
44-
45-
if os.getenv("BENCHMARK_VALIDATE_FILE"):
46-
return
47-
48-
if os.getenv("BENCHMARK_NO_VALIDATION") == "1":
49-
return
50-
51-
# Rerun the tests in validation mode
52-
os.environ["BENCHMARK_VALIDATE_FILE"] = str(report_path)
53-
cp = subprocess.run(
54-
[sys.executable] + sys.argv + ["-W", "ignore::pytest_benchmark.logger.PytestBenchmarkWarning"]
55-
)
56-
session.exitstatus = cp.returncode
5745

58-
finally:
59-
try:
60-
os.unlink(report_path)
61-
except Exception:
62-
pass
63-
try:
64-
os.rmdir(tmp_dir)
65-
except Exception:
66-
pass
46+
# def pytest_sessionfinish(session, exitstatus):
47+
#
48+
# generate_junit_report("benchmark_results.json", "junit_report.xml")
49+
#
50+
# try:
51+
# if exitstatus != 0:
52+
# return
53+
#
54+
# if os.getenv("BENCHMARK_VALIDATE_FILE"):
55+
# return
56+
#
57+
# if os.getenv("BENCHMARK_NO_VALIDATION") == "1":
58+
# return
59+
#
60+
# # Rerun the tests in validation mode
61+
# os.environ["BENCHMARK_VALIDATE_FILE"] = str(report_path)
62+
# cp = subprocess.run(
63+
# [sys.executable] + sys.argv + ["-W", "ignore::pytest_benchmark.logger.PytestBenchmarkWarning"]
64+
# )
65+
# session.exitstatus = cp.returncode
66+
#
67+
# finally:
68+
# try:
69+
# os.unlink(report_path)
70+
# except Exception:
71+
# pass
72+
# try:
73+
# os.rmdir(tmp_dir)
74+
# except Exception:
75+
# pass

tests/performance/decorator.py

Lines changed: 86 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ def wrapper(fn):
5959

6060
if case_param_keys != param_keys:
6161
raise ValueError(
62-
"All expected_benchmark decorators must have the same parameter keys."
62+
"All listed cases in expected_benchmark must have the same parameter keys."
6363
f"Expected {param_keys}, got {case_param_keys}"
6464
)
6565

@@ -73,93 +73,93 @@ def wrapper(fn):
7373
)
7474
)
7575

76-
if not os.getenv("BENCHMARK_VALIDATE_FILE"):
77-
pytest.mark.parametrize(
78-
",".join(param_keys),
79-
[spec.get_params_for_parametrize() for spec in specs],
80-
)(fn)
81-
return fn
82-
83-
performance_factor = float(os.getenv("BENCHMARK_PERFORMANCE_FACTOR", "1.0"))
84-
85-
@wraps(fn)
86-
def validation(*args, **kwargs):
87-
# Find the matching spec
88-
spec: PerformanceTestCaseSpec | None = None
89-
for case in specs:
90-
if all(kwargs.get(k) == v for k, v in case.params.items()):
91-
spec = case
92-
break
93-
94-
assert spec is not None, "No matching performance case found for the given parameters."
95-
96-
# Extract the actual parameters used in this test run
97-
if spec.min_p0 is None or spec.max_p80 is None or spec.max_p100 is None:
98-
warnings.warn("Benchmark thresholds not set, skipping validation.", category=UserWarning)
99-
return
100-
101-
perf_data = _get_benchmark_data()
102-
103-
assert spec.fn_name, spec.get_params_json() in perf_data
104-
stats = perf_data[spec.fn_name, spec.get_params_json()]
105-
106-
times = sorted(stats["data"])
107-
p0 = times[0]
108-
p80 = times[int(len(times) * 0.8)]
109-
p100 = times[-1]
110-
111-
adjusted_min_p0 = spec.min_p0 * performance_factor
112-
adjusted_max_p80 = spec.max_p80 * performance_factor
113-
adjusted_max_p100 = spec.max_p100 * performance_factor
114-
115-
p0_marker = "✓" if p0 >= adjusted_min_p0 else "✗"
116-
p80_marker = "✓" if p80 <= adjusted_max_p80 else "✗"
117-
p100_marker = "✓" if p100 <= adjusted_max_p100 else "✗"
118-
119-
params_human = ", ".join(f"{k}={v!r}" for k, v in spec.params.items())
120-
detailed_msg = f"""
121-
122-
Benchmark '{spec.fn_name}' with params {params_human} results:
123-
124-
{p0_marker} 0th percentile: {p0:.3f} s
125-
Unadjusted min_p0: {spec.min_p0:.3f} s
126-
Adjusted (*) min_p0: {adjusted_min_p0:.3f} s
127-
128-
{p80_marker} 80th percentile: {p80:.3f} s
129-
Unadjusted max_p80: {spec.max_p80:.3f} s
130-
Adjusted (*) max_p80: {adjusted_max_p80:.3f} s
131-
132-
{p100_marker} 100th percentile: {p100:.3f} s
133-
Unadjusted max_p100: {spec.max_p100:.3f} s
134-
Adjusted (*) max_p100: {adjusted_max_p100:.3f} s
135-
136-
(*) Use the environment variable "BENCHMARK_PERFORMANCE_FACTOR" to adjust the thresholds.
137-
138-
BENCHMARK_PERFORMANCE_FACTOR=1.0 (default) is meant to represent GitHub Actions performance.
139-
Decrease this factor if your local machine is faster than GitHub Actions.
140-
141-
"""
142-
143-
if performance_factor == 1.0:
144-
adjusted_min_p0_str = f"{adjusted_min_p0:.3f}"
145-
adjusted_max_p80_str = f"{adjusted_max_p80:.3f}"
146-
adjusted_max_p100_str = f"{adjusted_max_p100:.3f}"
147-
else:
148-
adjusted_min_p0_str = f"{adjusted_min_p0:.3f} (= {spec.min_p0:.3f} * {performance_factor})"
149-
adjusted_max_p80_str = f"{adjusted_max_p80:.3f} (= {spec.max_p80:.3f} * {performance_factor})"
150-
adjusted_max_p100_str = f"{adjusted_max_p100:.3f} (= {spec.max_p100:.3f} * {performance_factor})"
151-
152-
assert p0 >= adjusted_min_p0, f"p0 {p0:.3f} is less than expected {adjusted_min_p0_str}" + detailed_msg
153-
assert p80 <= adjusted_max_p80, f"p80 {p80:.3f} is more than expected {adjusted_max_p80_str}" + detailed_msg
154-
assert p100 <= adjusted_max_p100, (
155-
f"p100 {p100:.3f} is more than expected {adjusted_max_p100_str}" + detailed_msg
156-
)
157-
15876
pytest.mark.parametrize(
15977
",".join(param_keys),
16078
[spec.get_params_for_parametrize() for spec in specs],
161-
)(validation)
162-
163-
return validation
79+
)(fn)
80+
fn.__expected_benchmark_specs = specs
81+
return fn
82+
83+
# performance_factor = float(os.getenv("BENCHMARK_PERFORMANCE_FACTOR", "1.0"))
84+
#
85+
# @wraps(fn)
86+
# def validation(*args, **kwargs):
87+
# # Find the matching spec
88+
# spec: PerformanceTestCaseSpec | None = None
89+
# for case in specs:
90+
# if all(kwargs.get(k) == v for k, v in case.params.items()):
91+
# spec = case
92+
# break
93+
#
94+
# assert spec is not None, "No matching performance case found for the given parameters."
95+
#
96+
# # Extract the actual parameters used in this test run
97+
# if spec.min_p0 is None or spec.max_p80 is None or spec.max_p100 is None:
98+
# warnings.warn("Benchmark thresholds not set, skipping validation.", category=UserWarning)
99+
# return
100+
#
101+
# perf_data = _get_benchmark_data()
102+
#
103+
# assert spec.fn_name, spec.get_params_json() in perf_data
104+
# stats = perf_data[spec.fn_name, spec.get_params_json()]
105+
#
106+
# times = sorted(stats["data"])
107+
# p0 = times[0]
108+
# p80 = times[int(len(times) * 0.8)]
109+
# p100 = times[-1]
110+
#
111+
# adjusted_min_p0 = spec.min_p0 * performance_factor
112+
# adjusted_max_p80 = spec.max_p80 * performance_factor
113+
# adjusted_max_p100 = spec.max_p100 * performance_factor
114+
#
115+
# p0_marker = "✓" if p0 >= adjusted_min_p0 else "✗"
116+
# p80_marker = "✓" if p80 <= adjusted_max_p80 else "✗"
117+
# p100_marker = "✓" if p100 <= adjusted_max_p100 else "✗"
118+
#
119+
# params_human = ", ".join(f"{k}={v!r}" for k, v in spec.params.items())
120+
# detailed_msg = f"""
121+
#
122+
# Benchmark '{spec.fn_name}' with params {params_human} results:
123+
#
124+
# {p0_marker} 0th percentile: {p0:.3f} s
125+
# Unadjusted min_p0: {spec.min_p0:.3f} s
126+
# Adjusted (*) min_p0: {adjusted_min_p0:.3f} s
127+
#
128+
# {p80_marker} 80th percentile: {p80:.3f} s
129+
# Unadjusted max_p80: {spec.max_p80:.3f} s
130+
# Adjusted (*) max_p80: {adjusted_max_p80:.3f} s
131+
#
132+
# {p100_marker} 100th percentile: {p100:.3f} s
133+
# Unadjusted max_p100: {spec.max_p100:.3f} s
134+
# Adjusted (*) max_p100: {adjusted_max_p100:.3f} s
135+
#
136+
# (*) Use the environment variable "BENCHMARK_PERFORMANCE_FACTOR" to adjust the thresholds.
137+
#
138+
# BENCHMARK_PERFORMANCE_FACTOR=1.0 (default) is meant to represent GitHub Actions performance.
139+
# Decrease this factor if your local machine is faster than GitHub Actions.
140+
#
141+
# """
142+
#
143+
# if performance_factor == 1.0:
144+
# adjusted_min_p0_str = f"{adjusted_min_p0:.3f}"
145+
# adjusted_max_p80_str = f"{adjusted_max_p80:.3f}"
146+
# adjusted_max_p100_str = f"{adjusted_max_p100:.3f}"
147+
# else:
148+
# adjusted_min_p0_str = f"{adjusted_min_p0:.3f} (= {spec.min_p0:.3f} * {performance_factor})"
149+
# adjusted_max_p80_str = f"{adjusted_max_p80:.3f} (= {spec.max_p80:.3f} * {performance_factor})"
150+
# adjusted_max_p100_str = f"{adjusted_max_p100:.3f} (= {spec.max_p100:.3f} * {performance_factor})"
151+
#
152+
# assert p0 >= adjusted_min_p0, f"p0 {p0:.3f} is less than expected {adjusted_min_p0_str}" + detailed_msg
153+
# assert p80 <= adjusted_max_p80, f"p80 {p80:.3f} is more than expected {adjusted_max_p80_str}" + detailed_msg
154+
# assert p100 <= adjusted_max_p100, (
155+
# f"p100 {p100:.3f} is more than expected {adjusted_max_p100_str}" + detailed_msg
156+
# )
157+
#
158+
# pytest.mark.parametrize(
159+
# ",".join(param_keys),
160+
# [spec.get_params_for_parametrize() for spec in specs],
161+
# )(validation)
162+
#
163+
# return validation
164164

165165
return wrapper

tests/performance/validation.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
import importlib
2+
import json
3+
from pathlib import Path
4+
5+
from junit_xml import TestSuite, TestCase, to_xml_report_file
6+
7+
from .decorator import PerformanceTestCaseSpec
8+
9+
10+
def get_benchmark_spec(benchmark):
    """Return the spec matching a single pytest-benchmark result entry.

    Resolves the test module and function from the benchmark's ``fullname``
    (e.g. ``tests/performance/test_x.py::test_fn[...]``), imports the module,
    and searches the spec list that the ``expected_benchmark`` decorator
    attached to the test function.

    Args:
        benchmark: one entry from the ``benchmarks`` list of a
            pytest-benchmark JSON report (must have ``fullname``, ``name``
            and ``params`` keys).

    Returns:
        The ``PerformanceTestCaseSpec`` whose params equal the benchmark's.

    Raises:
        ValueError: if no spec with matching params is found.
    """
    # "tests/performance/test_x.py::test_fn[params]" -> module path and bare fn name.
    # NOTE(review): assumes POSIX-style "/" separators in fullname — confirm on Windows.
    module_path = Path(benchmark["fullname"].split("::")[0])
    module_name = str(module_path.with_suffix("")).replace("/", ".")
    fn_name = benchmark["name"].split("[")[0]
    params = benchmark["params"]
    module = importlib.import_module(module_name)
    fn = getattr(module, fn_name)
    # Fix: the decorator stores a *list* of specs, not a single spec.
    specs: list[PerformanceTestCaseSpec] = fn.__expected_benchmark_specs
    for spec in specs:
        if spec.params == params:
            return spec
    raise ValueError(f"No matching spec found for benchmark {module_name}.{fn_name} with params {params}")
22+
23+
def generate_junit_report(benchmark_path, report_path):
    """Convert a pytest-benchmark JSON report into a JUnit XML report.

    Emits three test cases per benchmark — the p0, p80 and p100 percentiles
    of the measured times — and marks a case failed when its percentile
    violates the corresponding threshold of the matching spec.
    """
    with open(benchmark_path, "r") as f:
        data = json.load(f)

    cases = []
    for entry in data["benchmarks"]:
        # Human-readable dotted name, e.g. "test_x.test_fn[...]".
        label = entry["fullname"].replace("tests.performance.", "").replace(".py::", ".").replace("/", ".")
        spec = get_benchmark_spec(entry)

        samples = sorted(entry["stats"]["data"])
        percentiles = {
            "p0": samples[0],
            "p80": samples[int(len(samples) * 0.8)],
            "p100": samples[-1],
        }

        # One JUnit test case per percentile (insertion order: p0, p80, p100).
        per_case = {
            key: TestCase(name=f"{label}__{key}", elapsed_sec=value)
            for key, value in percentiles.items()
        }

        if spec.min_p0 is not None and percentiles["p0"] < spec.min_p0:
            per_case["p0"].add_failure_info("p0 too small")
        if spec.max_p80 is not None and percentiles["p80"] > spec.max_p80:
            per_case["p80"].add_failure_info("p80 too big")
        if spec.max_p100 is not None and percentiles["p100"] > spec.max_p100:
            per_case["p100"].add_failure_info("p100 too big")

        cases.extend(per_case.values())

    suite = TestSuite(name="BenchmarkResults", test_cases=cases)

    with open(report_path, "w") as out:
        to_xml_report_file(out, [suite], prettyprint=True)


if __name__ == "__main__":
    generate_junit_report("benchmark_results.json", "junit_report.xml")

0 commit comments

Comments
 (0)