Skip to content

Commit c356de7

Browse files
authored
Performance tests (#70)
* Performance tests * Parametrized performance tests plus BENCHMARK_PERFORMANCE_FACTOR * Remove warning supression * Adjust the expected thresholds * Rename the util module to decorator * More updates and fixes to perfomance testing * Fix parametrized performance benchmarks * More verbose messages * Properly silence pytest-benchmark warning on the validation phase * Fix aligmnment of detailed message to better work in GH UI * More beauty * Adjust thresholds * More updates * Always show the benchmark summary * Rename step to "Run performance tests" * Adjust meta params for the benchmarks * Fix format * Update docstrings * Add test for the perfomance issue introduced in nq 1.4.0 * Change the way to specify multiple cases to @expected_benchmark * Rename PerfomanceCase to PerformanceTestCaseSpec * Fail tests properly * Reformat
1 parent 7a0859a commit c356de7

File tree

8 files changed

+522
-0
lines changed

8 files changed

+522
-0
lines changed
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
name: Performance tests

on:
  schedule:
    - cron: '0 8 * * *' # Run at 8:00 daily
  workflow_dispatch:
  push:
    branches:
      - main
      # NOTE: branch filters are glob patterns, not regexes. The previous
      # 'dev/.*' only matched branches literally starting with "dev/." —
      # 'dev/*' matches any branch under dev/.
      - dev/*
  pull_request:
    paths:
      - 'src/**'
      - 'tests/**'
      - 'dev_requirements.txt'
      - 'pyproject.toml'
      - '.github/workflows/tests-performance.yml'

jobs:
  test:
    runs-on: tools-gha-runners
    timeout-minutes: 30
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Install Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip &&
          pip install -r dev_requirements.txt

      - name: Run performance tests
        run: |
          pytest --junitxml="test-results/test-performance.xml" tests/performance

      - name: Report
        uses: mikepenz/action-junit-report@v5
        if: always()
        with:
          report_paths: "./test-results/test-performance*.xml"
          update_check: true
          annotate_notice: true
          job_name: "Performance tests"

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,3 +129,7 @@ stream.bin
129129
MagicMock/
130130

131131
poetry.lock
132+
133+
# pytest-benchmark
134+
.benchmarks
135+
benchmark_results.json

dev_requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ behave
33
mock
44
pre-commit
55
pytest
6+
pytest-benchmark
67
pytest-mock
78
pytest-retry
89
pytest-timeout

tests/performance/__init__.py

Whitespace-only changes.

tests/performance/conftest.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
import json
2+
import os
3+
import subprocess
4+
import sys
5+
import tempfile
6+
from io import BytesIO
7+
from pathlib import Path
8+
9+
# Measurement phase only (BENCHMARK_VALIDATE_FILE unset): allocate a temp
# location for the benchmark JSON report that the validation rerun will read.
# In validation mode these names are intentionally left undefined.
if not os.getenv("BENCHMARK_VALIDATE_FILE"):
    # Create a temp dir for the benchmark results:
    tmp_dir = tempfile.mkdtemp(prefix="neptune-query-benchmark-")
    report_path = Path(tmp_dir) / "benchmark.json"
13+
14+
15+
def pytest_configure(config):
    """Apply pytest-benchmark settings for the measurement phase.

    Skipped entirely when BENCHMARK_VALIDATE_FILE is set (the validation
    rerun must not re-measure).
    """
    if os.getenv("BENCHMARK_VALIDATE_FILE"):
        return

    options = config.option
    # Perform at least 15 rounds per test, sampling for at least 10 seconds.
    options.benchmark_min_rounds = 15
    options.benchmark_max_time = 10.0
    options.benchmark_disable_gc = True
    options.benchmark_time_unit = "ms"
    options.benchmark_sort = "name"
    # Request JSON output so pytest_benchmark_update_json gets invoked; the
    # in-memory buffer itself is never read back.
    options.benchmark_json = BytesIO()
25+
26+
27+
def pytest_benchmark_update_json(config, benchmarks, output_json):
    """Persist the benchmark report to both output locations.

    Writes the same JSON to the temp report (consumed by the validation
    rerun) and to benchmark_results.json in the working directory.
    """
    for destination in (report_path, "benchmark_results.json"):
        with open(destination, "w") as handle:
            json.dump(output_json, handle, indent=2)
32+
33+
34+
def pytest_sessionfinish(session, exitstatus):
    """After a successful measurement run, re-invoke pytest in validation mode.

    The rerun gets BENCHMARK_VALIDATE_FILE pointing at the JSON report written
    during this session, so the @expected_benchmark wrappers validate the
    recorded timings instead of measuring. The rerun's exit code becomes this
    session's exit status. Temp files are cleaned up in all cases.
    """
    try:
        # Only validate if the measurement run itself passed.
        if exitstatus != 0:
            return

        # We ARE the validation rerun — do not recurse.
        if os.getenv("BENCHMARK_VALIDATE_FILE"):
            return

        # Explicit opt-out of the validation phase.
        if os.getenv("BENCHMARK_NO_VALIDATION") == "1":
            return

        # Rerun the tests in validation mode
        os.environ["BENCHMARK_VALIDATE_FILE"] = str(report_path)
        cp = subprocess.run(
            [sys.executable] + sys.argv + ["-W", "ignore::pytest_benchmark.logger.PytestBenchmarkWarning"]
        )
        # Propagate the validation result as the overall result.
        session.exitstatus = cp.returncode

    finally:
        # Best-effort cleanup of the temp report and its directory; both may
        # legitimately not exist (e.g. in the validation rerun itself).
        try:
            os.unlink(report_path)
        except Exception:
            pass
        try:
            os.rmdir(tmp_dir)
        except Exception:
            pass

tests/performance/decorator.py

Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
import json
2+
import os
3+
import warnings
4+
from dataclasses import dataclass
5+
from functools import (
6+
cache,
7+
wraps,
8+
)
9+
from typing import Any
10+
11+
import pytest
12+
13+
14+
@cache
15+
def _get_benchmark_data() -> dict[tuple[str, str], dict[str, Any]]:
16+
benchmark_output_file = os.getenv("BENCHMARK_VALIDATE_FILE")
17+
if benchmark_output_file is None:
18+
raise RuntimeError("Environment variable BENCHMARK_VALIDATE_FILE is not set.")
19+
20+
stats = {}
21+
with open(benchmark_output_file) as f:
22+
data = json.load(f)
23+
for benchmark in data["benchmarks"]:
24+
name = benchmark["name"].split("[")[0] # Remove params from the name
25+
params = json.dumps(benchmark["params"], sort_keys=True)
26+
stats[name, params] = benchmark["stats"]
27+
28+
return stats
29+
30+
31+
@dataclass
32+
class PerformanceTestCaseSpec:
33+
fn_name: str
34+
params: dict[str, Any]
35+
min_p0: float | None
36+
max_p80: float | None
37+
max_p100: float | None
38+
39+
def get_params_for_parametrize(self):
40+
if len(self.params) == 1:
41+
return list(self.params.values())[0]
42+
return tuple(self.params.values())
43+
44+
def get_params_json(self):
45+
return json.dumps(self.params, sort_keys=True)
46+
47+
48+
def expected_benchmark(*multiple_cases: dict, **single_case: dict):
    """Decorator declaring expected performance thresholds for a benchmark test.

    Usage:
        @expected_benchmark(n=100, min_p0=..., max_p80=..., max_p100=...)   # one case
        @expected_benchmark({"n": 100, ...}, {"n": 1000, ...})              # several cases

    Keys other than min_p0 / max_p80 / max_p100 become pytest parametrize
    parameters; every case must use the same parameter keys.

    In measurement mode (BENCHMARK_VALIDATE_FILE unset) the test function is
    parametrized and returned unchanged. In validation mode it is replaced by
    a wrapper that checks the recorded timings against the thresholds, scaled
    by BENCHMARK_PERFORMANCE_FACTOR.
    """

    def wrapper(fn):
        specs = []
        param_keys: set = set()

        all_cases = multiple_cases or [single_case]

        for case in all_cases:
            case_param_keys = {k for k in case.keys() if k not in ("min_p0", "max_p80", "max_p100")}
            if not param_keys:
                param_keys = case_param_keys

            if case_param_keys != param_keys:
                raise ValueError(
                    "All expected_benchmark decorators must have the same parameter keys. "
                    f"Expected {param_keys}, got {case_param_keys}"
                )

            specs.append(
                PerformanceTestCaseSpec(
                    fn_name=fn.__name__,
                    params={k: case[k] for k in param_keys},
                    min_p0=case.get("min_p0"),
                    max_p80=case.get("max_p80"),
                    max_p100=case.get("max_p100"),
                )
            )

        if not os.getenv("BENCHMARK_VALIDATE_FILE"):
            # Measurement phase: parametrize and run the original test as-is.
            pytest.mark.parametrize(
                ",".join(param_keys),
                [spec.get_params_for_parametrize() for spec in specs],
            )(fn)
            return fn

        performance_factor = float(os.getenv("BENCHMARK_PERFORMANCE_FACTOR", "1.0"))

        @wraps(fn)
        def validation(*args, **kwargs):
            # Find the matching spec
            spec: PerformanceTestCaseSpec | None = None
            for case in specs:
                if all(kwargs.get(k) == v for k, v in case.params.items()):
                    spec = case
                    break

            assert spec is not None, "No matching performance case found for the given parameters."

            # Thresholds are optional — without them we only measure.
            if spec.min_p0 is None or spec.max_p80 is None or spec.max_p100 is None:
                warnings.warn("Benchmark thresholds not set, skipping validation.", category=UserWarning)
                return

            perf_data = _get_benchmark_data()

            # BUGFIX: the original `assert spec.fn_name, spec.get_params_json() in perf_data`
            # asserted only the truthiness of fn_name — the comma made the membership
            # check the assert *message*, so a missing report entry was never caught here.
            key = (spec.fn_name, spec.get_params_json())
            assert key in perf_data, f"No benchmark stats recorded for {key}."
            stats = perf_data[key]

            # Percentiles over the raw per-round timings (seconds).
            times = sorted(stats["data"])
            p0 = times[0]
            p80 = times[int(len(times) * 0.8)]
            p100 = times[-1]

            adjusted_min_p0 = spec.min_p0 * performance_factor
            adjusted_max_p80 = spec.max_p80 * performance_factor
            adjusted_max_p100 = spec.max_p100 * performance_factor

            p0_marker = "✓" if p0 >= adjusted_min_p0 else "✗"
            p80_marker = "✓" if p80 <= adjusted_max_p80 else "✗"
            p100_marker = "✓" if p100 <= adjusted_max_p100 else "✗"

            params_human = ", ".join(f"{k}={v!r}" for k, v in spec.params.items())
            detailed_msg = f"""

Benchmark '{spec.fn_name}' with params {params_human} results:

{p0_marker} 0th percentile: {p0:.3f} s
    Unadjusted min_p0: {spec.min_p0:.3f} s
    Adjusted (*) min_p0: {adjusted_min_p0:.3f} s

{p80_marker} 80th percentile: {p80:.3f} s
    Unadjusted max_p80: {spec.max_p80:.3f} s
    Adjusted (*) max_p80: {adjusted_max_p80:.3f} s

{p100_marker} 100th percentile: {p100:.3f} s
    Unadjusted max_p100: {spec.max_p100:.3f} s
    Adjusted (*) max_p100: {adjusted_max_p100:.3f} s

(*) Use the environment variable "BENCHMARK_PERFORMANCE_FACTOR" to adjust the thresholds.

BENCHMARK_PERFORMANCE_FACTOR=1.0 (default) is meant to represent GitHub Actions performance.
Decrease this factor if your local machine is faster than GitHub Actions.

"""

            # Show the scaling factor only when it is actually in effect.
            if performance_factor == 1.0:
                adjusted_min_p0_str = f"{adjusted_min_p0:.3f}"
                adjusted_max_p80_str = f"{adjusted_max_p80:.3f}"
                adjusted_max_p100_str = f"{adjusted_max_p100:.3f}"
            else:
                adjusted_min_p0_str = f"{adjusted_min_p0:.3f} (= {spec.min_p0:.3f} * {performance_factor})"
                adjusted_max_p80_str = f"{adjusted_max_p80:.3f} (= {spec.max_p80:.3f} * {performance_factor})"
                adjusted_max_p100_str = f"{adjusted_max_p100:.3f} (= {spec.max_p100:.3f} * {performance_factor})"

            assert p0 >= adjusted_min_p0, f"p0 {p0:.3f} is less than expected {adjusted_min_p0_str}" + detailed_msg
            assert p80 <= adjusted_max_p80, f"p80 {p80:.3f} is more than expected {adjusted_max_p80_str}" + detailed_msg
            assert p100 <= adjusted_max_p100, (
                f"p100 {p100:.3f} is more than expected {adjusted_max_p100_str}" + detailed_msg
            )

        pytest.mark.parametrize(
            ",".join(param_keys),
            [spec.get_params_for_parametrize() for spec in specs],
        )(validation)

        return validation

    return wrapper

tests/performance/generate.py

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
import random
2+
import string
3+
4+
import neptune_query.internal.retrieval.metrics as metrics
5+
from neptune_query.internal.identifiers import (
6+
AttributeDefinition,
7+
ProjectIdentifier,
8+
RunAttributeDefinition,
9+
RunIdentifier,
10+
SysId,
11+
)
12+
from neptune_query.internal.retrieval.attribute_types import FloatSeriesAggregations
13+
from neptune_query.internal.retrieval.attribute_values import AttributeValue
14+
from neptune_query.internal.retrieval.metric_buckets import TimeseriesBucket
15+
16+
# Set the random seed for reproducibility
# (every benchmark run generates identical synthetic input data)
random.seed(20250925)
18+
19+
20+
def random_alnum(length: int) -> str:
    """Return a random string of `length` lowercase letters and digits."""
    pool = string.ascii_lowercase + string.digits
    return "".join(random.choices(pool, k=length))


def random_alnum_strings(count: int, length: int) -> list[str]:
    """Return `count` independent random alphanumeric strings, each `length` chars long."""
    return [random_alnum(length) for _index in range(count)]
26+
27+
28+
def float_point_value(i: int, exp: int) -> metrics.FloatPointValue:
    """Build one synthetic metric point tuple for benchmark input.

    NOTE(review): the tuple layout (timestamp-like value, step, value, flag,
    ratio) is assumed from the field values — confirm against
    neptune_query.internal.retrieval.metrics.FloatPointValue.
    """
    return (1234567890 + i * 1000.0, float(i) + exp, float(i) * 10, False, 1.0)


# Shared run identifier used by the value helpers below.
EXPERIMENT_IDENTIFIER = RunIdentifier(ProjectIdentifier("project/abc"), SysId("XXX-1"))
33+
34+
35+
def float_series_value(path: str, exp: int):
    """Helper to create a float series value for testing.

    Builds an AttributeValue of type "float_series" whose aggregations are
    derived from `exp` (last=max=exp, average=exp/2, min=variance=0), attached
    to the shared EXPERIMENT_IDENTIFIER.
    """
    return AttributeValue(
        attribute_definition=AttributeDefinition(path, "float_series"),
        value=FloatSeriesAggregations(last=float(exp), min=0.0, max=float(exp), average=float(exp) / 2, variance=0.0),
        run_identifier=EXPERIMENT_IDENTIFIER,
    )
42+
43+
44+
def string_value(path: str, exp: int):
    """Helper to create a string value for testing.

    Builds an AttributeValue of type "string" with value "value_<exp>",
    attached to the shared EXPERIMENT_IDENTIFIER.
    """
    return AttributeValue(
        attribute_definition=AttributeDefinition(path, "string"),
        value=f"value_{exp}",
        run_identifier=EXPERIMENT_IDENTIFIER,
    )
51+
52+
53+
def bucket_metrics(experiments: int, paths: int, buckets: int) -> dict[RunAttributeDefinition, list[TimeseriesBucket]]:
    """Generate synthetic bucketed metrics: experiments * paths keys, each with `buckets` buckets."""
    return {
        run_attribute_definition(experiment, path): [bucket_metric(index=i) for i in range(buckets)]
        for experiment in range(experiments)
        for path in range(paths)
    }
59+
60+
61+
def run_attribute_definition(
    sys_id: int | str, path: int | str, attribute_type: str = "float_series"
) -> RunAttributeDefinition:
    """Build a RunAttributeDefinition for run "sysid<sys_id>" / attribute "path<path>" in project foo/bar."""
    return RunAttributeDefinition(
        RunIdentifier(ProjectIdentifier("foo/bar"), SysId(f"sysid{sys_id}")),
        AttributeDefinition(f"path{path}", attribute_type),
    )
68+
69+
70+
def bucket_metric(index: int) -> TimeseriesBucket:
    """Build one synthetic TimeseriesBucket whose fields are deterministic in `index`.

    Index 0 is a special half-open first bucket (from -inf) holding a single
    zero point; every later bucket covers [20*index, 20*(index+1)) with values
    scaled linearly by the index.
    """
    if index > 0:
        return TimeseriesBucket(
            index=index,
            from_x=20.0 * index,
            to_x=20.0 * (index + 1),
            first_x=20.0 * index + 2,
            first_y=100.0 * (index - 1) + 90.0,
            last_x=20.0 * (index + 1) - 2,
            last_y=100.0 * index,
            y_min=80.0 * index,
            y_max=110.0 * index,
            finite_point_count=10 + index,
            # NOTE(review): goes negative for index > 5 — presumably benchmarks
            # never use that many buckets per series; confirm if extended.
            nan_count=5 - index,
            positive_inf_count=2 * index,
            negative_inf_count=index,
            finite_points_sum=950.0 * index,
        )
    else:
        # First bucket: open-ended lower bound with a single zero-valued point.
        return TimeseriesBucket(
            index=index,
            from_x=float("-inf"),
            to_x=20.0,
            first_x=20.0,
            first_y=0.0,
            last_x=20.0,
            last_y=0.0,
            y_min=0.0,
            y_max=0.0,
            finite_point_count=1,
            nan_count=0,
            positive_inf_count=0,
            negative_inf_count=0,
            finite_points_sum=0.0,
        )

0 commit comments

Comments
 (0)