Commit c35c4f0

feat: add performance tests to benchmark Robocop on release
1 parent 3e1df7d commit c35c4f0

12 files changed: +1882 −1981 lines


.github/workflows/docs-check.yml

Lines changed: 0 additions & 18 deletions
This file was deleted.

.github/workflows/release_check.yml

Lines changed: 65 additions & 0 deletions
@@ -0,0 +1,65 @@
+name: Release Check
+
+on:
+  pull_request:
+    types: [opened, synchronize, labeled]
+  workflow_dispatch:
+
+jobs:
+  build-docs:
+    runs-on: ubuntu-latest
+    # Only run on release PRs (release-please labels them with 'autorelease: pending')
+    if: "contains(github.event.pull_request.labels.*.name, 'autorelease: pending')"
+    steps:
+      - uses: actions/checkout@v4
+      - uses: astral-sh/setup-uv@v4
+      - name: Install dependencies
+        run: uv sync --group doc
+      - name: Build docs
+        run: uv run mkdocs build --clean
+
+  performance-tests:
+    runs-on: ubuntu-latest
+    if: "contains(github.event.pull_request.labels.*.name, 'autorelease: pending')"
+    steps:
+      - uses: actions/checkout@v4
+      - uses: astral-sh/setup-uv@v4
+
+      - name: Set up Python 3.13
+        uses: actions/setup-python@v5
+        with:
+          python-version: 3.13
+
+      - name: Install dependencies
+        run: uv sync --dev
+
+      - name: Resolve app versions
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          echo "ROBOCOP_VERSIONS=$(gh release list --limit 4 --json tagName --jq '.[].tagName' | tr '\n' ',')" >> $GITHUB_ENV
+
+      - name: Run performance tests
+        run: uv run nox -s performance > performance.log 2>&1
+
+      - name: Merge report and prepare for publishing
+        run: uv run tests/performance/merge_reports.py
+
+      - name: Publish to job summary
+        run: cat perf_report.md >> "$GITHUB_STEP_SUMMARY"
+
+      - name: Publish to PR
+        if: github.event_name == 'pull_request'
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          gh pr comment \
+            "${{ github.event.pull_request.number }}" \
+            --body-file perf_report.md
+
+      - name: Upload log artifact
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: performance.log
+          path: performance.log
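
The "Resolve app versions" step writes a comma-separated list of the latest release tags into the job environment, and noxfile.py (shown later in this diff) splits that string back into a version list. A minimal sketch of the round trip, using assumed tag names (the real values come from `gh release list`):

import os

# Assumed example value: four release tags joined by `tr '\n' ','`, which leaves a trailing comma.
os.environ["ROBOCOP_VERSIONS"] = "v7.4.0,v7.3.2,v7.3.1,v7.3.0,"

# Same filtering as in noxfile.py: drop empty entries, then append "local" for the checked-out code.
raw = os.environ.get("ROBOCOP_VERSIONS", "")
versions = [tag for tag in raw.split(",") if tag] + ["local"]
print(versions)  # ['v7.4.0', 'v7.3.2', 'v7.3.1', 'v7.3.0', 'local']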

.github/workflows/tests.yml

Lines changed: 1 addition & 1 deletion
@@ -4,7 +4,7 @@ on:
   push:
     branches: [main]
   pull_request:
-    branches: [ main ]
+    branches: [main]

 jobs:
   build:

noxfile.py

Lines changed: 44 additions & 0 deletions
@@ -4,13 +4,20 @@
     > uv run nox -s docs
 """

+import os
+
 import nox

 nox.options.default_venv_backend = "uv"

 PYTHON_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14"]

 ROBOT_VERSIONS = ["robotframework==4.*", "robotframework==5.*", "robotframework==6.*", "robotframework==7.*"]
+ROBOCOP_VERSIONS = (
+    [*[non_empty for non_empty in os.environ["ROBOCOP_VERSIONS"].split(",") if non_empty], "local"]
+    if os.environ.get("ROBOCOP_VERSIONS")
+    else ["v7.1.0", "local"]
+)


 @nox.session(python=PYTHON_VERSIONS)  # , reuse_venv=False
@@ -52,3 +59,40 @@ def docs(session):
     # session.run("sphinx-build", "-a", "-E", "-b", "html", "docs", "docs/_build/")
     command = ["sphinx-build", "-a", "-E", "--verbose", "-b", "html", "docs/source", "docs/_build/"]
     session.run(*command)
+
+
+@nox.session(python=PYTHON_VERSIONS[-2])
+@nox.parametrize("robocop_version", ROBOCOP_VERSIONS)
+def performance(session: nox.Session, robocop_version: str) -> None:
+    """
+    Generate performance reports.
+
+    Used by the GitHub Workflow: ``.github/workflows/release_check.yml``
+
+    ROBOCOP_VERSIONS is built from the environment variable set in the workflow (the latest 4 released tags) plus
+    "local", which means the local installation. The goal is to execute the performance tests in an isolated
+    environment with a selected past/current Robocop version for comparison.
+
+    The reports are designed so that the specific results do not matter, but the change between versions does. We
+    re-execute the tests for the past version to get a baseline benchmark for the current version.
+    """
+    robocop_version = robocop_version.removeprefix("v")
+    if not robocop_version:
+        return
+    if robocop_version == "local":
+        session.run_install(
+            "uv",
+            "sync",
+            f"--python={session.virtualenv.location}",
+            env={"UV_PROJECT_ENVIRONMENT": session.virtualenv.location},
+        )
+    else:
+        session.run(
+            "uv",
+            "pip",
+            "install",
+            f"robotframework-robocop=={robocop_version}",
+            f"--python={session.virtualenv.location}",
+            env={"UV_PROJECT_ENVIRONMENT": session.virtualenv.location},
+        )
+    session.run("python", "-m", "tests.performance.generate_reports", external=True, silent=False)

pyproject.toml

Lines changed: 2 additions & 0 deletions
@@ -80,6 +80,8 @@ dev = [
     "pytest-xdist>=3.6.1",
     "ruff==0.14.8",
     "pysonar",
+    "nox>=2025.11.12",
+    "packaging>=25.0",
 ]
 doc = [
     "mkdocs",

tests/performance/__init__.py

Whitespace-only changes.

tests/performance/generate_reports.py

Lines changed: 65 additions & 39 deletions
@@ -3,7 +3,8 @@

 Reports from previous runs are stored in the reports folder and can be used for comparison.

-Each report is run multiple times and calculates a trimmed mean by excluding the bottom and top 10% of values.
+Each report is run multiple times and calculates a trimmed mean by excluding the bottom and top values (according to
+the cut_off parameter).
 """

 import json
@@ -16,16 +17,25 @@

 from robocop import __version__, config
 from robocop.formatter.formatters import FORMATTERS
+from robocop.linter.utils.version_matching import Version
 from robocop.run import check_files, format_files
 from tests import working_directory

 LINTER_TESTS_DIR = Path(__file__).parent.parent / "linter"
 TEST_DATA = Path(__file__).parent / "test_data"
+ROBOCOP_VERSION = Version(__version__)
 REPORTS = {}


-def performance_report(runs: int = 100):
-    """Use as decorator to measure performance of a function and store results."""
+def performance_report(runs: int = 100, cut_off: int = 0):
+    """
+    Use as decorator to measure performance of a function and store results.
+
+    Args:
+        runs: Number of runs to take into account when calculating the average.
+        cut_off: Number of slowest and fastest runs to exclude from the average.
+
+    """

     def decorator(func):
         @wraps(func)
@@ -38,18 +48,13 @@ def wrapper(*args, **kwargs):
                 print(f"Run {run + 1} / {runs} of {func.__name__}")
                 start = time.perf_counter()
                 counter = func(*args, **kwargs)
-                end = time.perf_counter()
-                time_taken = end - start
+                time_taken = time.perf_counter() - start
                 run_times.append(time_taken)
                 print(f" Execution time: {time_taken:.6f} seconds")
             run_times.sort()
-            cut_off = int(runs * 0.1)
-            if cut_off + 2 > runs:
-                cut_off = 0
-            if len(run_times) > 2:
-                avg_time = sum(run_times[cut_off:-cut_off]) / (len(run_times) - 2 * cut_off)
-            else:
-                avg_time = sum(run_times) / len(run_times)
+            if cut_off:
+                run_times = run_times[cut_off:-cut_off]
+            avg_time = sum(run_times) / len(run_times)
             print(f"Mean average execution time over {runs} runs: {avg_time:.6f} seconds")
             if report_name:
                 if func.__name__ not in REPORTS:
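
With the new cut_off parameter the trimmed mean is explicit: the cut_off slowest and fastest runs are discarded before averaging. A minimal sketch with assumed timings:

# Assumed timings for runs=10, cut_off=2: the two fastest and two slowest runs are dropped,
# so a single outlier (e.g. 1.45 s) does not skew the reported mean.
run_times = sorted([0.91, 0.91, 0.92, 0.93, 0.94, 0.95, 0.97, 0.99, 1.10, 1.45])
cut_off = 2
if cut_off:
    run_times = run_times[cut_off:-cut_off]
avg_time = sum(run_times) / len(run_times)
print(f"{avg_time:.6f} seconds")  # mean of the middle six runs
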
@@ -63,7 +68,7 @@ def wrapper(*args, **kwargs):
     return decorator


-@performance_report(runs=50)
+@performance_report(runs=10, cut_off=2)
 def project_traversing_report() -> int:
     """
     Measure how long it takes to traverse Robocop repository files.
@@ -90,33 +95,41 @@ def project_traversing_report() -> int:
     return files_count


-@performance_report(runs=50)
-def formatter_report(formatter: str, report_name: str, cache: bool = True) -> int:  # noqa: ARG001
+@performance_report(runs=10, cut_off=2)
+def formatter_report(formatter: str, report_name: str, **kwargs) -> int:  # noqa: ARG001
+    """Measure how long it takes to format test files using a specific formatter."""
     main_dir = Path(__file__).parent.parent.parent
     formatter_dir = main_dir / "tests" / "formatter" / "formatters" / formatter
     with working_directory(formatter_dir):
-        format_files(["source"], select=[formatter], overwrite=False, return_result=True, silent=True, cache=cache)
+        format_files(["source"], select=[formatter], overwrite=False, return_result=True, silent=True, **kwargs)
     source_dir = formatter_dir / "source"
     return len(list(source_dir.iterdir()))


-@performance_report(runs=10)
+@performance_report(runs=5)
 def linter_report(report_name: str, **kwargs) -> int:  # noqa: ARG001
+    """Measure how long it takes to lint all linter test files."""
     main_dir = Path(__file__).parent.parent.parent
     linter_dir = main_dir / "tests" / "linter"
     with working_directory(linter_dir):
         check_files(return_result=True, select=["ALL"], **kwargs)
     return len(list(linter_dir.glob("**/*.robot")))


-@performance_report(runs=2)
+@performance_report(runs=1)
 def lint_large_file(report_name: str, lint_dir: Path, **kwargs) -> int:  # noqa: ARG001
+    """Measure how long it takes to lint a large file."""
     with working_directory(lint_dir):
-        check_files(return_result=True, select=["ALL"], cache=False, **kwargs)
+        check_files(return_result=True, select=["ALL"], **kwargs)
     return 1


 def merge_dictionaries(d1: dict, d2: dict) -> dict:
+    """
+    Merge two dictionaries recursively.
+
+    This function is used to merge two partial reports generated by different runs.
+    """
     for key, value in d2.items():
         if key in d1 and isinstance(d1[key], dict) and isinstance(value, dict):
             merge_dictionaries(d1[key], value)
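
For illustration, a hedged usage sketch of the recursive merge. The non-dict branch (cut off by the hunk boundary) and the report shapes are assumptions:

def merge_dictionaries(d1: dict, d2: dict) -> dict:
    # Mirrors the function above; the non-dict case (assumed) simply copies the value over.
    for key, value in d2.items():
        if key in d1 and isinstance(d1[key], dict) and isinstance(value, dict):
            merge_dictionaries(d1[key], value)
        else:
            d1[key] = value
    return d1

# Assumed report shapes: one partial report per Robocop version, keyed by test and report name.
local_run = {"linter_report": {"with_print_no_cache": {"7.4.0": 1.62}}}
baseline_run = {"linter_report": {"with_print_no_cache": {"7.1.0": 1.84}}}
print(merge_dictionaries(local_run, baseline_run))
# {'linter_report': {'with_print_no_cache': {'7.4.0': 1.62, '7.1.0': 1.84}}}
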
@@ -126,6 +139,12 @@ def merge_dictionaries(d1: dict, d2: dict) -> dict:


 def generate_large_file(template_path: Path, output_dir: Path) -> None:
+    """
+    Generate a large file based on a template.
+
+    This function is used to generate a large file for performance testing. Because of the potential size and
+    complexity, it is easier to use a templated file than a hardcoded one.
+    """
     env = Environment(loader=FileSystemLoader(template_path.parent), autoescape=True)
     template = env.get_template(template_path.name)

@@ -135,30 +154,37 @@ def generate_large_file(template_path: Path, output_dir: Path) -> None:
         f.write(rendered_content)


-if __name__ == "__main__":
-    # TODO: prepare i.e. nox script to install external robocops and run this script
-    # So we can generate reports for multiple past versions. It is important since the actual seconds change depending
-    # on where we run the script from, but the % change between version should be comparable. Also we can use new tests
-    # on old versions
-    linter_report(report_name="with_print_cache", cache=True)
-    linter_report(report_name="with_print_no_cache", cache=False)
-    linter_report(report_name="without_print_cache", silent=True, cache=True)
-    linter_report(report_name="without_print_no_cache", silent=True, cache=False)
+def generate_reports() -> None:
+    """Entry point for generating performance reports and saving them to the global REPORTS variable."""
+    if Version("7.1.0") > ROBOCOP_VERSION:
+        disable_cache_option = {}
+    elif Version("7.1.0") == ROBOCOP_VERSION:
+        disable_cache_option = {"no_cache": True}
+    else:
+        disable_cache_option = {"cache": False}
+
+    if disable_cache_option:
+        linter_report(report_name="with_print_cache")
+    linter_report(report_name="with_print_no_cache", **disable_cache_option)
+    if disable_cache_option:
+        linter_report(report_name="without_print_cache", silent=True)
+    linter_report(report_name="without_print_no_cache", silent=True, **disable_cache_option)
     for formatter in FORMATTERS:
-        formatter_report(formatter=formatter, report_name=formatter)
-        formatter_report(formatter=formatter, report_name=f"{formatter}_no_cache", cache=False)
+        formatter_report(formatter=formatter, report_name=f"{formatter}_no_cache", **disable_cache_option)
     project_traversing_report()
     with tempfile.TemporaryDirectory() as temp_dir:
         temp_dir = Path(temp_dir)
         generate_large_file(TEST_DATA / "large_file.robot", temp_dir)
-        lint_large_file(report_name="large_file_with_print", lint_dir=temp_dir)
-        lint_large_file(report_name="large_file_without_print", lint_dir=temp_dir, silent=True)
+        lint_large_file(report_name="large_file_with_print", lint_dir=temp_dir, **disable_cache_option)
+        lint_large_file(report_name="large_file_without_print", lint_dir=temp_dir, silent=True, **disable_cache_option)

-    report_path = Path(__file__).parent / "reports" / f"robocop_{__version__.replace('.', '_')}.json"
-    if report_path.exists():
-        with open(report_path) as fp:
-            prev_report = json.load(fp)
-        REPORTS = merge_dictionaries(prev_report, REPORTS)

-    with open(report_path, "w") as fp:
-        json.dump(REPORTS, fp, indent=4)
+if __name__ == "__main__":
+    whole_run_start = time.perf_counter()
+    report_path = Path(__file__).parent / "reports" / f"robocop_{__version__.replace('.', '_')}.json"
+    if not report_path.exists():  # additional safeguard in case we run on the same version (there was no version bump)
+        generate_reports()
+        print(f"Generating report in {report_path}")
+        with open(report_path, "w") as fp:
+            json.dump(REPORTS, fp, indent=4)
+    print(f"Took {time.perf_counter() - whole_run_start:.2f} seconds to generate report.")
