
Commit 37a046c

change markdown output in benchmark PR comments
- add an option for limiting markdown content size
- calculate relative performance with different baselines
- calculate relative performance using only already saved data
- group results according to suite names and explicit groups
- add multiple data columns if multiple --compare specified
1 parent f66751d commit 37a046c
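The first item in the commit message, limiting the markdown content size, matters because GitHub caps the length of a PR comment body. Below is a minimal, hypothetical sketch of that idea; the helper name and the exact limit are assumptions, not taken from this commit.

```python
# Hypothetical illustration only; the real logic lives in the benchmark scripts.
# GitHub PR comment bodies are capped (commonly cited as 65536 characters),
# so oversized markdown has to be trimmed before posting.
COMMENT_CHAR_LIMIT = 65536

def fit_markdown_to_limit(markdown: str, limit: int = COMMENT_CHAR_LIMIT) -> str:
    """Return the markdown unchanged if it fits, otherwise truncate it with a notice."""
    if len(markdown) <= limit:
        return markdown
    notice = "\n\n*Output truncated to fit the PR comment size limit.*"
    return markdown[: limit - len(notice)] + notice

print(len(fit_markdown_to_limit("x" * 70000)))  # 65536
```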

File tree: 6 files changed (+353 −143 lines)

.github/workflows/benchmarks-reusable.yml

Lines changed: 2 additions & 1 deletion
@@ -220,11 +220,12 @@ jobs:
           --compute-runtime ${{ inputs.compute_runtime_commit }}
           --build-igc
           ${{ inputs.upload_report && '--output-html' || '' }}
+          ${{ inputs.pr_no != 0 && '--output-markdown' || '' }}
           ${{ inputs.bench_script_params }}

       - name: Print benchmark results
         run: |
-          cat ${{ github.workspace }}/ur-repo/benchmark_results.md
+          cat ${{ github.workspace }}/ur-repo/benchmark_results.md || true

       - name: Add comment to PR
         uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1

scripts/benchmarks/README.md

Lines changed: 9 additions & 0 deletions
@@ -37,11 +37,20 @@ By default, the benchmark results are not stored. To store them, use the option

 To compare a benchmark run with a previously stored result, use the option `--compare <name>`. You can compare with more than one result.

+In a markdown output file (see below), listing more than two `--compare` options results in displaying raw performance times instead of relative values. If only one `--compare` option is specified, the relative performance of the provided results is calculated against the previously saved `baseline`. You can compare your data against results other than `baseline` by using:
+
+`--compare <name> --relative-perf <name> --compare <new_baseline> --new-base-name <new_baseline>`.
+
 If no `--compare` option is specified, the benchmark run is compared against a previously stored `baseline`.

 Baseline, as well as baseline-v2 (for the level-zero adapter v2) is updated automatically during a nightly job. The results
 are stored [here](https://oneapi-src.github.io/unified-runtime/benchmark_results.html).

+
+## Output formats
+You can display the results in the form of an HTML file by using `--output-html` and a markdown file by using `--output-markdown`. Due to the character limit on PR comments, the final content of the markdown file might be reduced. To obtain the full markdown output, use `--output-markdown full`.
+
+
 ## Requirements

 ### Python
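As a rough illustration of the relative-performance idea described in the README changes above, the sketch below expresses each saved run's value as a ratio against a chosen baseline run. This is an assumption about the metric, not the script's actual calculation.

```python
# Sketch only (not the repository's implementation): relate every run's result
# to whichever saved run is chosen as the baseline. For time-based results,
# a ratio above 1.0 means slower than the baseline.
def relative_to_baseline(results: dict[str, float], baseline_name: str) -> dict[str, float]:
    base = results[baseline_name]
    return {name: value / base for name, value in results.items()}

runs = {"baseline": 100.0, "This PR": 92.0, "my_experiment": 110.0}
print(relative_to_baseline(runs, "baseline"))
# {'baseline': 1.0, 'This PR': 0.92, 'my_experiment': 1.1}
```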

scripts/benchmarks/benches/result.py

Lines changed: 1 addition & 1 deletion
@@ -18,7 +18,7 @@ class Result:
     stdout: str
     passed: bool = True
     unit: str = ""
-    explicit_group: str = ""
+    explicit_group: str = "Ungrouped"
     # stddev can be optionally set by the benchmark,
     # if not set, it will be calculated automatically.
     stddev: float = 0.0
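The new "Ungrouped" default supports the commit's grouping of results by suite name and explicit group. Below is a trimmed, hypothetical stand-in for that grouping; the `suite` field and the sample values are assumptions, and only `explicit_group` mirrors the real dataclass.

```python
from collections import defaultdict
from dataclasses import dataclass

@dataclass
class Result:
    label: str
    value: float
    suite: str = "Unknown"             # assumed field, for illustration only
    explicit_group: str = "Ungrouped"  # the new default introduced by this commit

def group_results(results):
    # Benchmarks that never set explicit_group now land in "Ungrouped" rather than "".
    groups = defaultdict(list)
    for r in results:
        groups[(r.suite, r.explicit_group)].append(r)
    return groups

sample = [Result("api_overhead", 1.2, "compute", "SubmitKernel"), Result("misc", 3.4)]
print(sorted(group_results(sample)))
# [('Unknown', 'Ungrouped'), ('compute', 'SubmitKernel')]
```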

scripts/benchmarks/main.py

Lines changed: 22 additions & 7 deletions
@@ -189,9 +189,12 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
         benchmark.teardown()
         print("complete.")

-    this_name = "This PR"

-    chart_data = {this_name : results}
+    this_name = options.current_run_name
+    chart_data = {}
+
+    if not options.dry_run:
+        chart_data = {this_name : results}

     history = BenchmarkHistory(directory)
     # limit how many files we load.
@@ -207,7 +210,7 @@ def main(directory, additional_env_vars, save_name, compare_names, filter):
             chart_data[name] = compare_result.results

     if options.output_markdown:
-        markdown_content = generate_markdown(this_name, chart_data)
+        markdown_content = generate_markdown(this_name, chart_data, options.output_markdown)

         with open('benchmark_results.md', 'w') as file:
             file.write(markdown_content)
@@ -241,6 +244,11 @@ def validate_and_parse_env_args(env_args):
         env_vars[key] = value
     return env_vars

+def substitute_baseline(run_names_to_compare: list[str], new_baseline_name: str):
+    new_compare_names = [run_name if run_name != options.default_baseline else new_baseline_name for run_name in run_names_to_compare]
+
+    return new_compare_names
+
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description='Unified Runtime Benchmark Runner')
     parser.add_argument('benchmark_directory', type=str, help='Working directory to setup benchmarks.')
@@ -251,7 +259,7 @@ def validate_and_parse_env_args(env_args):
     parser.add_argument("--no-rebuild", help='Do not rebuild the benchmarks from scratch.', action="store_true")
     parser.add_argument("--env", type=str, help='Use env variable for a benchmark run.', action="append", default=[])
     parser.add_argument("--save", type=str, help='Save the results for comparison under a specified name.')
-    parser.add_argument("--compare", type=str, help='Compare results against previously saved data.', action="append", default=["baseline"])
+    parser.add_argument("--compare", type=str, help='Compare results against previously saved data.', action="append", default=[options.default_baseline])
     parser.add_argument("--iterations", type=int, help='Number of times to run each benchmark to select a median value.', default=options.iterations)
     parser.add_argument("--stddev-threshold", type=float, help='If stddev pct is above this threshold, rerun all iterations', default=options.stddev_threshold)
     parser.add_argument("--timeout", type=int, help='Timeout for individual benchmarks in seconds.', default=options.timeout)
@@ -261,12 +269,14 @@ def validate_and_parse_env_args(env_args):
     parser.add_argument("--exit-on-failure", help='Exit on first failure.', action="store_true")
     parser.add_argument("--compare-type", type=str, choices=[e.value for e in Compare], help='Compare results against previously saved data.', default=Compare.LATEST.value)
     parser.add_argument("--compare-max", type=int, help='How many results to read for comparisions', default=options.compare_max)
+    parser.add_argument("--output-markdown", nargs='?', const=options.output_markdown, help='Specify whether markdown output should fit the content size limit for request validation')
     parser.add_argument("--output-html", help='Create HTML output', action="store_true", default=False)
-    parser.add_argument("--output-markdown", help='Create Markdown output', action="store_true", default=True)
     parser.add_argument("--dry-run", help='Do not run any actual benchmarks', action="store_true", default=False)
     parser.add_argument("--compute-runtime", nargs='?', const=options.compute_runtime_tag, help="Fetch and build compute runtime")
     parser.add_argument("--iterations-stddev", type=int, help="Max number of iterations of the loop calculating stddev after completed benchmark runs", default=options.iterations_stddev)
     parser.add_argument("--build-igc", help="Build IGC from source instead of using the OS-installed version", action="store_true", default=options.build_igc)
+    parser.add_argument("--relative-perf", type=str, help="The name of the results which should be used as a baseline for metrics calculation", default=options.current_run_name)
+    parser.add_argument("--new-base-name", help="New name of the default baseline to compare", type=str, default='')

     args = parser.parse_args()
     additional_env_vars = validate_and_parse_env_args(args.env)
@@ -283,12 +293,13 @@ def validate_and_parse_env_args(env_args):
     options.exit_on_failure = args.exit_on_failure
     options.compare = Compare(args.compare_type)
     options.compare_max = args.compare_max
-    options.output_html = args.output_html
     options.output_markdown = args.output_markdown
+    options.output_html = args.output_html
     options.dry_run = args.dry_run
     options.umf = args.umf
     options.iterations_stddev = args.iterations_stddev
     options.build_igc = args.build_igc
+    options.current_run_name = args.relative_perf

     if args.build_igc and args.compute_runtime is None:
         parser.error("--build-igc requires --compute-runtime to be set")
@@ -298,4 +309,8 @@ def validate_and_parse_env_args(env_args):

     benchmark_filter = re.compile(args.filter) if args.filter else None

-    main(args.benchmark_directory, additional_env_vars, args.save, args.compare, benchmark_filter)
+    compare_names = args.compare
+    if args.new_base_name != '':
+        compare_names = substitute_baseline(run_names_to_compare=args.compare, new_baseline_name=args.new_base_name)
+
+    main(args.benchmark_directory, additional_env_vars, args.save, compare_names, benchmark_filter)
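To make the `--new-base-name` flow concrete, here is a self-contained rerun of the `substitute_baseline` logic from the diff above, with `DEFAULT_BASELINE` standing in for `options.default_baseline`.

```python
DEFAULT_BASELINE = "baseline"  # stand-in for options.default_baseline

def substitute_baseline(run_names_to_compare: list[str], new_baseline_name: str) -> list[str]:
    # Swap every occurrence of the default baseline name for the new one,
    # leaving all other compare names untouched.
    return [new_baseline_name if name == DEFAULT_BASELINE else name
            for name in run_names_to_compare]

# e.g. --compare my_run --new-base-name baseline-v2 effectively does:
print(substitute_baseline(["baseline", "my_run"], "baseline-v2"))
# ['baseline-v2', 'my_run']
```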

scripts/benchmarks/options.py

Lines changed: 7 additions & 1 deletion
@@ -6,6 +6,10 @@ class Compare(Enum):
     AVERAGE = 'average'
     MEDIAN = 'median'

+class MarkdownSize(Enum):
+    SHORT = 'short'
+    FULL = 'full'
+
 @dataclass
 class Options:
     workdir: str = None
@@ -20,8 +24,8 @@ class Options:
     verbose: bool = False
     compare: Compare = Compare.LATEST
     compare_max: int = 10 # average/median over how many results
+    output_markdown: MarkdownSize = MarkdownSize.SHORT
     output_html: bool = False
-    output_markdown: bool = True
     dry_run: bool = False
     # these two should probably be merged into one setting
     stddev_threshold: float = 0.02
@@ -32,6 +36,8 @@ class Options:
     extra_env_vars: dict = field(default_factory=dict)
     compute_runtime_tag: str = '24.52.32224.10'
     build_igc: bool = False
+    current_run_name: str = "This PR"
+    default_baseline: str = "baseline"

 options = Options()
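The reworked `--output-markdown` flag in main.py leans on argparse's `nargs='?'` plus `const`. Below is a standalone demonstration of that behavior, with `const` simplified to the string `'short'` where the commit passes `options.output_markdown` (i.e. `MarkdownSize.SHORT`).

```python
import argparse

parser = argparse.ArgumentParser()
# Mirrors the pattern from main.py; const is a plain string here for brevity.
parser.add_argument("--output-markdown", nargs='?', const='short', default=None)

print(parser.parse_args([]).output_markdown)                              # None   (flag absent)
print(parser.parse_args(["--output-markdown"]).output_markdown)           # 'short' (bare flag -> const)
print(parser.parse_args(["--output-markdown", "full"]).output_markdown)   # 'full'  (explicit value)
```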
