Merged
Changes from 7 commits
5 changes: 4 additions & 1 deletion devops/actions/run-tests/benchmark/action.yml
@@ -170,14 +170,17 @@ runs:
       --results-dir "./llvm-ci-perf-results/results/" \
       --regression-filter '^[a-z_]+_sycl ' \
       --verbose \
+      --produce-github-summary \
       ${{ inputs.dry_run == 'true' && '--dry-run' || '' }} \

       echo "-----"

-  - name: Cache changes to benchmark folder for archival purposes
+  - name: Cache changes and upload github summary
     if: always()
     shell: bash
     run: |
+      [ -f "github_summary.md" ] && cat github_summary.md >> $GITHUB_STEP_SUMMARY
+
       cd "./llvm-ci-perf-results"
       git add .
       for diff in $(git diff HEAD --name-only); do
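For context (not part of this diff): the new upload line relies on GITHUB_STEP_SUMMARY, an environment variable the Actions runner sets to the path of a file whose markdown content GitHub renders as the step's job summary. A minimal Python sketch of the equivalent append:

```python
import os

# GITHUB_STEP_SUMMARY is set by the GitHub Actions runner; markdown
# appended to this file shows up on the workflow run's summary page.
summary_target = os.environ.get("GITHUB_STEP_SUMMARY")
if summary_target and os.path.exists("github_summary.md"):
    with open("github_summary.md") as src, open(summary_target, "a") as dst:
        dst.write(src.read())  # same effect as `cat github_summary.md >> $GITHUB_STEP_SUMMARY`
```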
80 changes: 80 additions & 0 deletions devops/scripts/benchmarks/compare.py
@@ -46,6 +46,24 @@ class BenchmarkHistoricAverage:
# TODO Ensure ONEAPI_DEVICE_SELECTOR? GPU name itself?


class OutputFile:
"""
Represents a text file to output, but only output the file when manually
specified.
"""

def __init__(self, output_path: str):
self.output_path = output_path
self.output_content = []

def write_file(self):
with open(self.output_path, "w") as f:
f.write("\n".join(self.output_content))

def println(self, text: str):
self.output_content.append(text)
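A minimal usage sketch of OutputFile (illustrative only, not part of this diff); lines are buffered in memory and hit disk only on the explicit write_file() call:

```python
summary = OutputFile("github_summary.md")  # nothing written yet
summary.println("### Example section")     # appended to the in-memory buffer
summary.println("- an example line")
summary.write_file()                       # single write; lines joined with "\n"
```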


class Compare:
"""Class containing logic for comparisons between results"""

@@ -348,6 +366,11 @@ def to_hist(
action="store_true",
help="Do not return error upon regressions.",
)
parser_avg.add_argument(
"--produce-github-summary",
action="store_true",
help="Produce a github CI summary file.",
)
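Illustrative, not part of this diff: the flag is a standard argparse boolean, so it defaults to False and flips to True only when passed.

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--produce-github-summary", action="store_true",
                    help="Produce a github CI summary file.")

# Dashes become underscores in the parsed namespace.
assert parser.parse_args(["--produce-github-summary"]).produce_github_summary is True
assert parser.parse_args([]).produce_github_summary is False
```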

args = parser.parse_args()

@@ -370,6 +393,9 @@ def to_hist(
regressions_ignored = []
regressions_of_concern = []
if args.regression_filter is not None:
if args.produce_github_summary:
gh_summary = OutputFile("github_summary.md")

filter_pattern = re.compile(args.regression_filter)
for test in regressions:
if filter_pattern.search(test["name"]):
@@ -390,28 +416,82 @@ def print_regression(entry: dict, is_warning: bool = False):
log_func(f"-- Run result: {entry['value']}")
log_func(f"-- Delta: {entry['delta']}")
log_func("")
if args.produce_github_summary:
gh_summary.println(f"#### {entry['name']}:")
gh_summary.println(
f"- Historic {entry['avg_type']}: {entry['hist_avg']}"
)
gh_summary.println(f"- Run result: {entry['value']}")
gh_summary.println(f"- Delta: {entry['delta']}")
gh_summary.println("")

if improvements:
log.info("#")
log.info("# Improvements:")
log.info("#")
if args.produce_github_summary:
gh_summary.println("### Improvements")
gh_summary.println(
f"<details><summary>{len(improvements)} improved tests:</summary>"
)
gh_summary.println("")
for test in improvements:
print_regression(test)
if args.produce_github_summary:
gh_summary.println("</details>")
gh_summary.println("")
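For illustration (test name, avg_type, and values hypothetical), the improvements section above renders in the step summary roughly as:

```markdown
### Improvements
<details><summary>1 improved tests:</summary>

#### example_benchmark_sycl:
- Historic median: 10.52
- Run result: 9.87
- Delta: -0.0618

</details>
```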
if regressions_ignored:
log.info("#")
log.info("# Regressions (filtered out by regression-filter):")
log.info("#")
if args.produce_github_summary:
gh_summary.println("### Regressions")
gh_summary.println(
f"<details><summary>{len(regressions_ignored)} non CI-failing regressions:</summary>"
)

Review thread on this line:

Contributor: Sorry, could you clarify what you meant by "non CI-failing regressions"?

Contributor Author (ianayl): Hey, thanks for taking a look, Udit. The benchmark CI now also runs UR benchmarks, L0 benchmarks, etc.; regressions in e.g. L0 should not cause the nightly benchmarking CI for SYCL to fail, so they are filtered out and categorized differently.

Contributor: I see. In that case, should we rename it to "non-SYCL regressions"?

Contributor: Also, we don't list "regressions" in the summary where the delta is less than the noise threshold, correct?

Contributor Author (ianayl, Aug 13, 2025):

> I see. In that case, should we rename it to "non-SYCL regressions"?

I feel like that would be less confusing, but I am also aware that other projects use this series of benchmarking scripts as well (e.g. UMF), so I was hesitant to hardcode "non-SYCL" into the titles/descriptions. In hindsight this should perhaps be a customizable option.

> Also, we don't list "regressions" in the summary where the delta is less than the noise threshold, correct?

That is correct. Noise is ignored.
gh_summary.println("")
for test in regressions_ignored:
print_regression(test)
if args.produce_github_summary:
gh_summary.println("</details>")
gh_summary.println("")
if regressions_of_concern:
log.warning("#")
log.warning("# Regressions:")
log.warning("#")
if args.produce_github_summary:
gh_summary.println("### SYCL-Specific Regressions")
gh_summary.println(
"Regressions pertaining to non-experimental "
"SYCL benchmarks. These regressions warrant "
"a CI failure: "
)
gh_summary.println(
f"<details><summary>{len(regressions_of_concern)} CI-failing regressions:</summary>"
)
gh_summary.println("")
for test in regressions_of_concern:
print_regression(test, is_warning=True)
if args.produce_github_summary:
gh_summary.println("</details>")
gh_summary.println("")

if not args.dry_run:
if args.produce_github_summary:
gh_summary.println("### Failed benchmarks:")
gh_summary.println("")
for test in regressions_of_concern:
gh_summary.println(
f"- {test['name']}: Delta {round(test['delta']*100, 2)}%"
)
gh_summary.write_file()
exit(1) # Exit 1 to trigger github test failure

log.info("No unexpected regressions found!")
if args.produce_github_summary:
gh_summary.println("No unexpected regressions found!")
gh_summary.write_file()

else:
log.error("Unsupported operation: exiting.")
exit(1)