[CI][Bench] Create summary reports for benchmarking CI run results #19733

Open

wants to merge 7 commits into base: sycl

5 changes: 4 additions & 1 deletion devops/actions/run-tests/benchmark/action.yml
@@ -170,14 +170,17 @@ runs:
--results-dir "./llvm-ci-perf-results/results/" \
--regression-filter '^[a-z_]+_sycl ' \
--verbose \
--produce-github-summary \
${{ inputs.dry_run == 'true' && '--dry-run' || '' }} \

echo "-----"

- name: Cache changes to benchmark folder for archival purposes
- name: Cache changes and upload github summary
if: always()
shell: bash
run: |
[ -f "github_summary.md" ] && cat github_summary.md >> $GITHUB_STEP_SUMMARY

cd "./llvm-ci-perf-results"
git add .
for diff in $(git diff HEAD --name-only); do
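
The cache/upload step above relies on GitHub's step-summary mechanism: any Markdown appended to the file named by $GITHUB_STEP_SUMMARY is rendered on the workflow run page. A minimal sketch of that mechanism in Python, for illustration only (github_summary.md is the file name used by this PR; the snippet itself is not part of the change):

import os

# GITHUB_STEP_SUMMARY names a per-step file provided by the Actions runner;
# Markdown appended to it is rendered on the workflow run's summary page.
summary_path = os.environ.get("GITHUB_STEP_SUMMARY")
if summary_path and os.path.isfile("github_summary.md"):
    with open("github_summary.md") as src, open(summary_path, "a") as dst:
        dst.write(src.read())
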
80 changes: 80 additions & 0 deletions devops/scripts/benchmarks/compare.py
@@ -46,6 +46,24 @@ class BenchmarkHistoricAverage:
# TODO Ensure ONEAPI_DEVICE_SELECTOR? GPU name itself?


class OutputFile:
"""
Represents a text file to output; the file is only written to disk when
write_file() is called explicitly.
"""

def __init__(self, output_path: str):
self.output_path = output_path
self.output_content = []

def write_file(self):
with open(self.output_path, "w") as f:
f.write("\n".join(self.output_content))

def println(self, text: str):
self.output_content.append(text)


class Compare:
"""Class containing logic for comparisons between results"""

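A short usage sketch of the new OutputFile helper, for illustration only (the file name and the printed lines are hypothetical): lines are buffered by println() and nothing is written to disk until write_file() is called.

summary = OutputFile("github_summary.md")
summary.println("### Improvements")
summary.println("")
summary.println("- example_benchmark_sycl: delta -1.5%")  # hypothetical entry
summary.write_file()  # github_summary.md is only written at this point
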
@@ -348,6 +366,11 @@ def to_hist(
action="store_true",
help="Do not return error upon regressions.",
)
parser_avg.add_argument(
"--produce-github-summary",
action="store_true",
help="Produce a github CI summary file.",
)

args = parser.parse_args()

@@ -370,6 +393,9 @@
regressions_ignored = []
regressions_of_concern = []
if args.regression_filter is not None:
if args.produce_github_summary:
gh_summary = OutputFile("github_summary.md")

filter_pattern = re.compile(args.regression_filter)
for test in regressions:
if filter_pattern.search(test["name"]):
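
The body of that loop is collapsed in the diff view; based on the visible lines and the log headings further down, the split presumably looks like the following sketch. Which list a matching test lands in is an assumption, not visible code:

# Assumed reconstruction of the collapsed lines: regressions whose name
# matches the filter are the ones that should fail CI; the rest are
# reported but ignored when deciding the exit status.
for test in regressions:
    if filter_pattern.search(test["name"]):
        regressions_of_concern.append(test)
    else:
        regressions_ignored.append(test)
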
@@ -390,28 +416,82 @@ def print_regression(entry: dict, is_warning: bool = False):
log_func(f"-- Run result: {entry['value']}")
log_func(f"-- Delta: {entry['delta']}")
log_func("")
if args.produce_github_summary:
gh_summary.println(f"#### {entry['name']}:")
gh_summary.println(
f"- Historic {entry['avg_type']}: {entry['hist_avg']}"
)
gh_summary.println(f"- Run result: {entry['value']}")
gh_summary.println(f"- Delta: {entry['delta']}")
gh_summary.println("")

if improvements:
log.info("#")
log.info("# Improvements:")
log.info("#")
if args.produce_github_summary:
gh_summary.println("### Improvements")
gh_summary.println(
f"<details><summary>{len(improvements)} improved tests:</summary>"
)
gh_summary.println("")
for test in improvements:
print_regression(test)
if args.produce_github_summary:
gh_summary.println("</details>")
gh_summary.println("")
if regressions_ignored:
log.info("#")
log.info("# Regressions (filtered out by regression-filter):")
log.info("#")
if args.produce_github_summary:
gh_summary.println("### Regressions")
gh_summary.println(
f"<details><summary>{len(regressions_ignored)} non CI-failing regressions:</summary>"
)
gh_summary.println("")
for test in regressions_ignored:
print_regression(test)
if args.produce_github_summary:
gh_summary.println("</details>")
gh_summary.println("")
if regressions_of_concern:
log.warning("#")
log.warning("# Regressions:")
log.warning("#")
if args.produce_github_summary:
gh_summary.println("### SYCL-Specific Regressions")
gh_summary.println(
"Regressions pertaining to non-experimental "
"SYCL benchmarks. These regressions warrant "
"a CI failure: "
)
gh_summary.println(
f"<details><summary>{len(regressions_of_concern)} CI-failing regressions:</summary>"
)
gh_summary.println("")
for test in regressions_of_concern:
print_regression(test, is_warning=True)
if args.produce_github_summary:
gh_summary.println("</details>")
gh_summary.println("")

if not args.dry_run:
if args.produce_github_summary:
gh_summary.println("### Failed benchmarks:")
gh_summary.println("")
for test in regressions_of_concern:
gh_summary.println(
f"- {test['name']}: Delta {round(test['delta']*100, 2)}%"
)
gh_summary.write_file()
exit(1) # Exit 1 to trigger github test failure

log.info("No unexpected regressions found!")
if args.produce_github_summary:
gh_summary.println("No unexpected regressions found!")
gh_summary.write_file()

else:
log.error("Unsupported operation: exiting.")
exit(1)