[CI][Bench] Create summary reports for benchmarking CI run results #19733

Open · wants to merge 12 commits into sycl (showing changes from 11 commits)

6 changes: 5 additions & 1 deletion devops/actions/run-tests/benchmark/action.yml
@@ -169,15 +169,19 @@ runs:
--compare-file "./llvm-ci-perf-results/results/${SAVE_NAME}_${SAVE_TIMESTAMP}.json" \
--results-dir "./llvm-ci-perf-results/results/" \
--regression-filter '^[a-z_]+_sycl ' \
--regression-filter-type 'SYCL' \
--verbose \
--produce-github-summary \
${{ inputs.dry_run == 'true' && '--dry-run' || '' }} \

echo "-----"

- name: Cache changes to benchmark folder for archival purposes
- name: Cache changes and upload github summary
if: always()
shell: bash
run: |
[ -f "github_summary.md" ] && cat github_summary.md >> $GITHUB_STEP_SUMMARY

cd "./llvm-ci-perf-results"
git add .
for diff in $(git diff HEAD --name-only); do
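The upload step above relies on the standard GitHub Actions mechanism of appending markdown to the file whose path the runner exposes as `$GITHUB_STEP_SUMMARY`. A minimal sketch of the same idea in Python, assuming it runs inside an Actions job (this snippet is illustrative and not part of the PR):

```python
# Sketch: append the report generated by compare.py to the job's step summary,
# mirroring the `cat github_summary.md >> $GITHUB_STEP_SUMMARY` line added above.
# Assumes the Actions runner has set GITHUB_STEP_SUMMARY to a writable file path.
import os
from pathlib import Path

summary_md = Path("github_summary.md")  # produced by compare.py
step_summary_path = os.environ.get("GITHUB_STEP_SUMMARY")

if summary_md.is_file() and step_summary_path:
    with open(step_summary_path, "a") as step_summary:
        step_summary.write(summary_md.read_text())
```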
79 changes: 75 additions & 4 deletions devops/scripts/benchmarks/compare.py
@@ -340,14 +340,25 @@ def to_hist(
parser_avg.add_argument(
"--regression-filter",
type=str,
help="If provided, only regressions matching provided regex will cause exit status 1.",
help="If provided, only regressions in tests matching provided regex will cause exit status 1.",
default=None,
)
parser_avg.add_argument(
"--regression-filter-type",
type=str,
help="Name to use in logging for tests that fall within the filter defined by --regression-filter; i.e. if --regression-filter filters for SYCL benchmarks, --regression-filter-type could be 'SYCL'.",
default="filtered",
)
parser_avg.add_argument(
"--dry-run",
action="store_true",
help="Do not return error upon regressions.",
)
parser_avg.add_argument(
"--produce-github-summary",
action="store_true",
help=f"Create a summary file '{options.github_summary_filename}' for Github workflow summaries.",
)

args = parser.parse_args()

@@ -365,6 +376,14 @@ def to_hist(
args.avg_type, args.name, args.compare_file, args.results_dir, args.cutoff
)

# Initialize Github summary variables:
if args.produce_github_summary:
gh_summary = []

filter_type_capitalized = (
args.regression_filter_type[0].upper() + args.regression_filter_type[1:]
)

# Not all regressions are of concern: if a filter is provided, filter
# regressions using filter
regressions_ignored = []
@@ -378,7 +397,7 @@ def print_regression(entry: dict, is_warning: bool = False):
regressions_ignored.append(test)

def print_regression(entry: dict, is_warning: bool = False):
"""Print an entry outputted from Compare.to_hist
"""Print an entry outputted from Compare.to_hist()

Args:
entry (dict): The entry to print
@@ -390,28 +409,80 @@ def print_regression(entry: dict, is_warning: bool = False):
log_func(f"-- Run result: {entry['value']}")
log_func(f"-- Delta: {entry['delta']}")
log_func("")
if args.produce_github_summary:
gh_summary.append(f"#### {entry['name']}:")
gh_summary.append(
f"- Historic {entry['avg_type']}: {entry['hist_avg']}"
)
gh_summary.append(f"- Run result: {entry['value']}")
gh_summary.append(
# Since we are dealing with floats, our deltas have a lot
# of decimal places. For easier readability, we round our
# deltas and format our Github summary output as:
#
# Delta: <rounded number>% (<full number>)
#
f"- Delta: {round(entry['delta']*100, 2)}% ({entry['delta']})"
)
gh_summary.append("")

if improvements:
log.info("#")
log.info("# Improvements:")
log.info("#")
if args.produce_github_summary:
gh_summary.append(f"### Improvements")
gh_summary.append(
f"<details><summary>{len(improvements)} improved tests:</summary>"
)
gh_summary.append("")
for test in improvements:
print_regression(test)
if args.produce_github_summary:
gh_summary.append("</details>")
gh_summary.append("")
if regressions_ignored:
log.info("#")
log.info("# Regressions (filtered out by regression-filter):")
log.info("# Regressions (filtered out by --regression-filter):")
log.info("#")
if args.produce_github_summary:
gh_summary.append(f"### Non-{filter_type_capitalized} Regressions")
gh_summary.append(
f"<details><summary>{len(regressions_ignored)} non-{args.regression_filter_type} regressions:</summary>"
)
gh_summary.append("")
for test in regressions_ignored:
print_regression(test)
if args.produce_github_summary:
gh_summary.append("</details>")
gh_summary.append("")
if regressions_of_concern:
log.warning("#")
log.warning("# Regressions:")
log.warning("#")
if args.produce_github_summary:
gh_summary.append(f"### {filter_type_capitalized} Regressions")
gh_summary.append(
f"{len(regressions_of_concern)} {args.regression_filter_type} regressions. These regressions warrant a CI failure:"
)
gh_summary.append("")
for test in regressions_of_concern:
print_regression(test, is_warning=True)
if args.produce_github_summary:
gh_summary.append("")

if not args.dry_run:
exit(1) # Exit 1 to trigger github test failure
if args.produce_github_summary:
with open(options.github_summary_filename, "w") as f:
f.write("\n".join(summary))
exit(1) # Exit 1 to trigger Github test failure

log.info("No unexpected regressions found!")
if args.produce_github_summary:
gh_summary.append("No unexpected regressions found!")
with open(options.github_summary_filename, "w") as f:
f.write("\n".join(summary))

else:
log.error("Unsupported operation: exiting.")
exit(1)
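
As a rough illustration of the flow added above: regressions whose test names match `--regression-filter` are treated as CI-failing, while the rest are only reported. The helper name, the exact regex semantics, and the test names in the comments below are assumptions for illustration; compare.py's real internals are only partially visible in this hunk.

```python
# Illustrative sketch of the --regression-filter split: regressions whose test
# names match the regex are "of concern" (they fail CI), the rest are reported
# under "Non-<filter-type> Regressions" but do not affect the exit status.
import re
from typing import Dict, List, Optional, Tuple

def split_regressions(
    regressions: List[Dict], regression_filter: Optional[str]
) -> Tuple[List[Dict], List[Dict]]:
    if regression_filter is None:
        return regressions, []  # no filter: every regression is of concern
    pattern = re.compile(regression_filter)
    of_concern = [t for t in regressions if pattern.search(t["name"])]
    ignored = [t for t in regressions if not pattern.search(t["name"])]
    return of_concern, ignored

# With the filter used in action.yml ('^[a-z_]+_sycl '), a hypothetical test
# named "foo_bench_sycl SubmitKernel" would fail CI, while a hypothetical
# "foo_bench_l0 SubmitKernel" would only be listed in the summary.
```

The action.yml change in this PR exercises exactly this path, passing `--regression-filter '^[a-z_]+_sycl '`, `--regression-filter-type 'SYCL'`, and `--produce-github-summary`.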
2 changes: 2 additions & 0 deletions devops/scripts/benchmarks/options.py
@@ -92,6 +92,8 @@ class Options:
# CI scripts vs SYCL build source.
github_repo_override: str = None
git_commit_override: str = None
# Filename used to store the GitHub summary file:
github_summary_filename: str = "github_summary.md"
# Archiving settings
# Archived runs are stored separately from the main dataset but are still accessible
# via the HTML UI when "Include archived runs" is enabled.