[CI][Bench] Create summary reports for benchmarking CI run results #19733
base: sycl
```diff
@@ -46,6 +46,24 @@ class BenchmarkHistoricAverage:
         # TODO Ensure ONEAPI_DEVICE_SELECTOR? GPU name itself?
 
 
+class OutputFile:
+    """
+    Represents a text file to output, but only output the file when manually
+    specified.
+    """
+
+    def __init__(self, output_path: str):
+        self.output_path = output_path
+        self.output_content = []
+
+    def write_file(self):
+        with open(self.output_path, "w") as f:
+            f.write("\n".join(self.output_content))
+
+    def println(self, text: str):
+        self.output_content.append(text)
+
+
 class Compare:
     """Class containing logic for comparisons between results"""
 
```
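For reviewers skimming the diff, a minimal usage sketch of the new class follows; the file name and content here are made up for illustration:

```python
# Hypothetical usage of OutputFile: println() only buffers lines in memory;
# nothing is written to disk until write_file() is called explicitly.
summary = OutputFile("example_summary.md")
summary.println("### Example section")
summary.println("- an example bullet")
summary.write_file()  # writes the buffered lines, joined by newlines
```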
```diff
@@ -348,6 +366,11 @@ def to_hist(
     action="store_true",
     help="Do not return error upon regressions.",
 )
+parser_avg.add_argument(
+    "--produce-github-summary",
+    action="store_true",
+    help="Produce a github CI summary file.",
+)
 
 args = parser.parse_args()
 
```

**Reviewer:** IMO, the help msg should also include the name of the file containing the Github summary.
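One possible rewording along those lines; the exact phrasing is illustrative, not part of the PR:

```python
# Illustrative only: name the output file in the help text, as suggested.
parser_avg.add_argument(
    "--produce-github-summary",
    action="store_true",
    help="Produce a GitHub CI summary file (written to github_summary.md).",
)
```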
```diff
@@ -370,6 +393,9 @@ def to_hist(
     regressions_ignored = []
     regressions_of_concern = []
     if args.regression_filter is not None:
+        if args.produce_github_summary:
+            gh_summary = OutputFile("github_summary.md")
+
         filter_pattern = re.compile(args.regression_filter)
         for test in regressions:
             if filter_pattern.search(test["name"]):
```
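The diff hardcodes `github_summary.md` as the output path but does not show how the file is consumed. Presumably a CI step publishes it through GitHub Actions' `GITHUB_STEP_SUMMARY` mechanism; a sketch of such a consumer, which is an assumption and not part of this diff:

```python
import os

# Assumed consumer: append the generated summary to the GitHub Actions
# job summary file pointed to by the documented GITHUB_STEP_SUMMARY variable.
step_summary = os.environ.get("GITHUB_STEP_SUMMARY")
if step_summary and os.path.exists("github_summary.md"):
    with open("github_summary.md") as src, open(step_summary, "a") as dst:
        dst.write(src.read())
```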
```diff
@@ -390,28 +416,82 @@ def print_regression(entry: dict, is_warning: bool = False):
         log_func(f"-- Run result: {entry['value']}")
         log_func(f"-- Delta: {entry['delta']}")
         log_func("")
+        if args.produce_github_summary:
+            gh_summary.println(f"#### {entry['name']}:")
+            gh_summary.println(
+                f"- Historic {entry['avg_type']}: {entry['hist_avg']}"
+            )
+            gh_summary.println(f"- Run result: {entry['value']}")
+            gh_summary.println(f"- Delta: {entry['delta']}")
+            gh_summary.println("")
 
     if improvements:
         log.info("#")
         log.info("# Improvements:")
         log.info("#")
+        if args.produce_github_summary:
+            gh_summary.println("### Improvements")
+            gh_summary.println(
+                f"<details><summary>{len(improvements)} improved tests:</summary>"
+            )
+            gh_summary.println("")
         for test in improvements:
             print_regression(test)
+        if args.produce_github_summary:
+            gh_summary.println("</details>")
+            gh_summary.println("")
     if regressions_ignored:
         log.info("#")
         log.info("# Regressions (filtered out by regression-filter):")
         log.info("#")
+        if args.produce_github_summary:
+            gh_summary.println("### Regressions")
+            gh_summary.println(
+                f"<details><summary>{len(regressions_ignored)} non CI-failing regressions:</summary>"
+            )
+            gh_summary.println("")
         for test in regressions_ignored:
             print_regression(test)
+        if args.produce_github_summary:
+            gh_summary.println("</details>")
+            gh_summary.println("")
```

**Reviewer:** Sorry, could you clarify what you meant by "non CI-failing regressions"?

**Author:** Hey, thanks for taking a look Udit. These are regressions in benchmarks that were filtered out by the regression filter, so they are reported in the summary but do not fail the CI run.

**Reviewer:** I see. In that case, should we rename it to "non-SYCL regressions"? Also, we don't list "regressions" in the summary where the delta is less than the noise threshold, correct?

**Author:** I feel like that would be less confusing, but I am also aware that other projects use this series of benchmarking scripts as well (i.e. UMF), so I was hesitant to hardcode "non-SYCL" into the titles/descriptions. In hindsight this should perhaps be a customizable option. And that is correct: noise is ignored.
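If the headings were made customizable, as the author suggests, it might look something like the following; the flag name and default are hypothetical:

```python
# Hypothetical knob for projects (e.g. UMF) that reuse these scripts:
# let callers override the hardcoded section heading.
parser_avg.add_argument(
    "--summary-regression-heading",
    type=str,
    default="Regressions",
    help="Heading used for filtered-out regressions in the summary.",
)
```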
```diff
     if regressions_of_concern:
         log.warning("#")
         log.warning("# Regressions:")
         log.warning("#")
+        if args.produce_github_summary:
+            gh_summary.println("### SYCL-Specific Regressions")
+            gh_summary.println(
+                "Regressions pertaining to non-experimental "
+                "SYCL benchmarks. These regressions warrant "
+                "a CI failure: "
+            )
+            gh_summary.println(
+                f"<details><summary>{len(regressions_of_concern)} CI-failing regressions:</summary>"
+            )
+            gh_summary.println("")
         for test in regressions_of_concern:
             print_regression(test, is_warning=True)
+        if args.produce_github_summary:
+            gh_summary.println("</details>")
+            gh_summary.println("")
```

**Reviewer:** Same question here: what counts as a "CI-failing regression"?

**Author:** The specific implementation is: if the benchmark matches a filter (i.e. the benchmark is a SYCL test) and there is a regression above the noise threshold, we fail the SYCL nightly benchmarking CI.
```diff
 
         if not args.dry_run:
+            if args.produce_github_summary:
+                gh_summary.println("### Failed benchmarks:")
+                gh_summary.println("")
+                for test in regressions_of_concern:
+                    gh_summary.println(
+                        f"- {test['name']}: Delta {round(test['delta']*100, 2)}%"
+                    )
+                gh_summary.write_file()
             exit(1)  # Exit 1 to trigger github test failure
 
     log.info("No unexpected regressions found!")
+    if args.produce_github_summary:
+        gh_summary.println("No unexpected regressions found!")
+        gh_summary.write_file()
 
 else:
     log.error("Unsupported operation: exiting.")
     exit(1)
```
**Reviewer:** Also, just thinking out loud: do we really need this class? As an alternative, we could keep the Github summary in a string and, at the end, just append that string to the `github_summary.md` file.
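A minimal sketch of that alternative, assuming the same call sites as the diff; the names here are illustrative:

```python
# Reviewer's alternative: accumulate the summary in a plain string and
# write it out once at the end, instead of introducing a dedicated class.
gh_summary_text = ""

def gh_println(text: str) -> None:
    global gh_summary_text
    gh_summary_text += text + "\n"

gh_println("### Improvements")
# ... same println call sites as in the diff ...

with open("github_summary.md", "w") as f:
    f.write(gh_summary_text)
```

Either approach buffers output until the end; the class arguably buys a little encapsulation (the path and the buffer travel together) at the cost of an extra abstraction.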