
[CI][Bench] Create summary reports for benchmarking CI run results #19733

Open · wants to merge 10 commits into sycl · Changes shown from 7 commits
5 changes: 4 additions & 1 deletion — devops/actions/run-tests/benchmark/action.yml

@@ -170,14 +170,17 @@ runs:
          --results-dir "./llvm-ci-perf-results/results/" \
          --regression-filter '^[a-z_]+_sycl ' \
          --verbose \
+         --produce-github-summary \
          ${{ inputs.dry_run == 'true' && '--dry-run' || '' }} \

        echo "-----"

-   - name: Cache changes to benchmark folder for archival purposes
+   - name: Cache changes and upload github summary
      if: always()
      shell: bash
      run: |
+       [ -f "github_summary.md" ] && cat github_summary.md >> $GITHUB_STEP_SUMMARY
+
        cd "./llvm-ci-perf-results"
        git add .
        for diff in $(git diff HEAD --name-only); do
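For context on the step above: GitHub Actions exposes the path of the per-step summary file through the GITHUB_STEP_SUMMARY environment variable, and markdown appended to that file is rendered on the workflow run's summary page. This PR writes github_summary.md from compare.py and appends it in the workflow step shown above; a script could also, in principle, append directly. A minimal illustrative sketch, not part of this PR:

```python
import os

# Minimal sketch: append markdown to the GitHub Actions step summary, if the
# GITHUB_STEP_SUMMARY variable is set (it only exists inside an Actions job).
summary_path = os.environ.get("GITHUB_STEP_SUMMARY")
if summary_path:
    with open(summary_path, "a") as f:
        f.write("### Benchmark summary\n\nNo unexpected regressions found!\n")
```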
80 changes: 80 additions & 0 deletions — devops/scripts/benchmarks/compare.py

@@ -46,6 +46,24 @@ class BenchmarkHistoricAverage:
    # TODO Ensure ONEAPI_DEVICE_SELECTOR? GPU name itself?

class OutputFile:
    """
    Represents a text file to output; the file is only written out when
    explicitly requested via write_file().
    """

    def __init__(self, output_path: str):
        self.output_path = output_path
        self.output_content = []

    def write_file(self):
        with open(self.output_path, "w") as f:
            f.write("\n".join(self.output_content))

    def println(self, text: str):
        self.output_content.append(text)

Contributor: Also, just thinking out loud, do we really need this class? As an alternative, we could keep the GitHub summary in a string and, at the end, just append that string to the github_summary.md file.
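A minimal sketch of the alternative suggested in the comment above, assuming the only goal is to collect lines and append them to github_summary.md once at the end; the names here are illustrative, not the PR's code:

```python
# Illustrative alternative (not the PR's implementation): accumulate summary
# lines in a plain list and append them to github_summary.md in one go.
summary_lines: list[str] = []

def summary_println(text: str) -> None:
    summary_lines.append(text)

summary_println("### Improvements")
summary_println("No unexpected regressions found!")

with open("github_summary.md", "a") as f:
    f.write("\n".join(summary_lines) + "\n")
```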

class Compare:
    """Class containing logic for comparisons between results"""

@@ -348,6 +366,11 @@ def to_hist(
    action="store_true",
    help="Do not return error upon regressions.",
)
parser_avg.add_argument(
    "--produce-github-summary",
    action="store_true",
    help="Produce a github CI summary file.",
)

Contributor: IMO, the help message should also include the name of the file containing the GitHub summary, github_summary.md, since the file name is hard-coded.
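If that suggestion were adopted, the option definition might name the file explicitly; one possible, purely illustrative wording:

```python
import argparse

# Purely illustrative variant of the help text, per the comment above; this is
# not the PR's wording, and parser_avg here is a stand-in parser.
parser_avg = argparse.ArgumentParser()
parser_avg.add_argument(
    "--produce-github-summary",
    action="store_true",
    help="Produce a GitHub CI summary file (written to github_summary.md).",
)
```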

args = parser.parse_args()

@@ -370,6 +393,9 @@
regressions_ignored = []
regressions_of_concern = []
if args.regression_filter is not None:
    if args.produce_github_summary:
        gh_summary = OutputFile("github_summary.md")

    filter_pattern = re.compile(args.regression_filter)
    for test in regressions:
        if filter_pattern.search(test["name"]):

@@ -390,28 +416,82 @@ def print_regression(entry: dict, is_warning: bool = False):
    log_func(f"-- Run result: {entry['value']}")
    log_func(f"-- Delta: {entry['delta']}")
    log_func("")
    if args.produce_github_summary:
        gh_summary.println(f"#### {entry['name']}:")
        gh_summary.println(
            f"- Historic {entry['avg_type']}: {entry['hist_avg']}"
        )
        gh_summary.println(f"- Run result: {entry['value']}")
        gh_summary.println(f"- Delta: {entry['delta']}")
        gh_summary.println("")

if improvements:
    log.info("#")
    log.info("# Improvements:")
    log.info("#")
    if args.produce_github_summary:
        gh_summary.println("### Improvements")
        gh_summary.println(
            f"<details><summary>{len(improvements)} improved tests:</summary>"
        )
        gh_summary.println("")
    for test in improvements:
        print_regression(test)
    if args.produce_github_summary:
        gh_summary.println("</details>")
        gh_summary.println("")
if regressions_ignored:
    log.info("#")
    log.info("# Regressions (filtered out by regression-filter):")
    log.info("#")
    if args.produce_github_summary:
        gh_summary.println("### Regressions")
        gh_summary.println(
            f"<details><summary>{len(regressions_ignored)} non CI-failing regressions:</summary>"
        )
Contributor: Sorry, could you clarify what you meant by "non CI-failing regressions"?

Contributor Author: Hey, thanks for taking a look, Udit. The benchmark CI now also runs UR benchmarks, L0 benchmarks, etc.; regressions in, e.g., L0 should not cause the nightly benchmarking CI for SYCL to fail, so they are filtered out and categorized differently.

Contributor: I see. In that case, should we rename it to "non-SYCL regressions"?

Contributor: Also, we don't list "regressions" in the summary where the delta is less than the noise threshold, correct?

Contributor Author (@ianayl, Aug 13, 2025):

> I see. In that case, should we rename it to "non-SYCL regressions"?

I feel like that would be less confusing, but I am also aware that other projects use this series of benchmarking scripts as well (e.g. UMF), so I was hesitant to hardcode "non-SYCL" into the titles/descriptions. In hindsight this should perhaps be a customizable option.

> Also, we don't list "regressions" in the summary where the delta is less than the noise threshold, correct?

That is correct. Noise is ignored.
        gh_summary.println("")
    for test in regressions_ignored:
        print_regression(test)
    if args.produce_github_summary:
        gh_summary.println("</details>")
        gh_summary.println("")
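To illustrate the distinction discussed in the review thread above, here is a simplified sketch of how a regression filter such as '^[a-z_]+_sycl ' splits regressions into CI-failing (SYCL) and ignored (non-SYCL) groups; the benchmark names and data are made up for the example and this is not the PR's exact code:

```python
import re

# Illustrative only: partition regressions by a benchmark-name filter, as
# discussed in the review thread above.
regression_filter = r"^[a-z_]+_sycl "
pattern = re.compile(regression_filter)

regressions = [
    {"name": "api_overhead_benchmark_sycl SubmitKernel", "delta": -0.12},
    {"name": "api_overhead_benchmark_l0 SubmitKernel", "delta": -0.20},
]

regressions_of_concern = [t for t in regressions if pattern.search(t["name"])]
regressions_ignored = [t for t in regressions if not pattern.search(t["name"])]
# regressions_of_concern -> SYCL regressions, reported and CI-failing
# regressions_ignored    -> e.g. L0/UR regressions, reported in the summary only
```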
if regressions_of_concern:
    log.warning("#")
    log.warning("# Regressions:")
    log.warning("#")
    if args.produce_github_summary:
        gh_summary.println("### SYCL-Specific Regressions")
        gh_summary.println(
            "Regressions pertaining to non-experimental "
            "SYCL benchmarks. These regressions warrant "
            "a CI failure: "
        )
        gh_summary.println(
            f"<details><summary>{len(regressions_of_concern)} CI-failing regressions:</summary>"
        )
Contributor: Similarly, by "CI-failing regression" do you mean regressions where the delta exceeds the noise threshold, thus causing the test to fail?

Contributor Author: The specific implementation is: if the benchmark matches the filter (i.e. the benchmark is a SYCL test) and there is a regression above the noise threshold, we fail the SYCL nightly benchmarking CI.

Contributor: IMO, using "SYCL" and "non-SYCL" regressions would be clearer.
        gh_summary.println("")
    for test in regressions_of_concern:
        print_regression(test, is_warning=True)
    if args.produce_github_summary:
        gh_summary.println("</details>")
        gh_summary.println("")

    if not args.dry_run:
        if args.produce_github_summary:
            gh_summary.println("### Failed benchmarks:")
            gh_summary.println("")
            for test in regressions_of_concern:
                gh_summary.println(
                    f"- {test['name']}: Delta {round(test['delta']*100, 2)}%"
                )
            gh_summary.write_file()
        exit(1)  # Exit 1 to trigger github test failure

log.info("No unexpected regressions found!")
if args.produce_github_summary:
    gh_summary.println("No unexpected regressions found!")
    gh_summary.write_file()

else:
    log.error("Unsupported operation: exiting.")
    exit(1)
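Restating the rule the author describes in the second review thread above as a small sketch: a regression fails the nightly CI only if the benchmark name matches the SYCL filter and its delta exceeds the noise threshold. The helper name and threshold value here are illustrative assumptions, not the PR's implementation:

```python
import re

# Illustrative helper, not the PR's code: a regression only fails the nightly CI
# when the benchmark name matches the SYCL filter AND its delta exceeds the
# noise threshold; smaller deltas are treated as noise and are not reported.
NOISE_THRESHOLD = 0.05  # assumed value, for illustration only

def is_ci_failing(name: str, delta: float, filter_pattern: re.Pattern) -> bool:
    return bool(filter_pattern.search(name)) and abs(delta) > NOISE_THRESHOLD
```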