[CI] Add Ability to Explain Failures

boomanaiden154 · boomanaiden154 · commit 56e24b215595 · 2025-11-05T16:41:03.000Z
With the premerge advisor infrastructure almost done, we can now request on demand explanations (failing at head or flaky). This patch adds the infrastructure to write out test reports containing this information so we can easily surface it to the user. Pull Request: llvm#166590
diff --git a/.ci/generate_test_report_lib.py b/.ci/generate_test_report_lib.py
@@ -3,8 +3,19 @@
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 """Library to parse JUnit XML files and return a markdown report."""
 
+from typing import TypedDict
+
 from junitparser import JUnitXml, Failure
 
+
+# This data structure should match the definition in llvm-zorg in
+# premerge/advisor/advisor_lib.py
+class FailureExplanation(TypedDict):
+    name: str
+    explained: bool
+    reason: str | None
+
+
 SEE_BUILD_FILE_STR = "Download the build's log file to see the details."
 UNRELATED_FAILURES_STR = (
     "If these failures are unrelated to your changes (for example "
@@ -82,16 +93,29 @@ def find_failure_in_ninja_logs(ninja_logs: list[list[str]]) -> list[tuple[str, s
     return failures
 
 
-def _format_failures(failures: list[tuple[str, str]]) -> list[str]:
+def _format_failures(
+    failures: list[tuple[str, str]], failure_explanations: dict[str, FailureExplanation]
+) -> list[str]:
     """Formats failures into summary views for the report."""
     output = []
     for build_failure in failures:
         failed_action, failure_message = build_failure
+        failure_explanation = None
+        if failed_action in failure_explanations:
+            failure_explanation = failure_explanations[failed_action]
+        output.append("<details>")
+        if failure_explanation:
+            output.extend(
+                [
+                    f"<summary>{failed_action} (Likely Already Failing)</summary>" "",
+                    failure_explanation["reason"],
+                    "",
+                ]
+            )
+        else:
+            output.extend([f"<summary>{failed_action}</summary>", ""])
         output.extend(
             [
-                "<details>",
-                f"<summary>{failed_action}</summary>",
-                "",
                 "```",
                 failure_message,
                 "```",
@@ -132,12 +156,19 @@ def generate_report(
     ninja_logs: list[list[str]],
     size_limit=1024 * 1024,
     list_failures=True,
+    failure_explanations_list: list[FailureExplanation] = [],
 ):
     failures = get_failures(junit_objects)
     tests_run = 0
     tests_skipped = 0
     tests_failed = 0
 
+    failure_explanations: dict[str, FailureExplanation] = {}
+    for failure_explanation in failure_explanations_list:
+        if not failure_explanation["explained"]:
+            continue
+        failure_explanations[failure_explanation["name"]] = failure_explanation
+
     for results in junit_objects:
         for testsuite in results:
             tests_run += testsuite.tests
@@ -176,7 +207,7 @@ def generate_report(
                         "",
                     ]
                 )
-                report.extend(_format_failures(ninja_failures))
+                report.extend(_format_failures(ninja_failures, failure_explanations))
                 report.extend(
                     [
                         "",
@@ -212,7 +243,7 @@ def plural(num_tests):
 
         for testsuite_name, failures in failures.items():
             report.extend(["", f"### {testsuite_name}"])
-            report.extend(_format_failures(failures))
+            report.extend(_format_failures(failures, failure_explanations))
     elif return_code != 0:
         # No tests failed but the build was in a failed state. Bring this to the user's
         # attention.
@@ -237,7 +268,7 @@ def plural(num_tests):
                     "",
                 ]
             )
-            report.extend(_format_failures(ninja_failures))
+            report.extend(_format_failures(ninja_failures, failure_explanations))
 
     if failures or return_code != 0:
         report.extend(["", UNRELATED_FAILURES_STR])
diff --git a/.ci/generate_test_report_lib_test.py b/.ci/generate_test_report_lib_test.py
@@ -781,6 +781,107 @@ def test_report_size_limit(self):
             ),
         )
 
+    def test_report_ninja_explanation(self):
+        self.assertEqual(
+            generate_test_report_lib.generate_report(
+                "Foo",
+                1,
+                [],
+                [
+                    [
+                        "[1/5] test/1.stamp",
+                        "[2/5] test/2.stamp",
+                        "[3/5] test/3.stamp",
+                        "[4/5] test/4.stamp",
+                        "FAILED: test/4.stamp",
+                        "touch test/4.stamp",
+                        "Half Moon Bay.",
+                        "[5/5] test/5.stamp",
+                    ]
+                ],
+                failure_explanations_list=[
+                    {
+                        "name": "test/4.stamp",
+                        "explained": True,
+                        "reason": "Failing at head",
+                    }
+                ],
+            ),
+            dedent(
+                """\
+            # Foo
+
+            The build failed before running any tests. Click on a failure below to see the details.
+
+            <details>
+            <summary>test/4.stamp (Likely Already Failing)</summary>
+            Failing at head
+
+            ```
+            FAILED: test/4.stamp
+            touch test/4.stamp
+            Half Moon Bay.
+            ```
+            </details>
+            
+            If these failures are unrelated to your changes (for example tests are broken or flaky at HEAD), please open an issue at https://github.com/llvm/llvm-project/issues and add the `infrastructure` label."""
+            ),
+        )
+
+    def test_report_test_failure_explanation(self):
+        self.assertEqual(
+            generate_test_report_lib.generate_report(
+                "Foo",
+                1,
+                [
+                    junit_from_xml(
+                        dedent(
+                            """\
+          <?xml version="1.0" encoding="UTF-8"?>
+          <testsuites time="8.89">
+          <testsuite name="Bar" tests="1" failures="1" skipped="0" time="410.63">
+          <testcase classname="Bar/test_3" name="test_3" time="0.02">
+            <failure><![CDATA[Error! Expected Big Sur to be next to the ocean.]]></failure>
+          </testcase>
+          </testsuite>
+          </testsuites>"""
+                        )
+                    )
+                ],
+                [],
+                failure_explanations_list=[
+                    {
+                        "name": "Bar/test_3/test_3",
+                        "explained": True,
+                        "reason": "Big Sur is next to the Pacific.",
+                    }
+                ],
+            ),
+            (
+                dedent(
+                    """\
+          # Foo
+
+          * 1 test failed
+
+          ## Failed Tests
+          (click on a test name to see its output)
+
+          ### Bar
+          <details>
+          <summary>Bar/test_3/test_3 (Likely Already Failing)</summary>
+          Big Sur is next to the Pacific.
+          
+          ```
+          Error! Expected Big Sur to be next to the ocean.
+          ```
+          </details>
+
+          If these failures are unrelated to your changes (for example tests are broken or flaky at HEAD), please open an issue at https://github.com/llvm/llvm-project/issues and add the `infrastructure` label."""
+                )
+            ),
+        )
+
     def test_generate_report_end_to_end(self):
         with tempfile.TemporaryDirectory() as temp_dir:
             junit_xml_file = os.path.join(temp_dir, "junit.xml")