
Commit a6c9a30

Update
[ghstack-poisoned]
1 parent 641e737 commit a6c9a30

10 files changed: +588 -252 lines changed


backends/test/suite/__init__.py

Lines changed: 6 additions & 0 deletions
@@ -11,6 +11,7 @@
 import os
 
 import executorch.backends.test.suite.flow
+import torch
 
 from executorch.backends.test.suite.flow import TestFlow
 from executorch.backends.test.suite.runner import runner_main
@@ -55,6 +56,11 @@ def get_test_flows() -> dict[str, TestFlow]:
     return _ALL_TEST_FLOWS
 
 
+def dtype_to_str(dtype: torch.dtype) -> str:
+    # Strip off "torch."
+    return str(dtype)[6:]
+
+
 def load_tests(loader, suite, pattern):
     package_dir = os.path.dirname(__file__)
     discovered_suite = loader.discover(
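
A quick sanity check of the new helper (a sketch, not part of the commit; it relies only on the standard string form of torch dtypes, e.g. str(torch.float32) == "torch.float32"):

import torch

from executorch.backends.test.suite import dtype_to_str

# str(torch.float32) == "torch.float32"; slicing off the first 6 characters leaves "float32".
assert dtype_to_str(torch.float32) == "float32"
assert dtype_to_str(torch.int8) == "int8"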

backends/test/suite/conftest.py

Lines changed: 109 additions & 0 deletions
@@ -0,0 +1,109 @@
+import pytest
+import torch
+
+from executorch.backends.test.suite.flow import TestFlow, all_flows
+from executorch.backends.test.suite.reporting import _sum_op_counts
+from executorch.backends.test.suite.runner import run_test
+
+from typing import Any
+
+BACKENDS = ["xnnpack", "coreml", "vulkan", "qnn", "arm"]
+
+def pytest_configure(config):
+    for backend in BACKENDS:
+        config.addinivalue_line("markers", f"backend_{backend}: mark a test as testing the {backend} backend")
+
+class TestRunner:
+    def __init__(self, flow, test_name, test_base_name):
+        self._flow = flow
+        self._test_name = test_name
+        self._test_base_name = test_base_name
+        self._subtest = 0
+        self._results = []
+
+    def lower_and_run_model(self, model: torch.nn.Module, inputs: Any, generate_random_test_inputs=True):
+        run_summary = run_test(
+            model,
+            inputs,
+            self._flow,
+            self._test_name,
+            self._test_base_name,
+            self._subtest,
+            None,
+            generate_random_test_inputs=generate_random_test_inputs,
+        )
+
+        self._subtest += 1
+        self._results.append(run_summary)
+
+        if not run_summary.result.is_success():
+            if run_summary.result.is_backend_failure():
+                raise RuntimeError("Test failure.") from run_summary.error
+            else:
+                # Non-backend failure indicates a bad test. Mark as skipped.
+                pytest.skip(
+                    f"Test failed for reasons other than backend failure. Error: {run_summary.error}"
+                )
+
+@pytest.fixture(params=all_flows().values(), ids=str)
+def test_runner(request):
+    return TestRunner(request.param, request.node.name, request.node.originalname)
+
+@pytest.hookimpl(optionalhook=True)
+def pytest_json_runtest_metadata(item, call):
+    metadata = {
+        "subtests": []
+    }
+
+    if hasattr(item, "funcargs") and "test_runner" in item.funcargs:
+        runner_instance = item.funcargs["test_runner"]
+
+        for record in runner_instance._results:
+            subtest_metadata = {}
+
+            error_message = ""
+            if record.error is not None:
+                error_str = str(record.error)
+                if len(error_str) > 400:
+                    error_message = error_str[:200] + "..." + error_str[-200:]
+                else:
+                    error_message = error_str
+
+            subtest_metadata["Test ID"] = record.name
+            subtest_metadata["Test Case"] = record.base_name
+            subtest_metadata["Subtest"] = record.subtest_index
+            subtest_metadata["Flow"] = record.flow
+            subtest_metadata["Params"] = record.params
+            subtest_metadata["Result"] = record.result.to_short_str()
+            subtest_metadata["Result Detail"] = record.result.to_detail_str()
+            subtest_metadata["Error"] = error_message
+            subtest_metadata["Delegated"] = "True" if record.is_delegated() else "False"
+            subtest_metadata["Quantize Time (s)"] = (
+                f"{record.quantize_time.total_seconds():.3f}"
+                if record.quantize_time
+                else None
+            )
+            subtest_metadata["Lower Time (s)"] = (
+                f"{record.lower_time.total_seconds():.3f}" if record.lower_time else None
+            )
+
+            for output_idx, error_stats in enumerate(record.tensor_error_statistics):
+                subtest_metadata[f"Output {output_idx} Error Max"] = f"{error_stats.error_max:.3f}"
+                subtest_metadata[f"Output {output_idx} Error MAE"] = f"{error_stats.error_mae:.3f}"
+                subtest_metadata[f"Output {output_idx} SNR"] = f"{error_stats.sqnr:.3f}"
+
+            subtest_metadata["Delegated Nodes"] = _sum_op_counts(record.delegated_op_counts)
+            subtest_metadata["Undelegated Nodes"] = _sum_op_counts(record.undelegated_op_counts)
+            if record.delegated_op_counts:
+                subtest_metadata["Delegated Ops"] = dict(record.delegated_op_counts)
+            if record.undelegated_op_counts:
+                subtest_metadata["Undelegated Ops"] = dict(record.undelegated_op_counts)
+            subtest_metadata["PTE Size (Kb)"] = (
+                f"{record.pte_size_bytes / 1000.0:.3f}" if record.pte_size_bytes else ""
+            )
+
+            metadata["subtests"].append(subtest_metadata)
+
+    return metadata
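
For context, a minimal sketch of how a suite test might consume the test_runner fixture defined above (the model and test name are illustrative, not part of this commit):

import torch


class AddOne(torch.nn.Module):
    # Trivial example model; any exportable module would do.
    def forward(self, x):
        return x + 1


def test_add_one(test_runner):
    # The fixture is parameterized over all registered flows; lower_and_run_model
    # lowers the module for that flow, runs it, and records a subtest result.
    test_runner.lower_and_run_model(AddOne(), (torch.randn(4),))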

backends/test/suite/flow.py

Lines changed: 3 additions & 0 deletions
@@ -44,6 +44,9 @@ class TestFlow:
     def should_skip_test(self, test_name: str) -> bool:
         return any(pattern in test_name for pattern in self.skip_patterns)
 
+    def __str__(self):
+        return self.name
+
 
 def all_flows() -> dict[str, TestFlow]:
     flows = []
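
The new __str__ is what lets conftest.py above pass ids=str to the fixture, so parameterized test IDs show the flow name. A small illustration (actual flow names depend on which backends are registered):

from executorch.backends.test.suite.flow import all_flows

# Each flow now stringifies to its name, so pytest IDs read like
# "test_add_one[xnnpack]" instead of an object repr.
for flow in all_flows().values():
    print(str(flow))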

backends/test/suite/generate_markdown_summary.py

Lines changed: 251 additions & 0 deletions
@@ -0,0 +1,251 @@
+import argparse
+import csv
+import functools
+import json
+import sys
+
+from dataclasses import dataclass, field
+
+
+@dataclass
+class ResultCounts:
+    """
+    Represents aggregated result counts for each status.
+    """
+
+    total: int = 0
+    passes: int = 0
+    fails: int = 0
+    skips: int = 0
+    by_detail: dict[str, int] = field(default_factory=lambda: {})
+
+    def add_row(self, result_value: str, result_detail: str) -> None:
+        """
+        Update the result counts for the specified row.
+        """
+
+        self.total += 1
+
+        if result_value == "Pass":
+            self.passes += 1
+        elif result_value == "Fail":
+            self.fails += 1
+        elif result_value == "Skip":
+            self.skips += 1
+        else:
+            raise RuntimeError(f"Unknown result value {result_value}")
+
+        if result_detail:
+            if result_detail not in self.by_detail:
+                self.by_detail[result_detail] = 0
+
+            self.by_detail[result_detail] += 1
+
+
+@dataclass
+class AggregatedSummary:
+    """
+    Represents aggregated summary data for the test run.
+    """
+
+    counts: ResultCounts
+    counts_by_params: dict[str, ResultCounts]
+    failed_tests: list[list[str]]
+
+
+#
+# A standalone script to generate a Markdown representation of a test report.
+# This is primarily intended to be used with GitHub actions to generate a nice
+# representation of the test results when looking at the action run.
+#
+# Usage: python executorch/backends/test/suite/generate_markdown_summary.py <path to test report CSV file>
+# Markdown is written to stdout.
+#
+
+
+def aggregate_results(json_path: str) -> AggregatedSummary:
+    with open(json_path) as f:
+        data = json.load(f)
+
+    # Count results and prepare data
+    counts = ResultCounts()
+    failed_tests = []
+    counts_by_param = {}
+
+    for test_data in data["tests"]:
+        result_meta = test_data.get("metadata")
+        if result_meta:
+            for subtest_meta in result_meta["subtests"]:
+                result = subtest_meta["Result"]
+                result_detail = subtest_meta.get("Result Detail") or ""
+
+                counts.add_row(result, result_detail)
+
+                params = subtest_meta["Params"]
+                if params:
+                    if params not in counts_by_param:
+                        counts_by_param[params] = ResultCounts()
+                    counts_by_param[params].add_row(result, result_detail)
+
+                if result.lower() == "fail":
+                    failed_tests.append(subtest_meta)
+
+    return AggregatedSummary(
+        counts=counts,
+        failed_tests=failed_tests,
+        counts_by_params=counts_by_param,
+    )
+
+
+def escape_for_markdown(text: str) -> str:
+    """
+    Modify a string to properly display in a markdown table cell.
+    """
+    if not text:
+        return text
+
+    # Replace newlines with <br /> tags
+    escaped = text.replace("\n", "<br />")
+
+    # Escape backslashes.
+    escaped = escaped.replace("\\", "\\\\")
+
+    # Escape pipe characters that would break table structure
+    escaped = escaped.replace("|", "\\|")
+
+    return escaped
+
+
+def generate_markdown(json_path: str, exit_code: int = 0):  # noqa (C901)
+    # Print warning if exit code is non-zero
+    if exit_code != 0:
+        print("> [!WARNING]")
+        print(
+            f"> Exit code {exit_code} was non-zero. Test process may have crashed. Check the job logs for more information.\n"
+        )
+
+    results = aggregate_results(json_path)
+
+    # Generate Summary section
+    print("# Summary\n")
+    total_excluding_skips = results.counts.passes + results.counts.fails
+    pass_fraction = results.counts.passes / total_excluding_skips
+    fail_fraction = results.counts.fails / total_excluding_skips
+    print(
+        f"- **Pass**: {results.counts.passes}/{total_excluding_skips} ({pass_fraction*100:.2f}%)"
+    )
+    print(
+        f"- **Fail**: {results.counts.fails}/{total_excluding_skips} ({fail_fraction*100:.2f}%)"
+    )
+    print(f"- **Skip**: {results.counts.skips}")
+
+    if results.counts_by_params:
+        print("\n## Results by Parameters\n")
+
+        # Extract all unique parameter keys from the JSON strings
+        all_param_keys = set()
+        parsed_params = {}
+
+        for params_str in results.counts_by_params.keys():
+            # Parse the JSON string (it's a string representation of a dict)
+            params_dict = json.loads(params_str)
+            parsed_params[params_str] = params_dict
+            all_param_keys.update(params_dict.keys())
+
+        if parsed_params and len(parsed_params) > 1:
+            # Sort parameter keys for consistent column ordering
+            sorted_param_keys = sorted(all_param_keys)
+
+            # Create table header
+            header_cols = sorted_param_keys + ["Pass", "Fail", "Skip", "Pass %"]
+            print("| " + " | ".join(header_cols) + " |")
+            print("|" + "|".join(["---"] * len(header_cols)) + "|")
+
+            # Create table rows
+            for params_str, counts in results.counts_by_params.items():
+                if params_str in parsed_params:
+                    params_dict = parsed_params[params_str]
+                    row_values = []
+
+                    # Add parameter values
+                    for key in sorted_param_keys:
+                        value = params_dict.get(key, "")
+                        row_values.append(str(value))
+
+                    pass_fraction = counts.passes / (counts.passes + counts.fails)
+
+                    # Add count values
+                    row_values.extend(
+                        [
+                            str(counts.passes),
+                            str(counts.fails),
+                            str(counts.skips),
+                            f"{pass_fraction*100:.2f}%",
+                        ]
+                    )
+
+                    print("| " + " | ".join(row_values) + " |")
+
+            print()
+
+    print("## Failure Breakdown:")
+    total_rows_with_result_detail = sum(results.counts.by_detail.values())
+    for detail, count in sorted(results.counts.by_detail.items()):
+        print(f"- **{detail}**: {count}/{total_rows_with_result_detail}")
+
+    # Generate Failed Tests section
+    print("# Failed Tests\n")
+    if results.failed_tests:
+        header = build_header(results.failed_tests)
+
+        escaped_header = [escape_for_markdown(col) for col in header.keys()]
+        print("| " + " | ".join(escaped_header) + " |")
+        print("|" + "|".join(["---"] * len(escaped_header)) + "|")
+        for rec in results.failed_tests:
+            row = build_row(rec, header)
+            print("| " + " | ".join(row) + " |")
+    else:
+        print("No failed tests.\n")
+
+
+def build_header(data) -> dict[str, int]:
+    """
+    Find the union of all keys and return a dict of header keys and indices. Try to preserve
+    ordering as much as possible.
+    """
+
+    keys = max(data, key=len)
+
+    header = {k: i for (i, k) in enumerate(keys)}
+
+    for rec in data:
+        keys = set(rec.keys())
+        for k in keys:
+            if k not in header:
+                header[k] = len(header)
+
+    return header
+
+def build_row(rec, header: dict[str, int]) -> list[str]:
+    row = [""] * len(header)
+    for k, v in rec.items():
+        row[header[k]] = escape_for_markdown(str(v))
+    return row
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Generate a Markdown representation of a test report."
+    )
+    parser.add_argument("csv_path", help="Path to the test report CSV file.")
+    parser.add_argument(
+        "--exit-code", type=int, default=0, help="Exit code from the test process."
+    )
+    args = parser.parse_args()
+    generate_markdown(args.csv_path, args.exit_code)
+
+
+if __name__ == "__main__":
+    main()
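
A sketch of how the pieces are expected to fit together: pytest (with the pytest-json-report plugin) writes a JSON report containing the per-test "subtests" metadata emitted by the conftest.py hook above, and this script turns it into Markdown. The module path assumes the script lives at backends/test/suite/generate_markdown_summary.py as its usage string suggests; file name and field values below are illustrative:

import json

from executorch.backends.test.suite.generate_markdown_summary import generate_markdown

# Minimal report shaped like pytest-json-report output, carrying the "subtests"
# metadata produced by pytest_json_runtest_metadata (values are made up).
report = {
    "tests": [
        {
            "metadata": {
                "subtests": [
                    {
                        "Test ID": "test_add_one[xnnpack]",
                        "Test Case": "test_add_one",
                        "Params": '{"flow": "xnnpack"}',
                        "Result": "Pass",
                        "Result Detail": "",
                    }
                ]
            }
        }
    ]
}

with open("report.json", "w") as f:
    json.dump(report, f)

# Prints the Markdown summary to stdout, as the CI job would.
generate_markdown("report.json", exit_code=0)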
