Skip to content

Commit 1b9315e

Browse files
committed
Update
[ghstack-poisoned]
2 parents 90ce443 + 94d89c4 commit 1b9315e

File tree

3 files changed

+62
-98
lines changed

3 files changed

+62
-98
lines changed

backends/test/suite/reporting.py

Lines changed: 52 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import csv
22

33
from collections import Counter
4-
from dataclasses import dataclass
4+
from dataclasses import dataclass, field
55
from datetime import timedelta
66
from enum import IntEnum
77
from functools import reduce
@@ -205,11 +205,15 @@ def is_delegated(self):
205205
)
206206

207207

208+
@dataclass
208209
class TestSessionState:
209-
test_case_summaries: list[TestCaseSummary]
210+
# True if the CSV header has been written to report_path.
211+
has_written_report_header: bool = False
212+
213+
# The file path to write the detail report to, if enabled.
214+
report_path: str | None = None
210215

211-
def __init__(self):
212-
self.test_case_summaries = []
216+
test_case_summaries: list[TestCaseSummary] = field(default_factory=list)
213217

214218

215219
@dataclass
@@ -287,11 +291,11 @@ def count_ops(program: dict[str, ExportedProgram] | ExportedProgram) -> Counter:
287291
)
288292

289293

290-
def begin_test_session():
294+
def begin_test_session(report_path: str | None):
291295
global _active_session
292296

293297
assert _active_session is None, "A test session is already active."
294-
_active_session = TestSessionState()
298+
_active_session = TestSessionState(report_path=report_path)
295299

296300

297301
def log_test_summary(summary: TestCaseSummary):
@@ -300,6 +304,15 @@ def log_test_summary(summary: TestCaseSummary):
300304
if _active_session is not None:
301305
_active_session.test_case_summaries.append(summary)
302306

307+
if _active_session.report_path is not None:
308+
file_mode = "a" if _active_session.has_written_report_header else "w"
309+
with open(_active_session.report_path, file_mode) as f:
310+
if not _active_session.has_written_report_header:
311+
write_csv_header(f)
312+
_active_session.has_written_report_header = True
313+
314+
write_csv_row(summary, f)
315+
303316

304317
def complete_test_session() -> RunSummary:
305318
global _active_session
@@ -318,6 +331,13 @@ def _sum_op_counts(counter: Counter | None) -> int | None:
318331
return sum(counter.values()) if counter is not None else None
319332

320333

334+
def _serialize_params(params: dict[str, Any] | None) -> str:
335+
if params is not None:
336+
return str(dict(sorted(params.items())))
337+
else:
338+
return ""
339+
340+
321341
def _serialize_op_counts(counter: Counter | None) -> str:
322342
"""
323343
A utility function to serialize op counts to a string, for the purpose of including
@@ -329,19 +349,10 @@ def _serialize_op_counts(counter: Counter | None) -> str:
329349
return ""
330350

331351

332-
def generate_csv_report(summary: RunSummary, output: TextIO):
333-
"""Write a run summary report to a file in CSV format."""
352+
def write_csv_header(output: TextIO):
353+
writer = csv.DictWriter(output, CSV_FIELD_NAMES)
354+
writer.writeheader()
334355

335-
field_names = [
336-
"Test ID",
337-
"Test Case",
338-
"Flow",
339-
"Result",
340-
"Result Detail",
341-
"Delegated",
342-
"Quantize Time (s)",
343-
"Lower Time (s)",
344-
]
345356

346357
def write_csv_row(record: TestCaseSummary, output: TextIO):
347358
writer = csv.DictWriter(output, CSV_FIELD_NAMES)
@@ -360,68 +371,28 @@ def write_csv_row(record: TestCaseSummary, output: TextIO):
360371
if record.quantize_time
361372
else None
362373
),
363-
set(),
364-
)
365-
field_names += (s.capitalize() for s in param_names)
366-
367-
# Add tensor error statistic field names for each output index.
368-
max_outputs = max(
369-
len(s.tensor_error_statistics) for s in summary.test_case_summaries
370-
)
371-
for i in range(max_outputs):
372-
field_names.extend(
373-
[
374-
f"Output {i} Error Max",
375-
f"Output {i} Error MAE",
376-
f"Output {i} SNR",
377-
]
378-
)
379-
field_names.extend(
380-
[
381-
"Delegated Nodes",
382-
"Undelegated Nodes",
383-
"Delegated Ops",
384-
"Undelegated Ops",
385-
"PTE Size (Kb)",
386-
]
374+
"Lower Time (s)": (
375+
f"{record.lower_time.total_seconds():.3f}" if record.lower_time else None
376+
),
377+
}
378+
379+
for output_idx, error_stats in enumerate(record.tensor_error_statistics):
380+
if output_idx >= MAX_LOGGED_MODEL_OUTPUTS:
381+
print(
382+
f"Model output stats are truncated as model has more than {MAX_LOGGED_MODEL_OUTPUTS} outputs. Consider increasing MAX_LOGGED_MODEL_OUTPUTS."
383+
)
384+
break
385+
386+
row[f"Output {output_idx} Error Max"] = f"{error_stats.error_max:.3f}"
387+
row[f"Output {output_idx} Error MAE"] = f"{error_stats.error_mae:.3f}"
388+
row[f"Output {output_idx} SNR"] = f"{error_stats.sqnr:.3f}"
389+
390+
row["Delegated Nodes"] = _sum_op_counts(record.delegated_op_counts)
391+
row["Undelegated Nodes"] = _sum_op_counts(record.undelegated_op_counts)
392+
row["Delegated Ops"] = _serialize_op_counts(record.delegated_op_counts)
393+
row["Undelegated Ops"] = _serialize_op_counts(record.undelegated_op_counts)
394+
row["PTE Size (Kb)"] = (
395+
f"{record.pte_size_bytes / 1000.0:.3f}" if record.pte_size_bytes else ""
387396
)
388397

389-
writer = csv.DictWriter(output, field_names)
390-
writer.writeheader()
391-
392-
for record in summary.test_case_summaries:
393-
row = {
394-
"Test ID": record.name,
395-
"Test Case": record.base_name,
396-
"Flow": record.flow,
397-
"Result": record.result.to_short_str(),
398-
"Result Detail": record.result.to_detail_str(),
399-
"Delegated": "True" if record.is_delegated() else "False",
400-
"Quantize Time (s)": (
401-
f"{record.quantize_time.total_seconds():.3f}"
402-
if record.quantize_time
403-
else None
404-
),
405-
"Lower Time (s)": (
406-
f"{record.lower_time.total_seconds():.3f}"
407-
if record.lower_time
408-
else None
409-
),
410-
}
411-
if record.params is not None:
412-
row.update({k.capitalize(): v for k, v in record.params.items()})
413-
414-
for output_idx, error_stats in enumerate(record.tensor_error_statistics):
415-
row[f"Output {output_idx} Error Max"] = f"{error_stats.error_max:.3f}"
416-
row[f"Output {output_idx} Error MAE"] = f"{error_stats.error_mae:.3f}"
417-
row[f"Output {output_idx} SNR"] = f"{error_stats.sqnr:.3f}"
418-
419-
row["Delegated Nodes"] = _sum_op_counts(record.delegated_op_counts)
420-
row["Undelegated Nodes"] = _sum_op_counts(record.undelegated_op_counts)
421-
row["Delegated Ops"] = _serialize_op_counts(record.delegated_op_counts)
422-
row["Undelegated Ops"] = _serialize_op_counts(record.undelegated_op_counts)
423-
row["PTE Size (Kb)"] = (
424-
f"{record.pte_size_bytes / 1000.0:.3f}" if record.pte_size_bytes else ""
425-
)
426-
427-
writer.writerow(row)
398+
writer.writerow(row)

backends/test/suite/runner.py

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
begin_test_session,
2626
complete_test_session,
2727
count_ops,
28-
generate_csv_report,
2928
RunSummary,
3029
TestCaseSummary,
3130
TestResult,
@@ -250,7 +249,7 @@ def build_test_filter(args: argparse.Namespace) -> TestFilter:
250249
def runner_main():
251250
args = parse_args()
252251

253-
begin_test_session()
252+
begin_test_session(args.report)
254253

255254
if len(args.suite) > 1:
256255
raise NotImplementedError("TODO Support multiple suites.")
@@ -265,11 +264,6 @@ def runner_main():
265264
summary = complete_test_session()
266265
print_summary(summary)
267266

268-
if args.report is not None:
269-
with open(args.report, "w") as f:
270-
print(f"Writing CSV report to {args.report}.")
271-
generate_csv_report(summary, f)
272-
273267

274268
if __name__ == "__main__":
275269
runner_main()

backends/test/suite/tests/test_reporting.py

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,12 @@
99

1010
from ..reporting import (
1111
count_ops,
12-
generate_csv_report,
1312
RunSummary,
1413
TestCaseSummary,
1514
TestResult,
1615
TestSessionState,
16+
write_csv_header,
17+
write_csv_row,
1718
)
1819

1920
# Test data for simulated test results.
@@ -69,7 +70,9 @@ def test_csv_report_simple(self):
6970
run_summary = RunSummary.from_session(session_state)
7071

7172
strio = StringIO()
72-
generate_csv_report(run_summary, strio)
73+
write_csv_header(strio)
74+
for case_summary in run_summary.test_case_summaries:
75+
write_csv_row(case_summary, strio)
7376

7477
# Attempt to deserialize and validate the CSV report.
7578
report = DictReader(StringIO(strio.getvalue()))
@@ -81,32 +84,28 @@ def test_csv_report_simple(self):
8184
self.assertEqual(records[0]["Test Case"], "test1")
8285
self.assertEqual(records[0]["Flow"], "flow1")
8386
self.assertEqual(records[0]["Result"], "Pass")
84-
self.assertEqual(records[0]["Dtype"], "")
85-
self.assertEqual(records[0]["Use_dynamic_shapes"], "")
87+
self.assertEqual(records[0]["Params"], "")
8688

8789
# Validate second record: test1, backend2, LOWER_FAIL
8890
self.assertEqual(records[1]["Test ID"], "test1_backend2_flow1")
8991
self.assertEqual(records[1]["Test Case"], "test1")
9092
self.assertEqual(records[1]["Flow"], "flow1")
9193
self.assertEqual(records[1]["Result"], "Fail")
92-
self.assertEqual(records[1]["Dtype"], "")
93-
self.assertEqual(records[1]["Use_dynamic_shapes"], "")
94+
self.assertEqual(records[1]["Params"], "")
9495

9596
# Validate third record: test2, backend1, SUCCESS_UNDELEGATED with dtype param
9697
self.assertEqual(records[2]["Test ID"], "test2_backend1_flow1")
9798
self.assertEqual(records[2]["Test Case"], "test2")
9899
self.assertEqual(records[2]["Flow"], "flow1")
99100
self.assertEqual(records[2]["Result"], "Pass")
100-
self.assertEqual(records[2]["Dtype"], str(torch.float32))
101-
self.assertEqual(records[2]["Use_dynamic_shapes"], "")
101+
self.assertEqual(records[2]["Params"], str({"dtype": torch.float32}))
102102

103103
# Validate fourth record: test2, backend2, EXPORT_FAIL with use_dynamic_shapes param
104104
self.assertEqual(records[3]["Test ID"], "test2_backend2_flow1")
105105
self.assertEqual(records[3]["Test Case"], "test2")
106106
self.assertEqual(records[3]["Flow"], "flow1")
107107
self.assertEqual(records[3]["Result"], "Skip")
108-
self.assertEqual(records[3]["Dtype"], "")
109-
self.assertEqual(records[3]["Use_dynamic_shapes"], "True")
108+
self.assertEqual(records[3]["Params"], str({"use_dynamic_shapes": True}))
110109

111110
def test_count_ops(self):
112111
"""

0 commit comments

Comments
 (0)