From 6f85fc13f24933c1df045ce897b89694a8833ccc Mon Sep 17 00:00:00 2001 From: Gregory Comer Date: Mon, 11 Aug 2025 18:47:04 -0700 Subject: [PATCH 1/2] Update [ghstack-poisoned] --- backends/test/suite/reporting.py | 63 +++++++++++++++------ backends/test/suite/tests/test_reporting.py | 12 ++-- 2 files changed, 51 insertions(+), 24 deletions(-) diff --git a/backends/test/suite/reporting.py b/backends/test/suite/reporting.py index 93a93f76283..cb37ded947e 100644 --- a/backends/test/suite/reporting.py +++ b/backends/test/suite/reporting.py @@ -57,6 +57,36 @@ def is_non_backend_failure(self): def is_backend_failure(self): return not self.is_success() and not self.is_non_backend_failure() + + def to_short_str(self): + if self in { TestResult.SUCCESS, TestResult.SUCCESS_UNDELEGATED }: + return "Pass" + elif self == TestResult.SKIPPED: + return "Skip" + else: + return "Fail" + + def to_detail_str(self): + if self == TestResult.SUCCESS: + return "" + elif self == TestResult.SUCCESS_UNDELEGATED: + return "" + elif self == TestResult.SKIPPED: + return "" + elif self == TestResult.QUANTIZE_FAIL: + return "Quantization Failed" + elif self == TestResult.LOWER_FAIL: + return "Lowering Failed" + elif self == TestResult.PTE_LOAD_FAIL: + return "PTE Load Failed" + elif self == TestResult.PTE_RUN_FAIL: + return "PTE Run Failed" + elif self == TestResult.OUTPUT_MISMATCH_FAIL: + return "Output Mismatch" + elif self == TestResult.UNKNOWN_FAIL: + return "Unknown Failure" + else: + raise ValueError(f"Invalid TestResult value: {self}.") def display_name(self): if self == TestResult.SUCCESS: @@ -129,6 +159,9 @@ class TestCaseSummary: pte_size_bytes: int | None = None """ The size of the PTE file in bytes. """ + def is_delegated(self): + return any(v > 0 for v in self.delegated_op_counts.values()) if self.delegated_op_counts else False + class TestSessionState: test_case_summaries: list[TestCaseSummary] @@ -260,11 +293,12 @@ def generate_csv_report(summary: RunSummary, output: TextIO): field_names = [ "Test ID", "Test Case", - "Backend", "Flow", "Result", + "Result Detail", + "Delegated", "Quantize Time (s)", - "Lowering Time (s)", + "Lower Time (s)", ] # Tests can have custom parameters. We'll want to report them here, so we need @@ -289,9 +323,7 @@ def generate_csv_report(summary: RunSummary, output: TextIO): [ f"Output {i} Error Max", f"Output {i} Error MAE", - f"Output {i} Error MSD", - f"Output {i} Error L2", - f"Output {i} SQNR", + f"Output {i} SNR", ] ) field_names.extend( @@ -311,32 +343,31 @@ def generate_csv_report(summary: RunSummary, output: TextIO): row = { "Test ID": record.name, "Test Case": record.base_name, - "Backend": record.backend, "Flow": record.flow, - "Result": record.result.display_name(), + "Result": record.result.to_short_str(), + "Result Detail": record.result.to_detail_str(), + "Delegated": "True" if record.is_delegated() else "False", "Quantize Time (s)": ( - record.quantize_time.total_seconds() if record.quantize_time else None + f"{record.quantize_time.total_seconds():.3f}" if record.quantize_time else None ), - "Lowering Time (s)": ( - record.lower_time.total_seconds() if record.lower_time else None + "Lower Time (s)": ( + f"{record.lower_time.total_seconds():.3f}" if record.lower_time else None ), } if record.params is not None: row.update({k.capitalize(): v for k, v in record.params.items()}) for output_idx, error_stats in enumerate(record.tensor_error_statistics): - row[f"Output {output_idx} Error Max"] = error_stats.error_max - row[f"Output {output_idx} Error MAE"] = error_stats.error_mae - row[f"Output {output_idx} Error MSD"] = error_stats.error_msd - row[f"Output {output_idx} Error L2"] = error_stats.error_l2_norm - row[f"Output {output_idx} SQNR"] = error_stats.sqnr + row[f"Output {output_idx} Error Max"] = f"{error_stats.error_max:.3f}" + row[f"Output {output_idx} Error MAE"] = f"{error_stats.error_mae:.3f}" + row[f"Output {output_idx} SNR"] = f"{error_stats.sqnr:.3f}" row["Delegated Nodes"] = _sum_op_counts(record.delegated_op_counts) row["Undelegated Nodes"] = _sum_op_counts(record.undelegated_op_counts) row["Delegated Ops"] = _serialize_op_counts(record.delegated_op_counts) row["Undelegated Ops"] = _serialize_op_counts(record.undelegated_op_counts) row["PTE Size (Kb)"] = ( - record.pte_size_bytes / 1000.0 if record.pte_size_bytes else "" + f"{record.pte_size_bytes / 1000.0:.3f}" if record.pte_size_bytes else "" ) writer.writerow(row) diff --git a/backends/test/suite/tests/test_reporting.py b/backends/test/suite/tests/test_reporting.py index 5eab5648335..c3324b58332 100644 --- a/backends/test/suite/tests/test_reporting.py +++ b/backends/test/suite/tests/test_reporting.py @@ -79,36 +79,32 @@ def test_csv_report_simple(self): # Validate first record: test1, backend1, SUCCESS self.assertEqual(records[0]["Test ID"], "test1_backend1_flow1") self.assertEqual(records[0]["Test Case"], "test1") - self.assertEqual(records[0]["Backend"], "backend1") self.assertEqual(records[0]["Flow"], "flow1") - self.assertEqual(records[0]["Result"], "Success (Delegated)") + self.assertEqual(records[0]["Result"], "Pass") self.assertEqual(records[0]["Dtype"], "") self.assertEqual(records[0]["Use_dynamic_shapes"], "") # Validate second record: test1, backend2, LOWER_FAIL self.assertEqual(records[1]["Test ID"], "test1_backend2_flow1") self.assertEqual(records[1]["Test Case"], "test1") - self.assertEqual(records[1]["Backend"], "backend2") self.assertEqual(records[1]["Flow"], "flow1") - self.assertEqual(records[1]["Result"], "Fail (Lowering)") + self.assertEqual(records[1]["Result"], "Fail") self.assertEqual(records[1]["Dtype"], "") self.assertEqual(records[1]["Use_dynamic_shapes"], "") # Validate third record: test2, backend1, SUCCESS_UNDELEGATED with dtype param self.assertEqual(records[2]["Test ID"], "test2_backend1_flow1") self.assertEqual(records[2]["Test Case"], "test2") - self.assertEqual(records[2]["Backend"], "backend1") self.assertEqual(records[2]["Flow"], "flow1") - self.assertEqual(records[2]["Result"], "Success (Undelegated)") + self.assertEqual(records[2]["Result"], "Pass") self.assertEqual(records[2]["Dtype"], str(torch.float32)) self.assertEqual(records[2]["Use_dynamic_shapes"], "") # Validate fourth record: test2, backend2, EXPORT_FAIL with use_dynamic_shapes param self.assertEqual(records[3]["Test ID"], "test2_backend2_flow1") self.assertEqual(records[3]["Test Case"], "test2") - self.assertEqual(records[3]["Backend"], "backend2") self.assertEqual(records[3]["Flow"], "flow1") - self.assertEqual(records[3]["Result"], "Skipped") + self.assertEqual(records[3]["Result"], "Skip") self.assertEqual(records[3]["Dtype"], "") self.assertEqual(records[3]["Use_dynamic_shapes"], "True") From 78086b4ae523d9c889d847b1ce756585c02916af Mon Sep 17 00:00:00 2001 From: Gregory Comer Date: Mon, 11 Aug 2025 22:24:23 -0700 Subject: [PATCH 2/2] Update [ghstack-poisoned] --- backends/test/suite/flow.py | 2 +- backends/test/suite/reporting.py | 20 ++++++++++++++------ 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/backends/test/suite/flow.py b/backends/test/suite/flow.py index 4324db46796..8f47ebf0ebd 100644 --- a/backends/test/suite/flow.py +++ b/backends/test/suite/flow.py @@ -1,6 +1,6 @@ import logging -from dataclasses import dataclass, field +from dataclasses import dataclass from typing import Callable from executorch.backends.test.harness import Tester diff --git a/backends/test/suite/reporting.py b/backends/test/suite/reporting.py index cb37ded947e..a19c63dd474 100644 --- a/backends/test/suite/reporting.py +++ b/backends/test/suite/reporting.py @@ -57,15 +57,15 @@ def is_non_backend_failure(self): def is_backend_failure(self): return not self.is_success() and not self.is_non_backend_failure() - + def to_short_str(self): - if self in { TestResult.SUCCESS, TestResult.SUCCESS_UNDELEGATED }: + if self in {TestResult.SUCCESS, TestResult.SUCCESS_UNDELEGATED}: return "Pass" elif self == TestResult.SKIPPED: return "Skip" else: return "Fail" - + def to_detail_str(self): if self == TestResult.SUCCESS: return "" @@ -160,7 +160,11 @@ class TestCaseSummary: """ The size of the PTE file in bytes. """ def is_delegated(self): - return any(v > 0 for v in self.delegated_op_counts.values()) if self.delegated_op_counts else False + return ( + any(v > 0 for v in self.delegated_op_counts.values()) + if self.delegated_op_counts + else False + ) class TestSessionState: @@ -348,10 +352,14 @@ def generate_csv_report(summary: RunSummary, output: TextIO): "Result Detail": record.result.to_detail_str(), "Delegated": "True" if record.is_delegated() else "False", "Quantize Time (s)": ( - f"{record.quantize_time.total_seconds():.3f}" if record.quantize_time else None + f"{record.quantize_time.total_seconds():.3f}" + if record.quantize_time + else None ), "Lower Time (s)": ( - f"{record.lower_time.total_seconds():.3f}" if record.lower_time else None + f"{record.lower_time.total_seconds():.3f}" + if record.lower_time + else None ), } if record.params is not None: