Skip to content

Commit 6756145

Browse files
committed
[Backend Tester] Clean up report output
ghstack-source-id: a8cd249 ghstack-comment-id: 3177420376 Pull-Request: #13306
1 parent 3f62c30 commit 6756145

File tree

2 files changed

+59
-24
lines changed

2 files changed

+59
-24
lines changed

backends/test/suite/reporting.py

Lines changed: 55 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,36 @@ def is_non_backend_failure(self):
5858
def is_backend_failure(self):
    """Return True when this result is a failure attributed to the backend.

    A result counts as a backend failure when it is neither a success
    (``is_success()``) nor a non-backend failure
    (``is_non_backend_failure()``).
    """
    return not self.is_success() and not self.is_non_backend_failure()
6060

61+
def to_short_str(self):
    """Return the coarse outcome label for this result.

    Returns:
        ``"Pass"`` for ``SUCCESS`` and ``SUCCESS_UNDELEGATED``, ``"Skip"``
        for ``SKIPPED``, and ``"Fail"`` for every other value. The CSV
        report writes this value to its "Result" column.
    """
    if self in {TestResult.SUCCESS, TestResult.SUCCESS_UNDELEGATED}:
        return "Pass"
    if self == TestResult.SKIPPED:
        return "Skip"
    # Anything that is neither a pass nor a skip is reported as a failure.
    return "Fail"
68+
69+
def to_detail_str(self):
    """Return a human-readable reason for a failing result.

    Passing and skipped results have no detail and map to the empty string.
    The CSV report writes this value to its "Result Detail" column.

    Returns:
        The detail text for this result, or ``""`` for ``SUCCESS``,
        ``SUCCESS_UNDELEGATED`` and ``SKIPPED``.

    Raises:
        ValueError: If this value has no entry in the table below.
    """
    # Built inside the method because the table refers to TestResult
    # members by name, which are only resolvable once the class exists.
    details = {
        TestResult.SUCCESS: "",
        TestResult.SUCCESS_UNDELEGATED: "",
        TestResult.SKIPPED: "",
        TestResult.QUANTIZE_FAIL: "Quantization Failed",
        TestResult.LOWER_FAIL: "Lowering Failed",
        TestResult.PTE_LOAD_FAIL: "PTE Load Failed",
        TestResult.PTE_RUN_FAIL: "PTE Run Failed",
        TestResult.OUTPUT_MISMATCH_FAIL: "Output Mismatch",
        TestResult.UNKNOWN_FAIL: "Unknown Failure",
    }
    try:
        return details[self]
    except KeyError:
        # Keep the original contract: unknown values are a ValueError.
        raise ValueError(f"Invalid TestResult value: {self}.") from None
90+
6191
def display_name(self):
6292
if self == TestResult.SUCCESS:
6393
return "Success (Delegated)"
@@ -129,6 +159,13 @@ class TestCaseSummary:
129159
pte_size_bytes: int | None = None
130160
""" The size of the PTE file in bytes. """
131161

162+
def is_delegated(self):
    """Return True if at least one delegated op has a positive count.

    A missing (``None``) or empty ``delegated_op_counts`` is reported as
    not delegated. The CSV report writes this value to its "Delegated"
    column.
    """
    op_counts = self.delegated_op_counts
    if not op_counts:
        return False
    return any(count > 0 for count in op_counts.values())
168+
132169

133170
class TestSessionState:
134171
test_case_summaries: list[TestCaseSummary]
@@ -260,11 +297,12 @@ def generate_csv_report(summary: RunSummary, output: TextIO):
260297
field_names = [
261298
"Test ID",
262299
"Test Case",
263-
"Backend",
264300
"Flow",
265301
"Result",
302+
"Result Detail",
303+
"Delegated",
266304
"Quantize Time (s)",
267-
"Lowering Time (s)",
305+
"Lower Time (s)",
268306
]
269307

270308
# Tests can have custom parameters. We'll want to report them here, so we need
@@ -289,9 +327,7 @@ def generate_csv_report(summary: RunSummary, output: TextIO):
289327
[
290328
f"Output {i} Error Max",
291329
f"Output {i} Error MAE",
292-
f"Output {i} Error MSD",
293-
f"Output {i} Error L2",
294-
f"Output {i} SQNR",
330+
f"Output {i} SNR",
295331
]
296332
)
297333
field_names.extend(
@@ -311,32 +347,35 @@ def generate_csv_report(summary: RunSummary, output: TextIO):
311347
row = {
312348
"Test ID": record.name,
313349
"Test Case": record.base_name,
314-
"Backend": record.backend,
315350
"Flow": record.flow,
316-
"Result": record.result.display_name(),
351+
"Result": record.result.to_short_str(),
352+
"Result Detail": record.result.to_detail_str(),
353+
"Delegated": "True" if record.is_delegated() else "False",
317354
"Quantize Time (s)": (
318-
record.quantize_time.total_seconds() if record.quantize_time else None
355+
f"{record.quantize_time.total_seconds():.3f}"
356+
if record.quantize_time
357+
else None
319358
),
320-
"Lowering Time (s)": (
321-
record.lower_time.total_seconds() if record.lower_time else None
359+
"Lower Time (s)": (
360+
f"{record.lower_time.total_seconds():.3f}"
361+
if record.lower_time
362+
else None
322363
),
323364
}
324365
if record.params is not None:
325366
row.update({k.capitalize(): v for k, v in record.params.items()})
326367

327368
for output_idx, error_stats in enumerate(record.tensor_error_statistics):
328-
row[f"Output {output_idx} Error Max"] = error_stats.error_max
329-
row[f"Output {output_idx} Error MAE"] = error_stats.error_mae
330-
row[f"Output {output_idx} Error MSD"] = error_stats.error_msd
331-
row[f"Output {output_idx} Error L2"] = error_stats.error_l2_norm
332-
row[f"Output {output_idx} SQNR"] = error_stats.sqnr
369+
row[f"Output {output_idx} Error Max"] = f"{error_stats.error_max:.3f}"
370+
row[f"Output {output_idx} Error MAE"] = f"{error_stats.error_mae:.3f}"
371+
row[f"Output {output_idx} SNR"] = f"{error_stats.sqnr:.3f}"
333372

334373
row["Delegated Nodes"] = _sum_op_counts(record.delegated_op_counts)
335374
row["Undelegated Nodes"] = _sum_op_counts(record.undelegated_op_counts)
336375
row["Delegated Ops"] = _serialize_op_counts(record.delegated_op_counts)
337376
row["Undelegated Ops"] = _serialize_op_counts(record.undelegated_op_counts)
338377
row["PTE Size (Kb)"] = (
339-
record.pte_size_bytes / 1000.0 if record.pte_size_bytes else ""
378+
f"{record.pte_size_bytes / 1000.0:.3f}" if record.pte_size_bytes else ""
340379
)
341380

342381
writer.writerow(row)

backends/test/suite/tests/test_reporting.py

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -79,36 +79,32 @@ def test_csv_report_simple(self):
7979
# Validate first record: test1, backend1, SUCCESS
8080
self.assertEqual(records[0]["Test ID"], "test1_backend1_flow1")
8181
self.assertEqual(records[0]["Test Case"], "test1")
82-
self.assertEqual(records[0]["Backend"], "backend1")
8382
self.assertEqual(records[0]["Flow"], "flow1")
84-
self.assertEqual(records[0]["Result"], "Success (Delegated)")
83+
self.assertEqual(records[0]["Result"], "Pass")
8584
self.assertEqual(records[0]["Dtype"], "")
8685
self.assertEqual(records[0]["Use_dynamic_shapes"], "")
8786

8887
# Validate second record: test1, backend2, LOWER_FAIL
8988
self.assertEqual(records[1]["Test ID"], "test1_backend2_flow1")
9089
self.assertEqual(records[1]["Test Case"], "test1")
91-
self.assertEqual(records[1]["Backend"], "backend2")
9290
self.assertEqual(records[1]["Flow"], "flow1")
93-
self.assertEqual(records[1]["Result"], "Fail (Lowering)")
91+
self.assertEqual(records[1]["Result"], "Fail")
9492
self.assertEqual(records[1]["Dtype"], "")
9593
self.assertEqual(records[1]["Use_dynamic_shapes"], "")
9694

9795
# Validate third record: test2, backend1, SUCCESS_UNDELEGATED with dtype param
9896
self.assertEqual(records[2]["Test ID"], "test2_backend1_flow1")
9997
self.assertEqual(records[2]["Test Case"], "test2")
100-
self.assertEqual(records[2]["Backend"], "backend1")
10198
self.assertEqual(records[2]["Flow"], "flow1")
102-
self.assertEqual(records[2]["Result"], "Success (Undelegated)")
99+
self.assertEqual(records[2]["Result"], "Pass")
103100
self.assertEqual(records[2]["Dtype"], str(torch.float32))
104101
self.assertEqual(records[2]["Use_dynamic_shapes"], "")
105102

106103
# Validate fourth record: test2, backend2, SKIPPED with use_dynamic_shapes param
107104
self.assertEqual(records[3]["Test ID"], "test2_backend2_flow1")
108105
self.assertEqual(records[3]["Test Case"], "test2")
109-
self.assertEqual(records[3]["Backend"], "backend2")
110106
self.assertEqual(records[3]["Flow"], "flow1")
111-
self.assertEqual(records[3]["Result"], "Skipped")
107+
self.assertEqual(records[3]["Result"], "Skip")
112108
self.assertEqual(records[3]["Dtype"], "")
113109
self.assertEqual(records[3]["Use_dynamic_shapes"], "True")
114110

0 commit comments

Comments
 (0)