[Backend Tester] Clean up report output #13306

Open · wants to merge 4 commits into base: gh/GregoryComer/116/head
71 changes: 55 additions & 16 deletions backends/test/suite/reporting.py
@@ -58,6 +58,36 @@ def is_non_backend_failure(self):
     def is_backend_failure(self):
         return not self.is_success() and not self.is_non_backend_failure()

+    def to_short_str(self):
+        if self in {TestResult.SUCCESS, TestResult.SUCCESS_UNDELEGATED}:
+            return "Pass"
+        elif self == TestResult.SKIPPED:
+            return "Skip"
+        else:
+            return "Fail"
+
+    def to_detail_str(self):
+        if self == TestResult.SUCCESS:
+            return ""
+        elif self == TestResult.SUCCESS_UNDELEGATED:
+            return ""
+        elif self == TestResult.SKIPPED:
+            return ""
+        elif self == TestResult.QUANTIZE_FAIL:
+            return "Quantization Failed"
+        elif self == TestResult.LOWER_FAIL:
+            return "Lowering Failed"
+        elif self == TestResult.PTE_LOAD_FAIL:
+            return "PTE Load Failed"
+        elif self == TestResult.PTE_RUN_FAIL:
+            return "PTE Run Failed"
+        elif self == TestResult.OUTPUT_MISMATCH_FAIL:
+            return "Output Mismatch"
+        elif self == TestResult.UNKNOWN_FAIL:
+            return "Unknown Failure"
+        else:
+            raise ValueError(f"Invalid TestResult value: {self}.")
+
     def display_name(self):
         if self == TestResult.SUCCESS:
             return "Success (Delegated)"
@@ -129,6 +159,13 @@ class TestCaseSummary:
     pte_size_bytes: int | None = None
     """ The size of the PTE file in bytes. """

+    def is_delegated(self):
+        return (
+            any(v > 0 for v in self.delegated_op_counts.values())
+            if self.delegated_op_counts
+            else False
+        )
+

 class TestSessionState:
     test_case_summaries: list[TestCaseSummary]
@@ -260,11 +297,12 @@ def generate_csv_report(summary: RunSummary, output: TextIO):
     field_names = [
         "Test ID",
         "Test Case",
-        "Backend",
         "Flow",
         "Result",
+        "Result Detail",
+        "Delegated",
         "Quantize Time (s)",
-        "Lowering Time (s)",
+        "Lower Time (s)",
     ]

     # Tests can have custom parameters. We'll want to report them here, so we need
@@ -289,9 +327,7 @@ def generate_csv_report(summary: RunSummary, output: TextIO):
             [
                 f"Output {i} Error Max",
                 f"Output {i} Error MAE",
-                f"Output {i} Error MSD",
-                f"Output {i} Error L2",
-                f"Output {i} SQNR",
+                f"Output {i} SNR",
             ]
         )
     field_names.extend(
@@ -311,32 +347,35 @@ def generate_csv_report(summary: RunSummary, output: TextIO):
         row = {
             "Test ID": record.name,
             "Test Case": record.base_name,
-            "Backend": record.backend,
             "Flow": record.flow,
-            "Result": record.result.display_name(),
+            "Result": record.result.to_short_str(),
+            "Result Detail": record.result.to_detail_str(),
+            "Delegated": "True" if record.is_delegated() else "False",
             "Quantize Time (s)": (
-                record.quantize_time.total_seconds() if record.quantize_time else None
+                f"{record.quantize_time.total_seconds():.3f}"
+                if record.quantize_time
+                else None
             ),
-            "Lowering Time (s)": (
-                record.lower_time.total_seconds() if record.lower_time else None
+            "Lower Time (s)": (
+                f"{record.lower_time.total_seconds():.3f}"
+                if record.lower_time
+                else None
             ),
         }
         if record.params is not None:
             row.update({k.capitalize(): v for k, v in record.params.items()})

         for output_idx, error_stats in enumerate(record.tensor_error_statistics):
-            row[f"Output {output_idx} Error Max"] = error_stats.error_max
-            row[f"Output {output_idx} Error MAE"] = error_stats.error_mae
-            row[f"Output {output_idx} Error MSD"] = error_stats.error_msd
-            row[f"Output {output_idx} Error L2"] = error_stats.error_l2_norm
-            row[f"Output {output_idx} SQNR"] = error_stats.sqnr
+            row[f"Output {output_idx} Error Max"] = f"{error_stats.error_max:.3f}"
+            row[f"Output {output_idx} Error MAE"] = f"{error_stats.error_mae:.3f}"
+            row[f"Output {output_idx} SNR"] = f"{error_stats.sqnr:.3f}"

         row["Delegated Nodes"] = _sum_op_counts(record.delegated_op_counts)
         row["Undelegated Nodes"] = _sum_op_counts(record.undelegated_op_counts)
         row["Delegated Ops"] = _serialize_op_counts(record.delegated_op_counts)
         row["Undelegated Ops"] = _serialize_op_counts(record.undelegated_op_counts)
         row["PTE Size (Kb)"] = (
-            record.pte_size_bytes / 1000.0 if record.pte_size_bytes else ""
+            f"{record.pte_size_bytes / 1000.0:.3f}" if record.pte_size_bytes else ""
         )

         writer.writerow(row)
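For illustration only, not part of this diff: a minimal, self-contained sketch of how the new short/detail result strings and the "Delegated" column combine into a CSV row. The enum and helper below are simplified stand-ins, not the suite's actual TestResult or reporting code.

# Illustration only: simplified stand-ins, not the actual test-suite classes.
import csv
import io
from enum import Enum, auto


class DemoResult(Enum):
    SUCCESS = auto()
    SUCCESS_UNDELEGATED = auto()
    LOWER_FAIL = auto()

    def to_short_str(self):
        # Mirrors the PR's mapping: successes collapse to "Pass", failures to "Fail".
        if self in {DemoResult.SUCCESS, DemoResult.SUCCESS_UNDELEGATED}:
            return "Pass"
        return "Fail"

    def to_detail_str(self):
        # Only failures carry a detail string, e.g. "Lowering Failed".
        return "Lowering Failed" if self == DemoResult.LOWER_FAIL else ""


def write_demo_row(result, delegated_op_counts):
    # "Delegated" is True only when at least one op was actually delegated,
    # matching the is_delegated() helper added above.
    delegated = (
        any(v > 0 for v in delegated_op_counts.values()) if delegated_op_counts else False
    )
    out = io.StringIO()
    writer = csv.DictWriter(
        out, fieldnames=["Result", "Result Detail", "Delegated"], lineterminator="\n"
    )
    writer.writeheader()
    writer.writerow(
        {
            "Result": result.to_short_str(),
            "Result Detail": result.to_detail_str(),
            "Delegated": "True" if delegated else "False",
        }
    )
    return out.getvalue()


print(write_demo_row(DemoResult.SUCCESS, {"aten::add": 3}))
# Result,Result Detail,Delegated
# Pass,,True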
12 changes: 4 additions & 8 deletions backends/test/suite/tests/test_reporting.py
@@ -79,36 +79,32 @@ def test_csv_report_simple(self):
         # Validate first record: test1, backend1, SUCCESS
         self.assertEqual(records[0]["Test ID"], "test1_backend1_flow1")
         self.assertEqual(records[0]["Test Case"], "test1")
-        self.assertEqual(records[0]["Backend"], "backend1")
         self.assertEqual(records[0]["Flow"], "flow1")
-        self.assertEqual(records[0]["Result"], "Success (Delegated)")
+        self.assertEqual(records[0]["Result"], "Pass")
         self.assertEqual(records[0]["Dtype"], "")
         self.assertEqual(records[0]["Use_dynamic_shapes"], "")

         # Validate second record: test1, backend2, LOWER_FAIL
         self.assertEqual(records[1]["Test ID"], "test1_backend2_flow1")
         self.assertEqual(records[1]["Test Case"], "test1")
-        self.assertEqual(records[1]["Backend"], "backend2")
         self.assertEqual(records[1]["Flow"], "flow1")
-        self.assertEqual(records[1]["Result"], "Fail (Lowering)")
+        self.assertEqual(records[1]["Result"], "Fail")
         self.assertEqual(records[1]["Dtype"], "")
         self.assertEqual(records[1]["Use_dynamic_shapes"], "")

         # Validate third record: test2, backend1, SUCCESS_UNDELEGATED with dtype param
         self.assertEqual(records[2]["Test ID"], "test2_backend1_flow1")
         self.assertEqual(records[2]["Test Case"], "test2")
-        self.assertEqual(records[2]["Backend"], "backend1")
         self.assertEqual(records[2]["Flow"], "flow1")
-        self.assertEqual(records[2]["Result"], "Success (Undelegated)")
+        self.assertEqual(records[2]["Result"], "Pass")
         self.assertEqual(records[2]["Dtype"], str(torch.float32))
         self.assertEqual(records[2]["Use_dynamic_shapes"], "")

         # Validate fourth record: test2, backend2, EXPORT_FAIL with use_dynamic_shapes param
         self.assertEqual(records[3]["Test ID"], "test2_backend2_flow1")
         self.assertEqual(records[3]["Test Case"], "test2")
-        self.assertEqual(records[3]["Backend"], "backend2")
         self.assertEqual(records[3]["Flow"], "flow1")
-        self.assertEqual(records[3]["Result"], "Skipped")
+        self.assertEqual(records[3]["Result"], "Skip")
         self.assertEqual(records[3]["Dtype"], "")
         self.assertEqual(records[3]["Use_dynamic_shapes"], "True")

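Also illustration only: one way a consumer might read the generated report and surface failures, using just the columns the tests above assert on. The file path and helper name are hypothetical, not part of the PR.

# Illustration only: hypothetical consumer of the CSV report.
import csv


def summarize_failures(report_path="backend_test_report.csv"):
    # Collect (Test ID, Result Detail) pairs for every failed row.
    failures = []
    with open(report_path, newline="") as f:
        for row in csv.DictReader(f):
            if row["Result"] == "Fail":
                # "Result Detail" names the failing stage, e.g. "Lowering Failed".
                failures.append((row["Test ID"], row["Result Detail"]))
    return failures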