
Commit a6c9a30

Update
[ghstack-poisoned]
1 parent 641e737 commit a6c9a30

10 files changed: +588 -252 lines changed


backends/test/suite/__init__.py

Lines changed: 6 additions & 0 deletions
@@ -11,6 +11,7 @@
 import os
 
 import executorch.backends.test.suite.flow
+import torch
 
 from executorch.backends.test.suite.flow import TestFlow
 from executorch.backends.test.suite.runner import runner_main
@@ -55,6 +56,11 @@ def get_test_flows() -> dict[str, TestFlow]:
     return _ALL_TEST_FLOWS
 
 
+def dtype_to_str(dtype: torch.dtype) -> str:
+    # Strip off "torch."
+    return str(dtype)[6:]
+
+
 def load_tests(loader, suite, pattern):
     package_dir = os.path.dirname(__file__)
     discovered_suite = loader.discover(
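
A quick sanity check of the new helper (a sketch, not part of the commit; it relies only on the standard string form of torch dtypes, e.g. str(torch.float32) == "torch.float32"):

import torch

from executorch.backends.test.suite import dtype_to_str

# str(torch.float32) == "torch.float32"; slicing off the first 6 characters leaves "float32".
assert dtype_to_str(torch.float32) == "float32"
assert dtype_to_str(torch.int8) == "int8"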

backends/test/suite/conftest.py

Lines changed: 109 additions & 0 deletions
@@ -0,0 +1,109 @@
+import pytest
+import torch
+
+from executorch.backends.test.suite.flow import TestFlow, all_flows
+from executorch.backends.test.suite.reporting import _sum_op_counts
+from executorch.backends.test.suite.runner import run_test
+
+from typing import Any
+
+BACKENDS = ["xnnpack", "coreml", "vulkan", "qnn", "arm"]
+
+def pytest_configure(config):
+    for backend in BACKENDS:
+        config.addinivalue_line("markers", f"backend_{backend}: mark a test as testing the {backend} backend")
+
+class TestRunner:
+    def __init__(self, flow, test_name, test_base_name):
+        self._flow = flow
+        self._test_name = test_name
+        self._test_base_name = test_base_name
+        self._subtest = 0
+        self._results = []
+
+    def lower_and_run_model(self, model: torch.nn.Module, inputs: Any, generate_random_test_inputs=True):
+        run_summary = run_test(
+            model,
+            inputs,
+            self._flow,
+            self._test_name,
+            self._test_base_name,
+            self._subtest,
+            None,
+            generate_random_test_inputs=generate_random_test_inputs,
+        )
+
+        self._subtest += 1
+        self._results.append(run_summary)
+
+        if not run_summary.result.is_success():
+            if run_summary.result.is_backend_failure():
+                raise RuntimeError("Test failure.") from run_summary.error
+            else:
+                # Non-backend failure indicates a bad test. Mark as skipped.
+                pytest.skip(
+                    f"Test failed for reasons other than backend failure. Error: {run_summary.error}"
+                )
+
+@pytest.fixture(params=all_flows().values(), ids=str)
+def test_runner(request):
+    return TestRunner(request.param, request.node.name, request.node.originalname)
+
+@pytest.hookimpl(optionalhook=True)
+def pytest_json_runtest_metadata(item, call):
+    metadata = {
+        "subtests": []
+    }
+
+    if hasattr(item, "funcargs") and "test_runner" in item.funcargs:
+        runner_instance = item.funcargs["test_runner"]
+
+        for record in runner_instance._results:
+            subtest_metadata = {}
+
+            error_message = ""
+            if record.error is not None:
+                error_str = str(record.error)
+                if len(error_str) > 400:
+                    error_message = error_str[:200] + "..." + error_str[-200:]
+                else:
+                    error_message = error_str
+
+            subtest_metadata["Test ID"] = record.name
+            subtest_metadata["Test Case"] = record.base_name
+            subtest_metadata["Subtest"] = record.subtest_index
+            subtest_metadata["Flow"] = record.flow
+            subtest_metadata["Params"] = record.params
+            subtest_metadata["Result"] = record.result.to_short_str()
+            subtest_metadata["Result Detail"] = record.result.to_detail_str()
+            subtest_metadata["Error"] = error_message
+            subtest_metadata["Delegated"] = "True" if record.is_delegated() else "False"
+            subtest_metadata["Quantize Time (s)"] = (
+                f"{record.quantize_time.total_seconds():.3f}"
+                if record.quantize_time
+                else None
+            )
+            subtest_metadata["Lower Time (s)"] = (
+                f"{record.lower_time.total_seconds():.3f}" if record.lower_time else None
+            )
+
+            for output_idx, error_stats in enumerate(record.tensor_error_statistics):
+                subtest_metadata[f"Output {output_idx} Error Max"] = f"{error_stats.error_max:.3f}"
+                subtest_metadata[f"Output {output_idx} Error MAE"] = f"{error_stats.error_mae:.3f}"
+                subtest_metadata[f"Output {output_idx} SNR"] = f"{error_stats.sqnr:.3f}"
+
+            subtest_metadata["Delegated Nodes"] = _sum_op_counts(record.delegated_op_counts)
+            subtest_metadata["Undelegated Nodes"] = _sum_op_counts(record.undelegated_op_counts)
+            if record.delegated_op_counts:
+                subtest_metadata["Delegated Ops"] = dict(record.delegated_op_counts)
+            if record.undelegated_op_counts:
+                subtest_metadata["Undelegated Ops"] = dict(record.undelegated_op_counts)
+            subtest_metadata["PTE Size (Kb)"] = (
+                f"{record.pte_size_bytes / 1000.0:.3f}" if record.pte_size_bytes else ""
+            )
+
+            metadata["subtests"].append(subtest_metadata)
+
+    return metadata
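
For context, a minimal sketch of how a suite test might consume the test_runner fixture defined above (the model and test name are illustrative, not part of this commit):

import torch


class AddOne(torch.nn.Module):
    # Trivial example model; any exportable module would do.
    def forward(self, x):
        return x + 1


def test_add_one(test_runner):
    # The fixture is parameterized over all registered flows; lower_and_run_model
    # lowers the module for that flow, runs it, and records a subtest result.
    test_runner.lower_and_run_model(AddOne(), (torch.randn(4),))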

backends/test/suite/flow.py

Lines changed: 3 additions & 0 deletions
@@ -44,6 +44,9 @@ class TestFlow:
     def should_skip_test(self, test_name: str) -> bool:
         return any(pattern in test_name for pattern in self.skip_patterns)
 
+    def __str__(self):
+        return self.name
+
 
 def all_flows() -> dict[str, TestFlow]:
     flows = []
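
The new __str__ is what lets conftest.py above pass ids=str to the fixture, so parameterized test IDs show the flow name. A small illustration (actual flow names depend on which backends are registered):

from executorch.backends.test.suite.flow import all_flows

# Each flow now stringifies to its name, so pytest IDs read like
# "test_add_one[xnnpack]" instead of an object repr.
for flow in all_flows().values():
    print(str(flow))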

backends/test/suite/generate_markdown_summary.py

Lines changed: 251 additions & 0 deletions
@@ -0,0 +1,251 @@
+import argparse
+import csv
+import functools
+import json
+import sys
+
+from dataclasses import dataclass, field
+
+
+@dataclass
+class ResultCounts:
+    """
+    Represents aggregated result counts for each status.
+    """
+
+    total: int = 0
+    passes: int = 0
+    fails: int = 0
+    skips: int = 0
+    by_detail: dict[str, int] = field(default_factory=lambda: {})
+
+    def add_row(self, result_value: str, result_detail: str) -> None:
+        """
+        Update the result counts for the specified row.
+        """
+
+        self.total += 1
+
+        if result_value == "Pass":
+            self.passes += 1
+        elif result_value == "Fail":
+            self.fails += 1
+        elif result_value == "Skip":
+            self.skips += 1
+        else:
+            raise RuntimeError(f"Unknown result value {result_value}")
+
+        if result_detail:
+            if result_detail not in self.by_detail:
+                self.by_detail[result_detail] = 0
+
+            self.by_detail[result_detail] += 1
+
+
+@dataclass
+class AggregatedSummary:
+    """
+    Represents aggregated summary data for the test run.
+    """
+
+    counts: ResultCounts
+    counts_by_params: dict[str, ResultCounts]
+    failed_tests: list[list[str]]
+
+
+#
+# A standalone script to generate a Markdown representation of a test report.
+# This is primarily intended to be used with GitHub actions to generate a nice
+# representation of the test results when looking at the action run.
+#
+# Usage: python executorch/backends/test/suite/generate_markdown_summary.py <path to test report CSV file>
+# Markdown is written to stdout.
+#
+
+
+def aggregate_results(json_path: str) -> AggregatedSummary:
+    with open(json_path) as f:
+        data = json.load(f)
+
+    # Count results and prepare data
+    counts = ResultCounts()
+    failed_tests = []
+    counts_by_param = {}
+
+    for test_data in data["tests"]:
+        result_meta = test_data.get("metadata")
+        if result_meta:
+            for subtest_meta in result_meta["subtests"]:
+                result = subtest_meta["Result"]
+                result_detail = subtest_meta.get("Result Detail") or ""
+
+                counts.add_row(result, result_detail)
+
+                params = subtest_meta["Params"]
+                if params:
+                    if params not in counts_by_param:
+                        counts_by_param[params] = ResultCounts()
+                    counts_by_param[params].add_row(result, result_detail)
+
+                if result.lower() == "fail":
+                    failed_tests.append(subtest_meta)
+
+    return AggregatedSummary(
+        counts=counts,
+        failed_tests=failed_tests,
+        counts_by_params=counts_by_param,
+    )
+
+
+def escape_for_markdown(text: str) -> str:
+    """
+    Modify a string to properly display in a markdown table cell.
+    """
+    if not text:
+        return text
+
+    # Replace newlines with <br /> tags
+    escaped = text.replace("\n", "<br />")
+
+    # Escape backslashes.
+    escaped = escaped.replace("\\", "\\\\")
+
+    # Escape pipe characters that would break table structure
+    escaped = escaped.replace("|", "\\|")
+
+    return escaped
+
+
+def generate_markdown(json_path: str, exit_code: int = 0):  # noqa (C901)
+    # Print warning if exit code is non-zero
+    if exit_code != 0:
+        print("> [!WARNING]")
+        print(
+            f"> Exit code {exit_code} was non-zero. Test process may have crashed. Check the job logs for more information.\n"
+        )
+
+    results = aggregate_results(json_path)
+
+    # Generate Summary section
+    print("# Summary\n")
+    total_excluding_skips = results.counts.passes + results.counts.fails
+    pass_fraction = results.counts.passes / total_excluding_skips
+    fail_fraction = results.counts.fails / total_excluding_skips
+    print(
+        f"- **Pass**: {results.counts.passes}/{total_excluding_skips} ({pass_fraction*100:.2f}%)"
+    )
+    print(
+        f"- **Fail**: {results.counts.fails}/{total_excluding_skips} ({fail_fraction*100:.2f}%)"
+    )
+    print(f"- **Skip**: {results.counts.skips}")
+
+    if results.counts_by_params:
+        print("\n## Results by Parameters\n")
+
+        # Extract all unique parameter keys from the JSON strings
+        all_param_keys = set()
+        parsed_params = {}
+
+        for params_str in results.counts_by_params.keys():
+            # Parse the JSON string (it's a string representation of a dict)
+            params_dict = json.loads(params_str)
+            parsed_params[params_str] = params_dict
+            all_param_keys.update(params_dict.keys())
+
+        if parsed_params and len(parsed_params) > 1:
+            # Sort parameter keys for consistent column ordering
+            sorted_param_keys = sorted(all_param_keys)
+
+            # Create table header
+            header_cols = sorted_param_keys + ["Pass", "Fail", "Skip", "Pass %"]
+            print("| " + " | ".join(header_cols) + " |")
+            print("|" + "|".join(["---"] * len(header_cols)) + "|")
+
+            # Create table rows
+            for params_str, counts in results.counts_by_params.items():
+                if params_str in parsed_params:
+                    params_dict = parsed_params[params_str]
+                    row_values = []
+
+                    # Add parameter values
+                    for key in sorted_param_keys:
+                        value = params_dict.get(key, "")
+                        row_values.append(str(value))
+
+                    pass_fraction = counts.passes / (counts.passes + counts.fails)
+
+                    # Add count values
+                    row_values.extend(
+                        [
+                            str(counts.passes),
+                            str(counts.fails),
+                            str(counts.skips),
+                            f"{pass_fraction*100:.2f}%",
+                        ]
+                    )
+
+                    print("| " + " | ".join(row_values) + " |")
+
+            print()
+
+    print("## Failure Breakdown:")
+    total_rows_with_result_detail = sum(results.counts.by_detail.values())
+    for detail, count in sorted(results.counts.by_detail.items()):
+        print(f"- **{detail}**: {count}/{total_rows_with_result_detail}")
+
+    # Generate Failed Tests section
+    print("# Failed Tests\n")
+    if results.failed_tests:
+        header = build_header(results.failed_tests)
+
+        escaped_header = [escape_for_markdown(col) for col in header.keys()]
+        print("| " + " | ".join(escaped_header) + " |")
+        print("|" + "|".join(["---"] * len(escaped_header)) + "|")
+        for rec in results.failed_tests:
+            row = build_row(rec, header)
+            print("| " + " | ".join(row) + " |")
+    else:
+        print("No failed tests.\n")
+
+
+def build_header(data) -> dict[str, int]:
+    """
+    Find the union of all keys and return a dict of header keys and indices. Try to preserve
+    ordering as much as possible.
+    """
+
+    keys = max(data, key=len)
+
+    header = {k: i for (i, k) in enumerate(keys)}
+
+    for rec in data:
+        keys = set(rec.keys())
+        for k in keys:
+            if k not in header:
+                header[k] = len(header)
+
+    return header
+
+def build_row(rec, header: dict[str, int]) -> list[str]:
+    row = [""] * len(header)
+    for k, v in rec.items():
+        row[header[k]] = escape_for_markdown(str(v))
+    return row
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Generate a Markdown representation of a test report."
+    )
+    parser.add_argument("csv_path", help="Path to the test report CSV file.")
+    parser.add_argument(
+        "--exit-code", type=int, default=0, help="Exit code from the test process."
+    )
+    args = parser.parse_args()
+    generate_markdown(args.csv_path, args.exit_code)
+
+
+if __name__ == "__main__":
+    main()
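
A sketch of how the pieces are expected to fit together: pytest (with the pytest-json-report plugin) writes a JSON report containing the per-test "subtests" metadata emitted by the conftest.py hook above, and this script turns it into Markdown. The module path assumes the script lives at backends/test/suite/generate_markdown_summary.py as its usage string suggests; file name and field values below are illustrative:

import json

from executorch.backends.test.suite.generate_markdown_summary import generate_markdown

# Minimal report shaped like pytest-json-report output, carrying the "subtests"
# metadata produced by pytest_json_runtest_metadata (values are made up).
report = {
    "tests": [
        {
            "metadata": {
                "subtests": [
                    {
                        "Test ID": "test_add_one[xnnpack]",
                        "Test Case": "test_add_one",
                        "Params": '{"flow": "xnnpack"}',
                        "Result": "Pass",
                        "Result Detail": "",
                    }
                ]
            }
        }
    ]
}

with open("report.json", "w") as f:
    json.dump(report, f)

# Prints the Markdown summary to stdout, as the CI job would.
generate_markdown("report.json", exit_code=0)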
