Skip to content

Commit c695c9c

Browse files
GregoryComer authored and StrycekSimon committed
[Backend Tester] Add pass rate breakdown by parameterization to markdown summary (pytorch#14360)
Add a table showing pass rate by test parameters. This gives a breakdown by dtype and dynamic shape on/off for model tests, making it easier to see the pass rate for f32 + static shapes. Also, run on release branches.
1 parent 5d2b812 commit c695c9c

File tree

9 files changed

+188
-67
lines changed

9 files changed

+188
-67
lines changed

.github/workflows/test-backend-arm.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ on:
44
schedule:
55
- cron: 0 2 * * *
66
push:
7+
branches:
8+
- release/*
79
tags:
810
- ciflow/nightly/*
911
pull_request:

.github/workflows/test-backend-coreml.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ on:
44
schedule:
55
- cron: 0 2 * * *
66
push:
7+
branches:
8+
- release/*
79
tags:
810
- ciflow/nightly/*
911
pull_request:

.github/workflows/test-backend-qnn.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ on:
44
schedule:
55
- cron: 0 2 * * *
66
push:
7+
branches:
8+
- release/*
79
tags:
810
- ciflow/nightly/*
911
pull_request:

.github/workflows/test-backend-vulkan.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ on:
44
schedule:
55
- cron: 0 2 * * *
66
push:
7+
branches:
8+
- release/*
79
tags:
810
- ciflow/nightly/*
911
pull_request:

.github/workflows/test-backend-xnnpack.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ on:
44
schedule:
55
- cron: 0 2 * * *
66
push:
7+
branches:
8+
- release/*
79
tags:
810
- ciflow/nightly/*
911
pull_request:

backends/test/suite/generate_markdown_summary.py

Lines changed: 168 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -1,124 +1,229 @@
11
import argparse
22
import csv
3+
import json
34
import sys
45

5-
#
6-
# A standalone script to generate a Markdown representation of a test report.
7-
# This is primarily intended to be used with GitHub actions to generate a nice
8-
# representation of the test results when looking at the action run.
9-
#
10-
# Usage: python executorch/backends/test/suite/generate_markdown_summary.py <path to test report CSV file>
11-
# Markdown is written to stdout.
12-
#
6+
from dataclasses import dataclass, field
137

148

15-
def escape_for_markdown(text: str) -> str:
9+
@dataclass
10+
class ResultCounts:
1611
"""
17-
Modify a string to properly display in a markdown table cell.
12+
Represents aggregated result counts for each status.
1813
"""
19-
if not text:
20-
return text
2114

22-
# Replace newlines with <br /> tags
23-
escaped = text.replace("\n", "<br />")
15+
total: int = 0
16+
passes: int = 0
17+
fails: int = 0
18+
skips: int = 0
19+
by_detail: dict[str, int] = field(default_factory=lambda: {})
2420

25-
# Escape backslashes.
26-
escaped = escaped.replace("\\", "\\\\")
21+
def add_row(self, result_value: str, result_detail: str) -> None:
22+
"""
23+
Update the result counts for the specified row.
24+
"""
2725

28-
# Escape pipe characters that would break table structure
29-
escaped = escaped.replace("|", "\\|")
26+
self.total += 1
3027

31-
return escaped
28+
if result_value == "Pass":
29+
self.passes += 1
30+
elif result_value == "Fail":
31+
self.fails += 1
32+
elif result_value == "Skip":
33+
self.skips += 1
34+
else:
35+
raise RuntimeError(f"Unknown result value {result_value}")
3236

37+
if result_detail:
38+
if result_detail not in self.by_detail:
39+
self.by_detail[result_detail] = 0
40+
41+
self.by_detail[result_detail] += 1
42+
43+
44+
@dataclass
45+
class AggregatedSummary:
46+
"""
47+
Represents aggregated summary data for the test run.
48+
"""
49+
50+
counts: ResultCounts
51+
counts_by_params: dict[str, ResultCounts]
52+
failed_tests: list[list[str]]
53+
header: list[str]
54+
55+
56+
#
57+
# A standalone script to generate a Markdown representation of a test report.
58+
# This is primarily intended to be used with GitHub actions to generate a nice
59+
# representation of the test results when looking at the action run.
60+
#
61+
# Usage: python executorch/backends/test/suite/generate_markdown_summary.py <path to test report CSV file>
62+
# Markdown is written to stdout.
63+
#
3364

34-
def generate_markdown(csv_path: str, exit_code: int = 0): # noqa (C901)
35-
# Print warning if exit code is non-zero
36-
if exit_code != 0:
37-
print("> [!WARNING]")
38-
print(
39-
f"> Exit code {exit_code} was non-zero. Test process may have crashed. Check the job logs for more information.\n"
40-
)
4165

66+
def aggregate_results(csv_path: str) -> AggregatedSummary:
4267
with open(csv_path, newline="", encoding="utf-8") as f:
4368
reader = csv.reader(f)
4469
rows = list(reader)
4570

4671
header = rows[0]
4772
data_rows = rows[1:]
4873

49-
# Find the Result and Result Detail column indices
50-
result_column_index = None
51-
result_detail_column_index = None
52-
for i, col in enumerate(header):
53-
if col.lower() == "result":
54-
result_column_index = i
55-
elif col.lower() == "result detail":
56-
result_detail_column_index = i
74+
header_indices_by_name = {n.lower(): i for (i, n) in enumerate(header)}
75+
params_column_index = header_indices_by_name.get("params", None)
76+
result_column_index = header_indices_by_name["result"]
77+
result_detail_column_index = header_indices_by_name["result detail"]
5778

5879
# Count results and prepare data
59-
pass_count = 0
60-
fail_count = 0
61-
skip_count = 0
80+
counts = ResultCounts()
6281
failed_tests = []
63-
processed_rows = []
64-
result_detail_counts = {}
82+
counts_by_param = {}
6583

6684
for row in data_rows:
85+
result = row[result_column_index]
86+
result_detail = row[result_detail_column_index]
87+
88+
counts.add_row(result, result_detail)
89+
90+
params = row[params_column_index] if params_column_index else None
91+
if params:
92+
if params not in counts_by_param:
93+
counts_by_param[params] = ResultCounts()
94+
counts_by_param[params].add_row(result, result_detail)
95+
6796
# Make a copy of the row to avoid modifying the original
6897
processed_row = [escape_for_markdown(cell) for cell in row]
6998

7099
# Count results and collect failed tests
71100
if result_column_index is not None and result_column_index < len(row):
72101
result_value = row[result_column_index].strip().lower()
73102
if result_value == "pass":
74-
pass_count += 1
75103
processed_row[result_column_index] = (
76104
'<span style="color:green">Pass</span>'
77105
)
78106
elif result_value == "fail":
79-
fail_count += 1
80107
processed_row[result_column_index] = (
81108
'<span style="color:red">Fail</span>'
82109
)
83110
failed_tests.append(processed_row.copy())
84111
elif result_value == "skip":
85-
skip_count += 1
86112
processed_row[result_column_index] = (
87113
'<span style="color:gray">Skip</span>'
88114
)
89115

90-
# Count result details (excluding empty ones)
91-
if result_detail_column_index is not None and result_detail_column_index < len(
92-
row
93-
):
94-
result_detail_value = row[result_detail_column_index].strip()
95-
if result_detail_value: # Only count non-empty result details
96-
if result_detail_value in result_detail_counts:
97-
result_detail_counts[result_detail_value] += 1
98-
else:
99-
result_detail_counts[result_detail_value] = 1
116+
return AggregatedSummary(
117+
counts=counts,
118+
failed_tests=failed_tests,
119+
counts_by_params=counts_by_param,
120+
header=header,
121+
)
122+
123+
124+
def escape_for_markdown(text: str) -> str:
125+
"""
126+
Modify a string to properly display in a markdown table cell.
127+
"""
128+
if not text:
129+
return text
130+
131+
# Replace newlines with <br /> tags
132+
escaped = text.replace("\n", "<br />")
100133

101-
processed_rows.append(processed_row)
134+
# Escape backslashes.
135+
escaped = escaped.replace("\\", "\\\\")
136+
137+
# Escape pipe characters that would break table structure
138+
escaped = escaped.replace("|", "\\|")
139+
140+
return escaped
141+
142+
143+
def generate_markdown(csv_path: str, exit_code: int = 0): # noqa (C901)
144+
# Print warning if exit code is non-zero
145+
if exit_code != 0:
146+
print("> [!WARNING]")
147+
print(
148+
f"> Exit code {exit_code} was non-zero. Test process may have crashed. Check the job logs for more information.\n"
149+
)
150+
151+
results = aggregate_results(csv_path)
102152

103153
# Generate Summary section
104-
total_rows = len(data_rows)
105154
print("# Summary\n")
106-
print(f"- **Pass**: {pass_count}/{total_rows}")
107-
print(f"- **Fail**: {fail_count}/{total_rows}")
108-
print(f"- **Skip**: {skip_count}/{total_rows}")
155+
total_excluding_skips = results.counts.passes + results.counts.fails
156+
pass_fraction = results.counts.passes / total_excluding_skips
157+
fail_fraction = results.counts.fails / total_excluding_skips
158+
print(
159+
f"- **Pass**: {results.counts.passes}/{total_excluding_skips} ({pass_fraction*100:.2f}%)"
160+
)
161+
print(
162+
f"- **Fail**: {results.counts.fails}/{total_excluding_skips} ({fail_fraction*100:.2f}%)"
163+
)
164+
print(f"- **Skip**: {results.counts.skips}")
165+
166+
if results.counts_by_params:
167+
print("\n## Results by Parameters\n")
168+
169+
# Extract all unique parameter keys from the JSON strings
170+
all_param_keys = set()
171+
parsed_params = {}
172+
173+
for params_str in results.counts_by_params.keys():
174+
# Parse the JSON string (it's a string representation of a dict)
175+
params_dict = json.loads(params_str)
176+
parsed_params[params_str] = params_dict
177+
all_param_keys.update(params_dict.keys())
178+
179+
if parsed_params and len(parsed_params) > 1:
180+
# Sort parameter keys for consistent column ordering
181+
sorted_param_keys = sorted(all_param_keys)
182+
183+
# Create table header
184+
header_cols = sorted_param_keys + ["Pass", "Fail", "Skip", "Pass %"]
185+
print("| " + " | ".join(header_cols) + " |")
186+
print("|" + "|".join(["---"] * len(header_cols)) + "|")
187+
188+
# Create table rows
189+
for params_str, counts in results.counts_by_params.items():
190+
if params_str in parsed_params:
191+
params_dict = parsed_params[params_str]
192+
row_values = []
193+
194+
# Add parameter values
195+
for key in sorted_param_keys:
196+
value = params_dict.get(key, "")
197+
row_values.append(str(value))
198+
199+
pass_fraction = counts.passes / (counts.passes + counts.fails)
200+
201+
# Add count values
202+
row_values.extend(
203+
[
204+
str(counts.passes),
205+
str(counts.fails),
206+
str(counts.skips),
207+
f"{pass_fraction*100:.2f}%",
208+
]
209+
)
210+
211+
print("| " + " | ".join(row_values) + " |")
212+
213+
print()
109214

110215
print("## Failure Breakdown:")
111-
total_rows_with_result_detail = sum(result_detail_counts.values())
112-
for detail, count in sorted(result_detail_counts.items()):
216+
total_rows_with_result_detail = sum(results.counts.by_detail.values())
217+
for detail, count in sorted(results.counts.by_detail.items()):
113218
print(f"- **{detail}**: {count}/{total_rows_with_result_detail}")
114219

115220
# Generate Failed Tests section
116221
print("# Failed Tests\n")
117-
if failed_tests:
118-
escaped_header = [escape_for_markdown(col) for col in header]
222+
if results.failed_tests:
223+
escaped_header = [escape_for_markdown(col) for col in results.header]
119224
print("| " + " | ".join(escaped_header) + " |")
120-
print("|" + "|".join(["---"] * len(header)) + "|")
121-
for row in failed_tests:
225+
print("|" + "|".join(["---"] * len(results.header)) + "|")
226+
for row in results.failed_tests:
122227
print("| " + " | ".join(row) + " |")
123228
else:
124229
print("No failed tests.\n")

backends/test/suite/reporting.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import csv
2+
import json
23

34
from collections import Counter
45
from dataclasses import dataclass, field
@@ -343,7 +344,9 @@ def _sum_op_counts(counter: Counter | None) -> int | None:
343344

344345
def _serialize_params(params: dict[str, Any] | None) -> str:
345346
if params is not None:
346-
return str(dict(sorted(params.items())))
347+
# Convert values to strings - JSON conversion doesn't like dtypes.
348+
str_params = {k: str(v) for k, v in params.items()}
349+
return json.dumps(str_params)
347350
else:
348351
return ""
349352

backends/test/suite/runner.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ def _graph_has_unsupported_patterns(program: torch.export.ExportedProgram) -> bo
5757
and node.target == exir_ops.edge.aten.convolution.default
5858
):
5959
in_rank = node.args[0].meta["val"].dim()
60-
if in_rank != 4:
60+
if in_rank > 4:
6161
return True
6262

6363
return False

backends/test/suite/tests/test_reporting.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import json
12
import unittest
23

34
from csv import DictReader
@@ -102,14 +103,16 @@ def test_csv_report_simple(self):
102103
self.assertEqual(records[2]["Test Case"], "test2")
103104
self.assertEqual(records[2]["Flow"], "flow1")
104105
self.assertEqual(records[2]["Result"], "Pass")
105-
self.assertEqual(records[2]["Params"], str({"dtype": torch.float32}))
106+
self.assertEqual(records[2]["Params"], json.dumps({"dtype": "torch.float32"}))
106107

107108
# Validate fourth record: test2, backend2, EXPORT_FAIL with use_dynamic_shapes param
108109
self.assertEqual(records[3]["Test ID"], "test2_backend2_flow1")
109110
self.assertEqual(records[3]["Test Case"], "test2")
110111
self.assertEqual(records[3]["Flow"], "flow1")
111112
self.assertEqual(records[3]["Result"], "Skip")
112-
self.assertEqual(records[3]["Params"], str({"use_dynamic_shapes": True}))
113+
self.assertEqual(
114+
records[3]["Params"], json.dumps({"use_dynamic_shapes": "True"})
115+
)
113116

114117
def test_count_ops(self):
115118
"""

0 commit comments

Comments
 (0)