Skip to content

Commit 8e4b5ed

Browse files
committed
[Backend Tester] Pick test jobs + logic onto release/1.0
1 parent 7f201c2 commit 8e4b5ed

33 files changed

+337
-53
lines changed

backends/test/suite/flow.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import logging
22

3-
from dataclasses import dataclass
3+
from dataclasses import dataclass, field
44
from typing import Callable
55

66
from executorch.backends.test.harness import Tester
@@ -35,6 +35,15 @@ class TestFlow:
3535
is_delegated: bool = True
3636
""" Indicates whether the flow is expected to generate CALL_DELEGATE nodes. """
3737

38+
skip_patterns: list[str] = field(default_factory=lambda: [])
39+
""" Tests with names containing any substrings in this list are skipped. """
40+
41+
supports_serialize: bool = True
42+
""" True if the test flow supports the Serialize stage. """
43+
44+
def should_skip_test(self, test_name: str) -> bool:
45+
return any(pattern in test_name for pattern in self.skip_patterns)
46+
3847

3948
def all_flows() -> dict[str, TestFlow]:
4049
flows = []
@@ -109,4 +118,13 @@ def all_flows() -> dict[str, TestFlow]:
109118
except Exception as e:
110119
logger.info(f"Skipping QNN flow registration: {e}")
111120

121+
try:
122+
from executorch.backends.test.suite.flows.arm import ARM_TOSA_FLOW
123+
124+
flows += [
125+
ARM_TOSA_FLOW,
126+
]
127+
except Exception as e:
128+
logger.info(f"Skipping ARM flow registration: {e}")
129+
112130
return {f.name: f for f in flows if f is not None}

backends/test/suite/flows/arm.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
from executorch.backends.arm.test import common
2+
from executorch.backends.arm.test.tester.arm_tester import ArmTester
3+
from executorch.backends.test.suite.flow import TestFlow
4+
5+
6+
def _create_arm_tester_tosa_fp(*args, **kwargs) -> ArmTester:
7+
kwargs["compile_spec"] = common.get_tosa_compile_spec(tosa_spec="TOSA-1.0+FP")
8+
9+
return ArmTester(
10+
*args,
11+
**kwargs,
12+
)
13+
14+
15+
def _create_tosa_flow() -> TestFlow:
16+
return TestFlow(
17+
"arm_tosa",
18+
backend="arm",
19+
tester_factory=_create_arm_tester_tosa_fp,
20+
supports_serialize=False,
21+
)
22+
23+
24+
ARM_TOSA_FLOW = _create_tosa_flow()

backends/test/suite/flows/coreml.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ def _create_coreml_flow(
1919
CoreMLTester, minimum_deployment_target=minimum_deployment_target
2020
),
2121
quantize=quantize,
22+
skip_patterns=["test_argmin", "test_argmax"],
2223
)
2324

2425

backends/test/suite/flows/vulkan.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ def _create_vulkan_flow_base(
2020
tester_factory=VulkanTester,
2121
quantize=quantize_stage_factory is not None,
2222
quantize_stage_factory=quantize_stage_factory,
23+
skip_patterns=["float16", "float64"], # Not supported in swiftshader
2324
)
2425

2526

backends/test/suite/generate_markdown_summary.py

Lines changed: 171 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,58 @@
11
import argparse
22
import csv
3+
import json
34
import sys
45

6+
from dataclasses import dataclass, field
7+
8+
9+
@dataclass
10+
class ResultCounts:
11+
"""
12+
Represents aggregated result counts for each status.
13+
"""
14+
15+
total: int = 0
16+
passes: int = 0
17+
fails: int = 0
18+
skips: int = 0
19+
by_detail: dict[str, int] = field(default_factory=lambda: {})
20+
21+
def add_row(self, result_value: str, result_detail: str) -> None:
22+
"""
23+
Update the result counts for the specified row.
24+
"""
25+
26+
self.total += 1
27+
28+
if result_value == "Pass":
29+
self.passes += 1
30+
elif result_value == "Fail":
31+
self.fails += 1
32+
elif result_value == "Skip":
33+
self.skips += 1
34+
else:
35+
raise RuntimeError(f"Unknown result value {result_value}")
36+
37+
if result_detail:
38+
if result_detail not in self.by_detail:
39+
self.by_detail[result_detail] = 0
40+
41+
self.by_detail[result_detail] += 1
42+
43+
44+
@dataclass
45+
class AggregatedSummary:
46+
"""
47+
Represents aggregated summary data for the test run.
48+
"""
49+
50+
counts: ResultCounts
51+
counts_by_params: dict[str, ResultCounts]
52+
failed_tests: list[list[str]]
53+
header: list[str]
54+
55+
556
#
657
# A standalone script to generate a Markdown representation of a test report.
758
# This is primarily intended to be used with GitHub actions to generate a nice
@@ -12,93 +63,167 @@
1263
#
1364

1465

15-
def generate_markdown(csv_path: str, exit_code: int = 0): # noqa (C901)
16-
# Print warning if exit code is non-zero
17-
if exit_code != 0:
18-
print("> [!WARNING]")
19-
print(
20-
f"> Exit code {exit_code} was non-zero. Test process may have crashed. Check the job logs for more information.\n"
21-
)
22-
66+
def aggregate_results(csv_path: str) -> AggregatedSummary:
2367
with open(csv_path, newline="", encoding="utf-8") as f:
2468
reader = csv.reader(f)
2569
rows = list(reader)
2670

2771
header = rows[0]
2872
data_rows = rows[1:]
2973

30-
# Find the Result and Result Detail column indices
31-
result_column_index = None
32-
result_detail_column_index = None
33-
for i, col in enumerate(header):
34-
if col.lower() == "result":
35-
result_column_index = i
36-
elif col.lower() == "result detail":
37-
result_detail_column_index = i
74+
header_indices_by_name = {n.lower(): i for (i, n) in enumerate(header)}
75+
params_column_index = header_indices_by_name.get("params", None)
76+
result_column_index = header_indices_by_name["result"]
77+
result_detail_column_index = header_indices_by_name["result detail"]
3878

3979
# Count results and prepare data
40-
pass_count = 0
41-
fail_count = 0
42-
skip_count = 0
80+
counts = ResultCounts()
4381
failed_tests = []
44-
processed_rows = []
45-
result_detail_counts = {}
82+
counts_by_param = {}
4683

4784
for row in data_rows:
85+
result = row[result_column_index]
86+
result_detail = row[result_detail_column_index]
87+
88+
counts.add_row(result, result_detail)
89+
90+
params = row[params_column_index] if params_column_index else None
91+
if params:
92+
if params not in counts_by_param:
93+
counts_by_param[params] = ResultCounts()
94+
counts_by_param[params].add_row(result, result_detail)
95+
4896
# Make a copy of the row to avoid modifying the original
49-
processed_row = row.copy()
97+
processed_row = [escape_for_markdown(cell) for cell in row]
5098

5199
# Count results and collect failed tests
52100
if result_column_index is not None and result_column_index < len(row):
53101
result_value = row[result_column_index].strip().lower()
54102
if result_value == "pass":
55-
pass_count += 1
56103
processed_row[result_column_index] = (
57104
'<span style="color:green">Pass</span>'
58105
)
59106
elif result_value == "fail":
60-
fail_count += 1
61107
processed_row[result_column_index] = (
62108
'<span style="color:red">Fail</span>'
63109
)
64110
failed_tests.append(processed_row.copy())
65111
elif result_value == "skip":
66-
skip_count += 1
67112
processed_row[result_column_index] = (
68113
'<span style="color:gray">Skip</span>'
69114
)
70115

71-
# Count result details (excluding empty ones)
72-
if result_detail_column_index is not None and result_detail_column_index < len(
73-
row
74-
):
75-
result_detail_value = row[result_detail_column_index].strip()
76-
if result_detail_value: # Only count non-empty result details
77-
if result_detail_value in result_detail_counts:
78-
result_detail_counts[result_detail_value] += 1
79-
else:
80-
result_detail_counts[result_detail_value] = 1
116+
return AggregatedSummary(
117+
counts=counts,
118+
failed_tests=failed_tests,
119+
counts_by_params=counts_by_param,
120+
header=header,
121+
)
122+
123+
124+
def escape_for_markdown(text: str) -> str:
125+
"""
126+
Modify a string to properly display in a markdown table cell.
127+
"""
128+
if not text:
129+
return text
81130

82-
processed_rows.append(processed_row)
131+
# Replace newlines with <br /> tags
132+
escaped = text.replace("\n", "<br />")
133+
134+
# Escape backslashes.
135+
escaped = escaped.replace("\\", "\\\\")
136+
137+
# Escape pipe characters that would break table structure
138+
escaped = escaped.replace("|", "\\|")
139+
140+
return escaped
141+
142+
143+
def generate_markdown(csv_path: str, exit_code: int = 0): # noqa (C901)
144+
# Print warning if exit code is non-zero
145+
if exit_code != 0:
146+
print("> [!WARNING]")
147+
print(
148+
f"> Exit code {exit_code} was non-zero. Test process may have crashed. Check the job logs for more information.\n"
149+
)
150+
151+
results = aggregate_results(csv_path)
83152

84153
# Generate Summary section
85-
total_rows = len(data_rows)
86154
print("# Summary\n")
87-
print(f"- **Pass**: {pass_count}/{total_rows}")
88-
print(f"- **Fail**: {fail_count}/{total_rows}")
89-
print(f"- **Skip**: {skip_count}/{total_rows}")
155+
total_excluding_skips = results.counts.passes + results.counts.fails
156+
pass_fraction = results.counts.passes / total_excluding_skips
157+
fail_fraction = results.counts.fails / total_excluding_skips
158+
print(
159+
f"- **Pass**: {results.counts.passes}/{total_excluding_skips} ({pass_fraction*100:.2f}%)"
160+
)
161+
print(
162+
f"- **Fail**: {results.counts.fails}/{total_excluding_skips} ({fail_fraction*100:.2f}%)"
163+
)
164+
print(f"- **Skip**: {results.counts.skips}")
165+
166+
if results.counts_by_params:
167+
print("\n## Results by Parameters\n")
168+
169+
# Extract all unique parameter keys from the JSON strings
170+
all_param_keys = set()
171+
parsed_params = {}
172+
173+
for params_str in results.counts_by_params.keys():
174+
# Parse the JSON string (it's a string representation of a dict)
175+
params_dict = json.loads(params_str)
176+
parsed_params[params_str] = params_dict
177+
all_param_keys.update(params_dict.keys())
178+
179+
if parsed_params and len(parsed_params) > 1:
180+
# Sort parameter keys for consistent column ordering
181+
sorted_param_keys = sorted(all_param_keys)
182+
183+
# Create table header
184+
header_cols = sorted_param_keys + ["Pass", "Fail", "Skip", "Pass %"]
185+
print("| " + " | ".join(header_cols) + " |")
186+
print("|" + "|".join(["---"] * len(header_cols)) + "|")
187+
188+
# Create table rows
189+
for params_str, counts in results.counts_by_params.items():
190+
if params_str in parsed_params:
191+
params_dict = parsed_params[params_str]
192+
row_values = []
193+
194+
# Add parameter values
195+
for key in sorted_param_keys:
196+
value = params_dict.get(key, "")
197+
row_values.append(str(value))
198+
199+
pass_fraction = counts.passes / (counts.passes + counts.fails)
200+
201+
# Add count values
202+
row_values.extend(
203+
[
204+
str(counts.passes),
205+
str(counts.fails),
206+
str(counts.skips),
207+
f"{pass_fraction*100:.2f}%",
208+
]
209+
)
210+
211+
print("| " + " | ".join(row_values) + " |")
212+
213+
print()
90214

91215
print("## Failure Breakdown:")
92-
total_rows_with_result_detail = sum(result_detail_counts.values())
93-
for detail, count in sorted(result_detail_counts.items()):
216+
total_rows_with_result_detail = sum(results.counts.by_detail.values())
217+
for detail, count in sorted(results.counts.by_detail.items()):
94218
print(f"- **{detail}**: {count}/{total_rows_with_result_detail}")
95219

96220
# Generate Failed Tests section
97221
print("# Failed Tests\n")
98-
if failed_tests:
99-
print("| " + " | ".join(header) + " |")
100-
print("|" + "|".join(["---"] * len(header)) + "|")
101-
for row in failed_tests:
222+
if results.failed_tests:
223+
escaped_header = [escape_for_markdown(col) for col in results.header]
224+
print("| " + " | ".join(escaped_header) + " |")
225+
print("|" + "|".join(["---"] * len(results.header)) + "|")
226+
for row in results.failed_tests:
102227
print("| " + " | ".join(row) + " |")
103228
else:
104229
print("No failed tests.\n")

backends/test/suite/models/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,11 @@ def wrapped_test(self):
5252
"use_dynamic_shapes": use_dynamic_shapes,
5353
}
5454
with TestContext(test_name, test_func.__name__, flow.name, params):
55+
if flow.should_skip_test(test_name):
56+
raise unittest.SkipTest(
57+
f"Skipping test due to matching flow {flow.name} skip patterns"
58+
)
59+
5560
test_func(self, flow, dtype, use_dynamic_shapes)
5661

5762
wrapped_test._name = test_func.__name__ # type: ignore

backends/test/suite/operators/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,11 @@ def _make_wrapped_test(
9797
):
9898
def wrapped_test(self):
9999
with TestContext(test_name, test_base_name, flow.name, params):
100+
if flow.should_skip_test(test_name):
101+
raise unittest.SkipTest(
102+
f"Skipping test due to matching flow {flow.name} skip patterns"
103+
)
104+
100105
test_kwargs = copy.copy(params) or {}
101106
test_kwargs["flow"] = flow
102107

backends/test/suite/operators/test_abs.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
# pyre-unsafe
88

99

10+
import unittest
11+
1012
import torch
1113
from executorch.backends.test.suite.flow import TestFlow
1214

@@ -45,6 +47,7 @@ def test_abs_shapes(self, flow: TestFlow) -> None:
4547
# 3D tensor
4648
self._test_op(AbsModel(), (torch.randn(3, 4, 5),), flow)
4749

50+
@unittest.skip("NaN and Inf are not enforced for backends.")
4851
def test_abs_edge_cases(self, flow: TestFlow) -> None:
4952
# Test edge cases
5053

0 commit comments

Comments
 (0)