Commit d09dd79
[Backend Tester] Migrate to pytest (#14456)
Refactor the backend test suites to use pytest. This includes the following changes:

* Define pytest markers for each backend and test flow (recipe). This allows for easy filtering, such as by running `pytest some/path/... -m backend_xnnpack`.
* Use a parameterized pytest fixture to handle test generation / expansion for each test flow.
* Switch to using the pytest-json-report plugin for reporting. Update the markdown generation script to take JSON.
* Shim the existing unittest-based logic for op tests. I've updated add.py to show what they should look like long-term (see the sketch below), and I've also updated the model tests, since there aren't as many. I'll update the remaining op tests later in this stack, though this is purely to clean up the code; the shimming logic makes them work properly with pytest in this PR.
* Update the backend test CI to use pytest. This also has the benefit of making the jobs much faster by leveraging parallel execution. I've also added a repro command to the markdown summary.
1 parent 668e730 commit d09dd79
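For a sense of what the migrated op tests look like, here is a rough sketch of a pytest-style test built on the `test_runner` fixture defined in conftest.py below. The `Model` module, test name, and tensor shapes are illustrative, not the actual contents of add.py:

```python
# Hypothetical example of a pytest-style op test. The fixture is parameterized
# over every registered flow, so this one function expands into a test per
# flow, each tagged with the corresponding flow_*/backend_* markers.
import torch


class Model(torch.nn.Module):
    def forward(self, x, y):
        return x + y


def test_add(test_runner):
    # Lower the module with the flow under test, run it, and record output
    # error statistics; non-backend failures are reported as skips.
    test_runner.lower_and_run_model(Model(), (torch.randn(2, 8), torch.randn(2, 8)))
```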

File tree

14 files changed: +757 −545 lines changed


.ci/scripts/test_backend_linux.sh renamed to .ci/scripts/test_backend.sh

Lines changed: 20 additions & 7 deletions
@@ -10,16 +10,26 @@ SUITE=$1
 FLOW=$2
 ARTIFACT_DIR=$3
 
-REPORT_FILE="$ARTIFACT_DIR/test-report-$FLOW-$SUITE.csv"
+REPORT_FILE="$ARTIFACT_DIR/test-report-$FLOW-$SUITE.json"
 
 echo "Running backend test job for suite $SUITE, flow $FLOW."
 echo "Saving job artifacts to $ARTIFACT_DIR."
 
-# The generic Linux job chooses to use base env, not the one setup by the image
 eval "$(conda shell.bash hook)"
 CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
 conda activate "${CONDA_ENV}"
 
+if [[ "$(uname)" == "Darwin" ]]; then
+  bash .ci/scripts/setup-conda.sh
+  eval "$(conda shell.bash hook)"
+  CONDA_RUN_CMD="${CONDA_RUN} --no-capture-output"
+  ${CONDA_RUN_CMD} pip install awscli==1.37.21
+  IS_MACOS=1
+else
+  CONDA_RUN_CMD=""
+  IS_MACOS=0
+fi
+
 export PYTHON_EXECUTABLE=python
 
 # CMake options to use, in addition to the defaults.
@@ -50,11 +60,14 @@ if [[ "$FLOW" == *arm* ]]; then
   .ci/scripts/setup-arm-baremetal-tools.sh
 fi
 
-# We need the runner to test the built library.
-PYTHON_EXECUTABLE=python CMAKE_ARGS="$EXTRA_BUILD_ARGS" .ci/scripts/setup-linux.sh --build-tool cmake --build-mode Release --editable true
+if [[ $IS_MACOS -eq 1 ]]; then
+  SETUP_SCRIPT=.ci/scripts/setup-macos.sh
+else
+  SETUP_SCRIPT=.ci/scripts/setup-linux.sh
+fi
+CMAKE_ARGS="$EXTRA_BUILD_ARGS" ${CONDA_RUN_CMD} $SETUP_SCRIPT --build-tool cmake --build-mode Release --editable true
 
 EXIT_CODE=0
-python -m executorch.backends.test.suite.runner $SUITE --flow $FLOW --report "$REPORT_FILE" || EXIT_CODE=$?
-
+${CONDA_RUN_CMD} pytest -c /dev/null -n auto backends/test/suite/$SUITE/ -m flow_$FLOW --json-report --json-report-file="$REPORT_FILE" || EXIT_CODE=$?
 # Generate markdown summary.
-python -m executorch.backends.test.suite.generate_markdown_summary "$REPORT_FILE" > ${GITHUB_STEP_SUMMARY:-"step_summary.md"} --exit-code $EXIT_CODE
+${CONDA_RUN_CMD} python -m executorch.backends.test.suite.generate_markdown_summary_json "$REPORT_FILE" > ${GITHUB_STEP_SUMMARY:-"step_summary.md"} --exit-code $EXIT_CODE

.ci/scripts/test_backend_macos.sh

Lines changed: 0 additions & 30 deletions
This file was deleted.

.github/workflows/_test_backend.yml

Lines changed: 2 additions & 2 deletions
@@ -57,7 +57,7 @@ jobs:
         script: |
           set -eux
 
-          source .ci/scripts/test_backend_linux.sh "${{ matrix.suite }}" "${{ matrix.flow }}" "${RUNNER_ARTIFACT_DIR}"
+          source .ci/scripts/test_backend.sh "${{ matrix.suite }}" "${{ matrix.flow }}" "${RUNNER_ARTIFACT_DIR}"
 
   test-backend-macos:
     if: ${{ inputs.run-macos }}
@@ -81,4 +81,4 @@ jobs:
           # This is needed to get the prebuilt PyTorch wheel from S3
           ${CONDA_RUN} --no-capture-output pip install awscli==1.37.21
 
-          source .ci/scripts/test_backend_macos.sh "${{ matrix.suite }}" "${{ matrix.flow }}" "${RUNNER_ARTIFACT_DIR}"
+          source .ci/scripts/test_backend.sh "${{ matrix.suite }}" "${{ matrix.flow }}" "${RUNNER_ARTIFACT_DIR}"

backends/test/suite/__init__.py

Lines changed: 6 additions & 0 deletions
@@ -11,6 +11,7 @@
 import os
 
 import executorch.backends.test.suite.flow
+import torch
 
 from executorch.backends.test.suite.flow import TestFlow
 from executorch.backends.test.suite.runner import runner_main
@@ -55,6 +56,11 @@ def get_test_flows() -> dict[str, TestFlow]:
     return _ALL_TEST_FLOWS
 
 
+def dtype_to_str(dtype: torch.dtype) -> str:
+    # Strip off "torch."
+    return str(dtype)[6:]
+
+
 def load_tests(loader, suite, pattern):
     package_dir = os.path.dirname(__file__)
     discovered_suite = loader.discover(
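The new helper simply strips the `torch.` prefix from the dtype's string form; a quick illustration of the expected output:

```python
import torch

from executorch.backends.test.suite import dtype_to_str

# str(torch.float16) == "torch.float16"; dropping the first six characters
# ("torch.") leaves just the bare dtype name.
assert dtype_to_str(torch.float16) == "float16"
assert dtype_to_str(torch.int8) == "int8"
```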

backends/test/suite/conftest.py

Lines changed: 182 additions & 0 deletions
from typing import Any

import pytest
import torch

from executorch.backends.test.suite.flow import all_flows
from executorch.backends.test.suite.reporting import _sum_op_counts
from executorch.backends.test.suite.runner import run_test


def pytest_configure(config):
    backends = set()

    for flow in all_flows().values():
        config.addinivalue_line(
            "markers",
            f"flow_{flow.name}: mark a test as testing the {flow.name} flow",
        )

        if flow.backend not in backends:
            config.addinivalue_line(
                "markers",
                f"backend_{flow.backend}: mark a test as testing the {flow.backend} backend",
            )
            backends.add(flow.backend)


class TestRunner:
    def __init__(self, flow, test_name, test_base_name):
        self._flow = flow
        self._test_name = test_name
        self._test_base_name = test_base_name
        self._subtest = 0
        self._results = []

    def lower_and_run_model(
        self,
        model: torch.nn.Module,
        inputs: Any,
        generate_random_test_inputs=True,
        dynamic_shapes=None,
    ):
        run_summary = run_test(
            model,
            inputs,
            self._flow,
            self._test_name,
            self._test_base_name,
            self._subtest,
            None,
            generate_random_test_inputs=generate_random_test_inputs,
            dynamic_shapes=dynamic_shapes,
        )

        self._subtest += 1
        self._results.append(run_summary)

        if not run_summary.result.is_success():
            if run_summary.result.is_backend_failure():
                raise RuntimeError("Test failure.") from run_summary.error
            else:
                # Non-backend failure indicates a bad test. Mark as skipped.
                pytest.skip(
                    f"Test failed for reasons other than backend failure. Error: {run_summary.error}"
                )


@pytest.fixture(
    params=[
        pytest.param(
            f,
            marks=[
                getattr(pytest.mark, f"flow_{f.name}"),
                getattr(pytest.mark, f"backend_{f.backend}"),
            ],
        )
        for f in all_flows().values()
    ],
    ids=str,
)
def test_runner(request):
    return TestRunner(request.param, request.node.name, request.node.originalname)


@pytest.hookimpl(optionalhook=True)
def pytest_json_runtest_metadata(item, call):
    # Store detailed results in the test report under the metadata key.
    metadata = {"subtests": []}

    if hasattr(item, "funcargs") and "test_runner" in item.funcargs:
        runner_instance = item.funcargs["test_runner"]

        for record in runner_instance._results:
            subtest_metadata = {}

            error_message = ""
            if record.error is not None:
                error_str = str(record.error)
                if len(error_str) > 400:
                    error_message = error_str[:200] + "..." + error_str[-200:]
                else:
                    error_message = error_str

            subtest_metadata["Test ID"] = record.name
            subtest_metadata["Test Case"] = record.base_name
            subtest_metadata["Subtest"] = record.subtest_index
            subtest_metadata["Flow"] = record.flow
            subtest_metadata["Result"] = record.result.to_short_str()
            subtest_metadata["Result Detail"] = record.result.to_detail_str()
            subtest_metadata["Error"] = error_message
            subtest_metadata["Delegated"] = "True" if record.is_delegated() else "False"
            subtest_metadata["Quantize Time (s)"] = (
                f"{record.quantize_time.total_seconds():.3f}"
                if record.quantize_time
                else None
            )
            subtest_metadata["Lower Time (s)"] = (
                f"{record.lower_time.total_seconds():.3f}"
                if record.lower_time
                else None
            )

            for output_idx, error_stats in enumerate(record.tensor_error_statistics):
                subtest_metadata[f"Output {output_idx} Error Max"] = (
                    f"{error_stats.error_max:.3f}"
                )
                subtest_metadata[f"Output {output_idx} Error MAE"] = (
                    f"{error_stats.error_mae:.3f}"
                )
                subtest_metadata[f"Output {output_idx} SNR"] = f"{error_stats.sqnr:.3f}"

            subtest_metadata["Delegated Nodes"] = _sum_op_counts(
                record.delegated_op_counts
            )
            subtest_metadata["Undelegated Nodes"] = _sum_op_counts(
                record.undelegated_op_counts
            )
            if record.delegated_op_counts:
                subtest_metadata["Delegated Ops"] = dict(record.delegated_op_counts)
            if record.undelegated_op_counts:
                subtest_metadata["Undelegated Ops"] = dict(record.undelegated_op_counts)
            subtest_metadata["PTE Size (Kb)"] = (
                f"{record.pte_size_bytes / 1000.0:.3f}" if record.pte_size_bytes else ""
            )

            metadata["subtests"].append(subtest_metadata)
    return metadata


@pytest.hookimpl(optionalhook=True)
def pytest_json_modifyreport(json_report):
    # Post-process the report, mainly to populate metadata for crashed tests. The runtest_metadata
    # hook doesn't seem to be called when there's a native crash, but xdist still creates a report
    # entry.

    for test_data in json_report["tests"]:
        if "metadata" not in test_data:
            test_data["metadata"] = {}
        metadata = test_data["metadata"]
        if "subtests" not in metadata:
            metadata["subtests"] = []
        subtests = metadata["subtests"]

        # Native crashes are recorded differently and won't have the full metadata.
        # Pytest-xdist records crash info under the "???" key.
        if "???" in test_data:
            test_id = test_data["nodeid"].removeprefix("::")  # Remove leading ::
            test_base_id = test_id.split("[")[
                0
            ]  # Strip parameterization to get the base test case
            params = test_id[len(test_base_id) + 1 : -1].split("-")
            flow = params[0]

            crashed_test_meta = {
                "Test ID": test_id,
                "Test Case": test_base_id,
                "Flow": flow,
                "Result": "Fail",
                "Result Detail": "Process Crash",
                "Error": test_data["???"].get("longrepr", "Process crashed."),
            }
            subtests.append(crashed_test_meta)
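For reference, the per-subtest metadata attached by these hooks ends up under each test entry in the pytest-json-report output, which is what the markdown summary script consumes. A minimal sketch of reading it back, assuming a report produced by test_backend.sh (the filename here is illustrative; the actual name follows the `test-report-$FLOW-$SUITE.json` pattern):

```python
import json

# Illustrative path; CI writes test-report-<flow>-<suite>.json into the
# artifact directory.
with open("test-report-xnnpack-operators.json") as f:
    report = json.load(f)

# pytest-json-report stores one entry per test under "tests"; the hooks above
# add a "metadata" dict with a "subtests" list to each entry.
for test in report["tests"]:
    for subtest in test.get("metadata", {}).get("subtests", []):
        print(subtest["Test ID"], subtest["Result"], subtest.get("Error", ""))
```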

backends/test/suite/flow.py

Lines changed: 3 additions & 0 deletions
@@ -44,6 +44,9 @@ class TestFlow:
     def should_skip_test(self, test_name: str) -> bool:
         return any(pattern in test_name for pattern in self.skip_patterns)
 
+    def __str__(self):
+        return self.name
+
 
 def all_flows() -> dict[str, TestFlow]:
     flows = []
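The `__str__` override pairs with `ids=str` on the `test_runner` fixture in conftest.py: pytest stringifies each flow parameter to build the test ID, so IDs read as the flow name rather than a dataclass repr. A small illustration (the `test_add` name is just an example):

```python
from executorch.backends.test.suite.flow import all_flows

# With __str__ defined, str(flow) is just the flow name, which ids=str on the
# parameterized fixture turns into readable test IDs like "test_add[<flow>]".
for flow in all_flows().values():
    print(f"test_add[{flow}]")
```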
