diff --git a/.ci/scripts/test_backend_linux.sh b/.ci/scripts/test_backend_linux.sh
index 243602fea21..d230860875d 100755
--- a/.ci/scripts/test_backend_linux.sh
+++ b/.ci/scripts/test_backend_linux.sh
@@ -39,12 +39,17 @@ if [[ "$FLOW" == *qnn* ]]; then
 fi
 
 if [[ "$FLOW" == *vulkan* ]]; then
-  # Setup swiftshader and Vulkan SDK which are required to build the Vulkan delegate
+  # Setup swiftshader and Vulkan SDK which are required to build the Vulkan delegate.
   source .ci/scripts/setup-vulkan-linux-deps.sh
   EXTRA_BUILD_ARGS+=" -DEXECUTORCH_BUILD_VULKAN=ON"
 fi
 
+if [[ "$FLOW" == *arm* ]]; then
+  # Setup ARM deps.
+  .ci/scripts/setup-arm-baremetal-tools.sh
+fi
+
 # We need the runner to test the built library.
 PYTHON_EXECUTABLE=python CMAKE_ARGS="$EXTRA_BUILD_ARGS" .ci/scripts/setup-linux.sh --build-tool cmake --build-mode Release --editable true
diff --git a/.github/workflows/test-backend-arm.yml b/.github/workflows/test-backend-arm.yml
new file mode 100644
index 00000000000..e57be2704a2
--- /dev/null
+++ b/.github/workflows/test-backend-arm.yml
@@ -0,0 +1,27 @@
+name: Test ARM Backend
+
+on:
+  schedule:
+    - cron: 0 2 * * *
+  push:
+    tags:
+      - ciflow/nightly/*
+  pull_request:
+    paths:
+      - .github/workflows/test-backend-arm.yml
+      - .github/workflows/_test_backend.yml
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.workflow }}--${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
+  cancel-in-progress: true
+
+jobs:
+  test-arm:
+    uses: ./.github/workflows/_test_backend.yml
+    with:
+      backend: arm
+      flows: '["arm_tosa"]'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 120
+      run-linux: true
diff --git a/backends/arm/test/tester/arm_tester.py b/backends/arm/test/tester/arm_tester.py
index bb249644c47..8bf72827549 100644
--- a/backends/arm/test/tester/arm_tester.py
+++ b/backends/arm/test/tester/arm_tester.py
@@ -57,6 +57,7 @@
 
 from executorch.backends.arm.vgf import VgfCompileSpec, VgfPartitioner
 
+from executorch.backends.test.harness.error_statistics import ErrorStatistics
 from executorch.backends.test.harness.stages import Stage, StageType
 from executorch.backends.xnnpack.test.tester import Tester
 from executorch.devtools.backend_debug import get_delegation_info
@@ -333,6 +334,7 @@ def to_edge_transform_and_lower(
         transform_passes: Optional[
             Union[Sequence[PassType], Dict[str, Sequence[PassType]]]
         ] = None,
+        generate_etrecord: bool = False,
     ):
         if transform_passes is not None:
             raise RuntimeError(
@@ -367,7 +369,9 @@ def to_edge_transform_and_lower(
             to_edge_and_lower_stage.partitioners = partitioners
         if edge_compile_config is not None:
             to_edge_and_lower_stage.edge_compile_conf = edge_compile_config
-        return super().to_edge_transform_and_lower(to_edge_and_lower_stage)
+        return super().to_edge_transform_and_lower(
+            to_edge_and_lower_stage, generate_etrecord=generate_etrecord
+        )
 
     def to_executorch(self, to_executorch_stage: Optional[ToExecutorch] | None = None):
         if to_executorch_stage is None:
@@ -402,6 +406,7 @@ def run_method_and_compare_outputs(
         qtol=0,
         error_callbacks=None,
         run_eager_mode=False,
+        statistics_callback: Callable[[ErrorStatistics], None] | None = None,
     ):
         """
         Compares the run_artifact output of 'stage' with the output of a reference stage.
@@ -657,10 +662,17 @@ def _compare_outputs(
         rtol=1e-03,
         qtol=0,
         error_callbacks=None,
+        statistics_callback: Callable[[ErrorStatistics], None] | None = None,
     ):
         try:
             super()._compare_outputs(
-                reference_output, stage_output, quantization_scale, atol, rtol, qtol
+                reference_output,
+                stage_output,
+                quantization_scale,
+                atol,
+                rtol,
+                qtol,
+                statistics_callback=statistics_callback,
             )
         except AssertionError as e:
             if error_callbacks is None:
diff --git a/backends/test/suite/flow.py b/backends/test/suite/flow.py
index 9df3805444a..a4b34fee98d 100644
--- a/backends/test/suite/flow.py
+++ b/backends/test/suite/flow.py
@@ -38,6 +38,9 @@ class TestFlow:
     skip_patterns: list[str] = field(default_factory=lambda: [])
     """ Tests with names containing any substrings in this list are skipped. """
 
+    supports_serialize: bool = True
+    """ True if the test flow supports the Serialize stage. """
+
    def should_skip_test(self, test_name: str) -> bool:
        return any(pattern in test_name for pattern in self.skip_patterns)
 
@@ -115,4 +118,13 @@ def all_flows() -> dict[str, TestFlow]:
     except Exception as e:
         logger.info(f"Skipping QNN flow registration: {e}")
 
+    try:
+        from executorch.backends.test.suite.flows.arm import ARM_TOSA_FLOW
+
+        flows += [
+            ARM_TOSA_FLOW,
+        ]
+    except Exception as e:
+        logger.info(f"Skipping ARM flow registration: {e}")
+
     return {f.name: f for f in flows if f is not None}
diff --git a/backends/test/suite/flows/arm.py b/backends/test/suite/flows/arm.py
new file mode 100644
index 00000000000..baa2df79de9
--- /dev/null
+++ b/backends/test/suite/flows/arm.py
@@ -0,0 +1,24 @@
+from executorch.backends.arm.test import common
+from executorch.backends.arm.test.tester.arm_tester import ArmTester
+from executorch.backends.test.suite.flow import TestFlow
+
+
+def _create_arm_tester_tosa_fp(*args, **kwargs) -> ArmTester:
+    kwargs["compile_spec"] = common.get_tosa_compile_spec(tosa_spec="TOSA-1.0+FP")
+
+    return ArmTester(
+        *args,
+        **kwargs,
+    )
+
+
+def _create_tosa_flow() -> TestFlow:
+    return TestFlow(
+        "arm_tosa",
+        backend="arm",
+        tester_factory=_create_arm_tester_tosa_fp,
+        supports_serialize=False,
+    )
+
+
+ARM_TOSA_FLOW = _create_tosa_flow()
diff --git a/backends/test/suite/runner.py b/backends/test/suite/runner.py
index 1f84db9c730..eeea09e0fc1 100644
--- a/backends/test/suite/runner.py
+++ b/backends/test/suite/runner.py
@@ -15,6 +15,7 @@
 UNSUPPORTED_PORTABLE_OPS = {
     "aten::_embedding_bag",
     "aten::_adaptive_avg_pool2d",
+    "aten::adaptive_max_pool2d",
     "aten::median",
     "aten::median.dim",
     "aten::round.decimals",
@@ -34,6 +35,7 @@
     TestResult,
 )
 from executorch.exir import EdgeProgramManager
+from executorch.exir.dialects._ops import ops as exir_ops
 
 
 # A list of all runnable test suites and the corresponding python package.
@@ -43,6 +45,24 @@
 }
 
 
+def _graph_has_unsupported_patterns(program: torch.export.ExportedProgram) -> bool:
+    # Returns true if the model contains patterns that will fail when running on the ET
+    # portable kernel library.
+
+    # Check for 3d convolutions. All convs (1d, 2d, 3d) use the same op, so we need to look at
+    # the input meta to determine the rank.
+    for node in program.graph.nodes:
+        if (
+            node.op == "call_function"
+            and node.target == exir_ops.edge.aten.convolution.default
+        ):
+            in_rank = node.args[0].meta["val"].dim()
+            if in_rank != 4:
+                return True
+
+    return False
+
+
 def _get_test_seed(test_base_name: str) -> int:
     # Set the seed based on the test base name to give consistent inputs between backends.
     # Add the run seed to allow for reproducible results, but still allow for run-to-run variation.
@@ -162,7 +182,7 @@ def build_result(
     # Check if any undelegated ops are in the unsupported ops set.
     has_unsupported_ops = any(
         op in UNSUPPORTED_PORTABLE_OPS for op in undelegated_op_counts.keys()
-    )
+    ) or _graph_has_unsupported_patterns(edge_manager._etrecord.edge_dialect_program)
 
     # Skip the test if there are unsupported portable ops remaining.
     if has_unsupported_ops:
@@ -171,8 +191,11 @@ def build_result(
     # Only run the runtime portion if something was delegated (or the flow doesn't delegate)
     if is_delegated or not flow.is_delegated:
         try:
-            tester.to_executorch().serialize()
-            extra_stats["pte_size_bytes"] = len(tester.get_artifact())
+            tester.to_executorch()
+
+            if flow.supports_serialize:
+                tester.serialize()
+                extra_stats["pte_size_bytes"] = len(tester.get_artifact())
+
         except Exception as e:
             # We could introduce a result value for this, but I'm not sure it's necessary.
             # We can do this if we ever see to_executorch() or serialize() fail due a backend issue.
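
Note on the _graph_has_unsupported_patterns() check added in runner.py: 1d, 2d, and 3d convolutions all lower to the same edge convolution op, so the only way to detect a 3d conv is to inspect the rank of the input tensor's meta. The snippet below is a minimal, hypothetical sketch (not part of the patch) that applies the same rank check to a toy Conv3d model; TinyConv3d and has_non_2d_conv are illustrative names, and it assumes torch.export plus executorch.exir.to_edge handle the 3d convolution without error.

import torch
from executorch.exir import to_edge
from executorch.exir.dialects._ops import ops as exir_ops


class TinyConv3d(torch.nn.Module):  # hypothetical toy model, not from the patch
    def __init__(self):
        super().__init__()
        self.conv = torch.nn.Conv3d(2, 4, kernel_size=3)

    def forward(self, x):
        return self.conv(x)


def has_non_2d_conv(program: torch.export.ExportedProgram) -> bool:
    # Mirrors the runner.py check: flag any convolution whose input is not rank 4 (NCHW),
    # since per the patch such graphs fail on the ET portable kernel library.
    for node in program.graph.nodes:
        if (
            node.op == "call_function"
            and node.target == exir_ops.edge.aten.convolution.default
        ):
            if node.args[0].meta["val"].dim() != 4:
                return True
    return False


if __name__ == "__main__":
    exported = torch.export.export(TinyConv3d(), (torch.randn(1, 2, 8, 8, 8),))
    edge = to_edge(exported)  # assumes to_edge lowers the 3d convolution
    print(has_non_2d_conv(edge.exported_program()))  # expected: True (5d NCDHW input)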