From 7283e4c4d79872bacab29423bd873d66295c8fd1 Mon Sep 17 00:00:00 2001
From: Adrian Lundell
Date: Thu, 3 Oct 2024 11:13:57 +0200
Subject: [PATCH 1/3] Add max_pool2d op to Arm backend.

- Adds node visitor and unit tests
- Adds remove_getitem_op pass to convert (max_pool2d_with_indices + getitem) -> max_pool2d op

Change-Id: I404a663ffc0f27619c4053fc2ddf54260d2476c2
---
 backends/arm/_passes/arm_pass_manager.py      |   2 +
 backends/arm/arm_partitioner.py               |   1 +
 backends/arm/operators/__init__.py            |   1 +
 backends/arm/operators/op_max_pool2d.py       |  77 ++++++
 backends/arm/quantizer/arm_quantizer_utils.py |   1 +
 backends/arm/test/ops/test_max_pool.py        | 248 ++++++++++++++++++
 6 files changed, 330 insertions(+)
 create mode 100644 backends/arm/operators/op_max_pool2d.py
 create mode 100644 backends/arm/test/ops/test_max_pool.py

diff --git a/backends/arm/_passes/arm_pass_manager.py b/backends/arm/_passes/arm_pass_manager.py
index e0566438b7c..b885476bc72 100644
--- a/backends/arm/_passes/arm_pass_manager.py
+++ b/backends/arm/_passes/arm_pass_manager.py
@@ -27,6 +27,7 @@
     ScalarsToAttributePass,
 )
 from executorch.backends.arm._passes.size_adjust_conv2d_pass import SizeAdjustConv2DPass
+from executorch.backends.xnnpack._passes.remove_getitem_op import RemoveGetItemPass
 from executorch.exir import ExportedProgram
 from executorch.exir.backend.compile_spec_schema import CompileSpec
 from executorch.exir.pass_manager import PassManager
@@ -42,6 +43,7 @@ def transform_to_backend_pipeline(
     ):
         """Apply passes before transforming program to backend"""
         self.add_pass(CastInt64ToInt32Pass(exported_program))
+        self.add_pass(RemoveGetItemPass())
         self.add_pass(SizeAdjustConv2DPass())
         self.add_pass(RemoveClonePass())
         self.add_pass(ConvertExpandCopyToRepeatPass())
diff --git a/backends/arm/arm_partitioner.py b/backends/arm/arm_partitioner.py
index 22fb5ac6ac0..06335295d6e 100644
--- a/backends/arm/arm_partitioner.py
+++ b/backends/arm/arm_partitioner.py
@@ -54,6 +54,7 @@ def is_node_supported(self, submodules, node: torch.fx.Node) -> bool:
             exir_ops.edge.aten.mul.Tensor,
             exir_ops.edge.aten._native_batch_norm_legit_no_training.default,
             exir_ops.edge.aten.avg_pool2d.default,
+            exir_ops.edge.aten.max_pool2d_with_indices.default,
             exir_ops.edge.aten.sigmoid.default,
             exir_ops.edge.aten.mm.default,
             exir_ops.edge.aten.repeat.default,
diff --git a/backends/arm/operators/__init__.py b/backends/arm/operators/__init__.py
index 6d08290f038..b371a78c15a 100644
--- a/backends/arm/operators/__init__.py
+++ b/backends/arm/operators/__init__.py
@@ -20,6 +20,7 @@
     op_get_item,
     op_hardtanh,
     op_log,
+    op_max_pool2d,
     op_mean_dim,
     op_mm,
     op_mul,
diff --git a/backends/arm/operators/op_max_pool2d.py b/backends/arm/operators/op_max_pool2d.py
new file mode 100644
index 00000000000..0752d8242f7
--- /dev/null
+++ b/backends/arm/operators/op_max_pool2d.py
@@ -0,0 +1,77 @@
+# Copyright 2024 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-unsafe
+from typing import cast, List
+
+import serializer.tosa_serializer as ts
+import torch
+from executorch.backends.arm.operators.node_visitor import (
+    NodeVisitor,
+    register_node_visitor,
+)
+from executorch.backends.arm.tosa_mapping import TosaArg
+from executorch.backends.arm.tosa_utils import get_quant_node_args
+
+from serializer.tosa_serializer import TosaOp
+
+
+@register_node_visitor
+class MaxPool2dVisitor(NodeVisitor):
+    target = "aten.max_pool2d.default"
+
+    def __init__(self, *args):
+        super().__init__(*args)
+
+    def define_node(
+        self,
+        node: torch.fx.Node,
+        tosa_graph: ts.TosaSerializer,
+        inputs: List[TosaArg],
+        output: TosaArg,
+        is_quant_node: bool,
+    ) -> None:
+
+        input_tensor = inputs[0]
+        kernel_size = inputs[1].special
+        stride = inputs[2].special
+
+        try:
+            padding = [*inputs[3].special, *inputs[3].special]
+        except IndexError:
+            padding = [0, 0, 0, 0]
+
+        accumulator_type = input_tensor.dtype
+
+        if is_quant_node:
+            # The accumulator type is always int8 when the input tensor is an integer type.
+            accumulator_type = ts.DType.INT8
+
+        # Initialize zero points to zero.
+        input_zp = 0
+        output_zp = 0
+
+        if is_quant_node:
+            input_zp = get_quant_node_args(
+                cast(torch.fx.Node, node.all_input_nodes[0])
+            ).zp
+            output_zp = get_quant_node_args(list(node.users)[0]).zp
+
+        attr = ts.TosaSerializerAttribute()
+        attr.PoolAttribute(
+            kernel=kernel_size,
+            stride=stride,
+            pad=padding,
+            input_zp=input_zp,
+            output_zp=output_zp,
+            accum_dtype=accumulator_type,
+        )
+
+        tosa_graph.addOperator(
+            TosaOp.Op().MAX_POOL2D,
+            [input_tensor.name],
+            [output.name],
+            attr,
+        )
diff --git a/backends/arm/quantizer/arm_quantizer_utils.py b/backends/arm/quantizer/arm_quantizer_utils.py
index 6102a96606c..2054dca2a71 100644
--- a/backends/arm/quantizer/arm_quantizer_utils.py
+++ b/backends/arm/quantizer/arm_quantizer_utils.py
@@ -152,6 +152,7 @@ def is_share_obs_or_fq_op(op: Callable) -> bool:
         # TODO: remove?
         torch.ops.aten.adaptive_avg_pool2d.default,
         torch.ops.aten.avg_pool2d.default,
+        torch.ops.aten.max_pool2d.default,
         torch.ops.aten.view_copy.default,
         torch.ops.aten.view.default,
         torch.ops.aten.slice.Tensor,
diff --git a/backends/arm/test/ops/test_max_pool.py b/backends/arm/test/ops/test_max_pool.py
new file mode 100644
index 00000000000..13b6aba5613
--- /dev/null
+++ b/backends/arm/test/ops/test_max_pool.py
@@ -0,0 +1,248 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# Copyright 2024 Arm Limited and/or its affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import logging
+import unittest
+
+from typing import Tuple
+
+import torch
+from executorch.backends.arm.quantizer.arm_quantizer import (
+    ArmQuantizer,
+    get_symmetric_quantization_config,
+)
+from executorch.backends.arm.test import common
+from executorch.backends.arm.test.tester.arm_tester import ArmTester
+
+from executorch.backends.xnnpack.test.tester.tester import Quantize
+from executorch.exir.backend.backend_details import CompileSpec
+from parameterized import parameterized
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+
+test_data_suite = [
+    # (test_name, test_data, [kernel_size, stride, padding])
+    ("zeros", torch.zeros(1, 1, 4, 8), [2, 2, 1]),
+    ("ones", torch.ones(1, 16, 50, 32), [4, 2, 0]),
+    ("rand", torch.rand(1, 16, 52, 16), [4, 3, 0]),
+]
+
+test_data_suite_mult_batches = [
+    ("randn", torch.randn(5, 16, 50, 32), [4, 2, 0]),
+]
+
+
+class TestMaxPool2d(unittest.TestCase):
+    """Tests MaxPool2d."""
+
+    class MaxPool2d(torch.nn.Module):
+        def __init__(
+            self,
+            kernel_size: int | Tuple[int, int],
+            stride: int | Tuple[int, int],
+            padding: int | Tuple[int, int],
+        ):
+            super().__init__()
+            self.max_pool_2d = torch.nn.MaxPool2d(
+                kernel_size=kernel_size, stride=stride, padding=padding
+            )
+
+        def forward(self, x):
+            return self.max_pool_2d(x)
+
+    def _test_maxpool2d_tosa_MI_pipeline(
+        self, module: torch.nn.Module, test_data: Tuple[torch.tensor]
+    ):
+        (
+            ArmTester(
+                module,
+                example_inputs=test_data,
+                compile_spec=common.get_tosa_compile_spec(permute_memory_to_nhwc=True),
+            )
+            .export()
+            .check(["torch.ops.aten.max_pool2d.default"])
+            .check_not(["torch.ops.quantized_decomposed"])
+            .to_edge()
+            .partition()
+            .check_not(["executorch_exir_dialects_edge__ops_aten_max_pool2d_default"])
+            .check_not(
+                [
+                    "executorch_exir_dialects_edge__ops_aten_max_pool2d_with_indices_default"
+                ]
+            )
+            .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
+            .to_executorch()
+        )
+
+    def _test_maxpool2d_tosa_BI_pipeline(
+        self, module: torch.nn.Module, test_data: Tuple[torch.tensor]
+    ):
+        quantizer = ArmQuantizer().set_io(get_symmetric_quantization_config())
+        (
+            ArmTester(
+                module,
+                example_inputs=test_data,
+                compile_spec=common.get_tosa_compile_spec(permute_memory_to_nhwc=True),
+            )
+            .quantize(Quantize(quantizer, get_symmetric_quantization_config()))
+            .export()
+            .check_count({"torch.ops.aten.max_pool2d.default": 1})
+            .check(["torch.ops.quantized_decomposed"])
+            .to_edge()
+            .partition()
+            .check_not(["executorch_exir_dialects_edge__ops_aten_max_pool2d_default"])
+            .check_not(
+                [
+                    "executorch_exir_dialects_edge__ops_aten_max_pool2d_with_indices_default"
+                ]
+            )
+            .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
+            .to_executorch()
+            .run_method_and_compare_outputs(inputs=test_data, qtol=1)
+        )
+
+    def _test_maxpool2d_tosa_ethos_BI_pipeline(
+        self,
+        module: torch.nn.Module,
+        compile_spec: CompileSpec,
+        test_data: Tuple[torch.tensor],
+    ):
+        quantizer = ArmQuantizer().set_io(get_symmetric_quantization_config())
+        tester = (
+            ArmTester(
+                module,
+                example_inputs=test_data,
+                compile_spec=compile_spec,
+            )
+            .quantize(Quantize(quantizer, get_symmetric_quantization_config()))
+            .export()
+            .check_count({"torch.ops.aten.max_pool2d.default": 1})
+            .check(["torch.ops.quantized_decomposed"])
+            .to_edge()
+            .partition()
+            .check_not(["executorch_exir_dialects_edge__ops_aten_max_pool2d_default"])
+            .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
+            .to_executorch()
+            .serialize()
+        )
+
+        return tester
+
+    @parameterized.expand(test_data_suite)
+    def test_maxpool2d_tosa_MI(
+        self,
+        test_name: str,
+        test_data: torch.Tensor,
+        model_params: int | Tuple[int, int],
+    ):
+        self._test_maxpool2d_tosa_MI_pipeline(
+            self.MaxPool2d(*model_params), (test_data,)
+        )
+
+    @parameterized.expand(test_data_suite)
+    def test_maxpool2d_tosa_BI(
+        self,
+        test_name: str,
+        test_data: torch.Tensor,
+        model_params: int | Tuple[int, int],
+    ):
+        self._test_maxpool2d_tosa_BI_pipeline(
+            self.MaxPool2d(*model_params), (test_data,)
+        )
+
+    @parameterized.expand(test_data_suite)
+    def test_maxpool2d_tosa_u55_BI(
+        self,
+        test_name: str,
+        test_data: torch.Tensor,
+        model_params: int | Tuple[int, int],
+    ):
+        tester = self._test_maxpool2d_tosa_ethos_BI_pipeline(
+            self.MaxPool2d(*model_params),
+            common.get_u55_compile_spec(permute_memory_to_nhwc=True),
+            (test_data,),
+        )
+        if common.is_option_enabled("corstone300"):
+            tester.run_method_and_compare_outputs(
+                qtol=1, inputs=(test_data,), target_board="corstone-300"
+            )
+
+    @parameterized.expand(test_data_suite)
+    def test_maxpool2d_tosa_u85_BI(
+        self,
+        test_name: str,
+        test_data: torch.Tensor,
+        model_params: int | Tuple[int, int],
+    ):
+        tester = self._test_maxpool2d_tosa_ethos_BI_pipeline(
+            self.MaxPool2d(*model_params),
+            common.get_u85_compile_spec(permute_memory_to_nhwc=True),
+            (test_data,),
+        )
+        if common.is_option_enabled("corstone300"):
+            tester.run_method_and_compare_outputs(
+                qtol=1, inputs=(test_data,), target_board="corstone-320"
+            )
+
+    @parameterized.expand(test_data_suite_mult_batches)
+    def test_maxpool2d_tosa_MI_mult_batches(
+        self,
+        test_name: str,
+        test_data: torch.Tensor,
+        model_params: int | Tuple[int, int],
+    ):
+        self._test_maxpool2d_tosa_MI_pipeline(
+            self.MaxPool2d(*model_params), (test_data,)
+        )
+
+    @parameterized.expand(test_data_suite_mult_batches)
+    def test_maxpool2d_tosa_BI_mult_batches(
+        self,
+        test_name: str,
+        test_data: torch.Tensor,
+        model_params: int | Tuple[int, int],
+    ):
+        self._test_maxpool2d_tosa_BI_pipeline(
+            self.MaxPool2d(*model_params), (test_data,)
+        )
+
+    @parameterized.expand(test_data_suite_mult_batches)
+    @unittest.expectedFailure # TODO: MLETORCH-433
+    def test_maxpool2d_tosa_u55_BI_mult_batches(
+        self,
+        test_name: str,
+        test_data: torch.Tensor,
+        model_params: int | Tuple[int, int],
+    ):
+        tester = self._test_maxpool2d_tosa_ethos_BI_pipeline(
+            self.MaxPool2d(*model_params),
+            common.get_u55_compile_spec(permute_memory_to_nhwc=True),
+            (test_data,),
+        )
+        if common.is_option_enabled("corstone300"):
+            tester.run_method_and_compare_outputs(
+                qtol=1, inputs=(test_data,), target_board="corstone-300"
+            )
+
+    @parameterized.expand(test_data_suite_mult_batches)
+    @unittest.expectedFailure # TODO: MLETORCH-433
+    def test_maxpool2d_tosa_u85_BI_mult_batches(
+        self,
+        test_name: str,
+        test_data: torch.Tensor,
+        model_params: int | Tuple[int, int],
+    ):
+        tester = self._test_maxpool2d_tosa_ethos_BI_pipeline(
+            self.MaxPool2d(*model_params),
+            common.get_u85_compile_spec(permute_memory_to_nhwc=True),
+            (test_data,),
+        )
+        if common.is_option_enabled("corstone300"):
+            tester.run_method_and_compare_outputs(
+                qtol=1, inputs=(test_data,), target_board="corstone-320"
+            )

From 5ead05547d272856a09f304bc4c273e31e1f7029 Mon Sep 17 00:00:00 2001
From: Adrian Lundell
Date: Wed, 30 Oct 2024 10:16:26 +0100
Subject: [PATCH 2/3] Expected failures only for FVP

Change-Id: I1f1aed353711f51fbdd83f5af3c9708de69dce5e
---
 backends/arm/test/common.py            | 11 +++++++++++
 backends/arm/test/ops/test_max_pool.py |  4 ++--
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/backends/arm/test/common.py b/backends/arm/test/common.py
index 2ae86b1d1eb..b79cf8b0892 100644
--- a/backends/arm/test/common.py
+++ b/backends/arm/test/common.py
@@ -56,6 +56,17 @@ def pytest_collection_modifyitems(config, items):
 # ==== End of Pytest hooks =====


+# ==== Custom Pytest decorators =====
+
+
+def expectedFailureOnFVP(test_item):
+    if is_option_enabled("corstone300"):
+        test_item.__unittest_expecting_failure__ = True
+    return test_item
+
+
+# ==== End of Custom Pytest decorators =====
+

 def load_libquantized_ops_aot_lib():
     so_ext = {
diff --git a/backends/arm/test/ops/test_max_pool.py b/backends/arm/test/ops/test_max_pool.py
index 13b6aba5613..5c48afa3ce1 100644
--- a/backends/arm/test/ops/test_max_pool.py
+++ b/backends/arm/test/ops/test_max_pool.py
@@ -212,7 +212,7 @@ def test_maxpool2d_tosa_BI_mult_batches(
         )

     @parameterized.expand(test_data_suite_mult_batches)
-    @unittest.expectedFailure # TODO: MLETORCH-433
+    @common.expectedFailureOnFVP # TODO: MLETORCH-433
     def test_maxpool2d_tosa_u55_BI_mult_batches(
         self,
         test_name: str,
@@ -230,7 +230,7 @@ def test_maxpool2d_tosa_u55_BI_mult_batches(
             )

     @parameterized.expand(test_data_suite_mult_batches)
-    @unittest.expectedFailure # TODO: MLETORCH-433
+    @common.expectedFailureOnFVP # TODO: MLETORCH-433
     def test_maxpool2d_tosa_u85_BI_mult_batches(
         self,
         test_name: str,

From e3b46a6668ea48d905dbfe006eb2cd0592907b99 Mon Sep 17 00:00:00 2001
From: Adrian Lundell
Date: Tue, 5 Nov 2024 13:47:32 +0100
Subject: [PATCH 3/3] Fix lintrunner issue

---
 backends/arm/_passes/arm_pass_manager.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backends/arm/_passes/arm_pass_manager.py b/backends/arm/_passes/arm_pass_manager.py
index 63ad05fd16e..a6c9cf1d06b 100644
--- a/backends/arm/_passes/arm_pass_manager.py
+++ b/backends/arm/_passes/arm_pass_manager.py
@@ -40,10 +40,10 @@
     ScalarsToAttributePass,
 )
 from executorch.backends.arm._passes.size_adjust_conv2d_pass import SizeAdjustConv2DPass
-from executorch.backends.xnnpack._passes.remove_getitem_op import RemoveGetItemPass
 from executorch.backends.arm._passes.unsqueeze_scalar_placeholders_pass import (
     UnsqueezeScalarPlaceholdersPass,
 )
+from executorch.backends.xnnpack._passes.remove_getitem_op import RemoveGetItemPass
 from executorch.exir import ExportedProgram
 from executorch.exir.backend.compile_spec_schema import CompileSpec
 from executorch.exir.pass_manager import PassManager
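
---
Editor's note: for context, a minimal standalone sketch of the flow the new tests exercise (floating-point TOSA MI path). It reuses only the ArmTester helpers and checks that appear in the patch above; the module shape and pool parameters are illustrative, and the snippet assumes the same ExecuTorch build/test environment the unit tests run in — it is not part of the patches themselves.

```python
import torch

from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.arm_tester import ArmTester

# Illustrative single-layer module; parameters mirror one of the test cases.
model = torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=1)
example_inputs = (torch.rand(1, 16, 52, 16),)

(
    ArmTester(
        model,
        example_inputs=example_inputs,
        compile_spec=common.get_tosa_compile_spec(permute_memory_to_nhwc=True),
    )
    .export()
    # max_pool2d should appear in the exported graph...
    .check(["torch.ops.aten.max_pool2d.default"])
    .to_edge()
    .partition()
    # ...and end up delegated to the Arm backend after partitioning.
    .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
    .to_executorch()
)
```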