diff --git a/backends/arm/_passes/arm_pass_manager.py b/backends/arm/_passes/arm_pass_manager.py
index 0b4e27e5aaa..14972601b65 100644
--- a/backends/arm/_passes/arm_pass_manager.py
+++ b/backends/arm/_passes/arm_pass_manager.py
@@ -7,7 +7,6 @@

 # pyre-unsafe

-import torch
 from executorch.backends.arm._passes.annotate_channels_last_dim_order_pass import (
     AnnotateChannelsLastDimOrder,
 )
@@ -47,7 +46,7 @@
 )
 from executorch.backends.arm._passes.match_arg_ranks_pass import MatchArgRanksPass
 from executorch.backends.arm._passes.meandim_to_averagepool_pass import (
-    ConvertMeanDimToAveragePool,
+    ConvertMeanDimToAveragePoolPass,
 )
 from executorch.backends.arm._passes.mm_to_bmm_pass import ConvertMmToBmmPass
 from executorch.backends.arm._passes.remove_clone_pass import RemoveClonePass
@@ -61,86 +60,98 @@
 from executorch.backends.arm._passes.unsqueeze_scalar_placeholders_pass import (
     UnsqueezeScalarPlaceholdersPass,
 )
+from executorch.backends.arm.tosa_specification import TosaSpecification
 from executorch.backends.xnnpack._passes.remove_getitem_op import RemoveGetItemPass
 from executorch.exir import ExportedProgram
-from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_manager import PassManager
+from torch.fx import GraphModule


 class ArmPassManager(PassManager):

-    def _transform(self, graph_module: torch.fx.GraphModule):
+    def __init__(self, tosa_spec: TosaSpecification) -> None:
+        self.tosa_spec = tosa_spec
+        super().__init__()
+
+    def _transform(self, graph_module: GraphModule):
         return self(graph_module).graph_module

-    def transform_to_backend_pipeline(self, exported_program: ExportedProgram):
-        """Apply passes before transforming program to backend"""
+    def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
         self.add_pass(FuseQuantizedActivationPass())
+        self.add_pass(RemoveGetItemPass())
+        self.add_pass(ConvertSplitToSlicePass())
+        self.add_pass(ConvertMmToBmmPass())
         self.add_pass(DecomposeLinearPass())
+        self.add_pass(ConvertMeanDimToAveragePoolPass())
+
+        self.add_pass(AnnotateDecomposedMatmulPass())
+        self.add_pass(QuantizeFullArgument())
+        self.add_pass(FoldAndAnnotateQParamsPass())
+        self.add_pass(RetraceFoldedDtypesPass())
+        self.add_pass(InsertTableOpsPass(exported_program))
+
+        self.add_pass(RemoveClonePass())
+        self.add_pass(SizeAdjustConv2DPass())
+        self.add_pass(ConvertExpandCopyToRepeatPass())
+        self.add_pass(UnsqueezeBeforeRepeatPass())
+        self.add_pass(UnsqueezeScalarPlaceholdersPass(exported_program))
+        self.add_pass(CastInt64ToInt32Pass(exported_program))
+        self.add_pass(MatchArgRanksPass(exported_program))
+        self.add_pass(KeepDimsFalseToSqueezePass())
+        self.add_pass(Conv1dUnsqueezePass(exported_program))
+        self.add_pass(DecomposeSelectPass())
+
+        self.add_pass(AnnotateChannelsLastDimOrder())
+
+        return self._transform(exported_program.graph_module)
+
+    def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
+
+        self.add_pass(FuseQuantizedActivationPass())
         self.add_pass(RemoveGetItemPass())
+        self.add_pass(ConvertSplitToSlicePass())
+        self.add_pass(ConvertMmToBmmPass())
+        self.add_pass(DecomposeLinearPass())
         self.add_pass(DecomposeLayerNormPass())
         self.add_pass(DecomposeVarPass())
-        self.add_pass(ConvertMeanDimToAveragePool())
         self.add_pass(DecomposeMeanDimPass())
-        self.add_pass(ConvertSplitToSlicePass())
-        self.add_pass(ConvertMmToBmmPass())
-        # TODO MLETORCH-558
+        self.add_pass(ConvertMeanDimToAveragePoolPass())
+        self.add_pass(DecomposeDivPass())
+        self.add_pass(DecomposeSoftmaxesPass())
+        self.add_pass(AnnotateDecomposedMatmulPass())
         self.add_pass(QuantizeFullArgument())
-        self.add_pass(
-            FoldAndAnnotateQParamsPass(
-                [
-                    exir_ops.edge.aten.minimum.default,
-                    exir_ops.edge.aten.maximum.default,
-                    exir_ops.edge.aten.add.Tensor,
-                    exir_ops.edge.aten.avg_pool2d.default,
-                    exir_ops.edge.aten.bmm.default,
-                    exir_ops.edge.aten.cat.default,
-                    exir_ops.edge.aten.convolution.default,
-                    exir_ops.edge.aten.clone.default,
-                    exir_ops.edge.aten.exp.default,
-                    exir_ops.edge.aten.expand_copy.default,
-                    exir_ops.edge.aten.full.default,
-                    exir_ops.edge.aten.hardtanh.default,
-                    exir_ops.edge.aten.log.default,
-                    exir_ops.edge.aten.max_pool2d.default,
-                    exir_ops.edge.aten.mul.Tensor,
-                    exir_ops.edge.aten.permute_copy.default,
-                    exir_ops.edge.aten.reciprocal.default,
-                    exir_ops.edge.aten.relu.default,
-                    exir_ops.edge.aten.repeat.default,
-                    exir_ops.edge.aten.rsqrt.default,
-                    exir_ops.edge.aten.select_copy.int,
-                    exir_ops.edge.aten.sigmoid.default,
-                    exir_ops.edge.aten.slice_copy.Tensor,
-                    exir_ops.edge.aten.squeeze_copy.dims,
-                    exir_ops.edge.aten.sub.Tensor,
-                    exir_ops.edge.aten.sum.dim_IntList,
-                    exir_ops.edge.aten.tanh.default,
-                    exir_ops.edge.aten.unsqueeze_copy.default,
-                    exir_ops.edge.aten.upsample_nearest2d.vec,
-                    exir_ops.edge.aten.view_copy.default,
-                ]
-            )
-        )
+        self.add_pass(FoldAndAnnotateQParamsPass())
         self.add_pass(RetraceFoldedDtypesPass())
         self.add_pass(InsertTableOpsPass(exported_program))
+
+        self.add_pass(RemoveClonePass())
+        self.add_pass(SizeAdjustConv2DPass())
         self.add_pass(ConvertExpandCopyToRepeatPass())
         self.add_pass(UnsqueezeBeforeRepeatPass())
-        self.add_pass(CastInt64ToInt32Pass(exported_program))
         self.add_pass(UnsqueezeScalarPlaceholdersPass(exported_program))
-        self.add_pass(SizeAdjustConv2DPass())
-        self.add_pass(RemoveClonePass())
+        self.add_pass(CastInt64ToInt32Pass(exported_program))
         self.add_pass(MatchArgRanksPass(exported_program))
-        self.add_pass(DecomposeDivPass())
         self.add_pass(KeepDimsFalseToSqueezePass())
         self.add_pass(Conv1dUnsqueezePass(exported_program))
-        self.add_pass(DecomposeSoftmaxesPass())
         self.add_pass(DecomposeSelectPass())
+        self.add_pass(AnnotateChannelsLastDimOrder())

         return self._transform(exported_program.graph_module)

-    def transform_for_annotation_pipeline(self, graph_module: torch.fx.GraphModule):
+    def transform_to_backend_pipeline(self, exported_program: ExportedProgram):
+        """Apply passes before transforming program to backend"""
+        if self.tosa_spec == TosaSpecification.create_from_string("TOSA-0.80.0+BI"):
+            return self._tosa_080_BI_pipeline(exported_program)
+        elif self.tosa_spec == TosaSpecification.create_from_string("TOSA-0.80.0+MI"):
+            return self._tosa_080_MI_pipeline(exported_program)
+        else:
+            raise NotImplementedError(
+                f"No pass pipeline implemented for {self.tosa_spec=}"
+            )
+
+    def transform_for_annotation_pipeline(self, graph_module: GraphModule):
         self.add_pass(ScalarsToAttributePass())
         self.add_pass(DecomposeLayerNormPass())
         self.add_pass(DecomposeVarPass())
diff --git a/backends/arm/_passes/cast_int64_pass.py b/backends/arm/_passes/cast_int64_pass.py
index aab6ed8eb42..dffa4c199a4 100644
--- a/backends/arm/_passes/cast_int64_pass.py
+++ b/backends/arm/_passes/cast_int64_pass.py
@@ -1,4 +1,4 @@
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
@@ -17,6 +17,10 @@


 class CastInt64ToInt32Pass(ExportPass):
+    """
+    Cast int64 buffers to int32 if the int64 data is in int32 range.
+    """
+
     def __init__(self, exported_program: torch.export.ExportedProgram):
         super(CastInt64ToInt32Pass, self).__init__()
         self.exported_program = exported_program
diff --git a/backends/arm/_passes/fold_qdq_with_annotated_qparams_pass.py b/backends/arm/_passes/fold_qdq_with_annotated_qparams_pass.py
index 045506f19dd..5a6b06d100d 100644
--- a/backends/arm/_passes/fold_qdq_with_annotated_qparams_pass.py
+++ b/backends/arm/_passes/fold_qdq_with_annotated_qparams_pass.py
@@ -1,4 +1,4 @@
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
@@ -6,7 +6,7 @@

 import copy

-from typing import cast, Dict, Iterable, Set, Tuple
+from typing import cast, Dict, Set, Tuple

 from executorch.backends.arm.tosa_quant_utils import QuantArgs

@@ -55,7 +55,7 @@ def get_output_qparams(node: Node) -> dict[int, QuantArgs]:
 class FoldAndAnnotateQParamsPass(ExportPass):
     """
     A pass that walks the graph and removes any DQ and Q nodes before and after the target
-     node in the supplied list of operators.
+     node.
     The quantization parameters from the DQ/Q nodes are stored as meta values to be
     accessible for later lowering and serialization passes.
     The assumption is that the quantization annotatation adds DQ nodes for all tensor
@@ -82,9 +82,8 @@ class FoldAndAnnotateQParamsPass(ExportPass):

     """

-    def __init__(self, targeted_ops: Iterable[EdgeOpOverload]) -> None:
+    def __init__(self) -> None:
         super().__init__()
-        self.targeted_ops = targeted_ops

     def fold_and_annotate_arg(
         self, graph_module: GraphModule, node: Node, arg_list: list[Node], i: int
@@ -131,7 +130,7 @@ def call(self, graph_module: GraphModule) -> PassResult:
         # Loop over the graph nodes and find any node in the 'targeted_ops' list.
         for n in graph_module.graph.nodes:
             n = cast(Node, n)
-            if n.op != "call_function" or n.target not in self.targeted_ops:
+            if n.op != "call_function":
                 continue

             # Make sure we haven't already set qparams meta information on the node
@@ -180,7 +179,7 @@ class QuantizeFullArgument(ExportPass):

     def call(self, graph_module: GraphModule) -> PassResult:
         modified = False
-        # Loop over the graph nodes and find any node in the 'targeted_ops' list.
+        # Loop over the graph nodes and find full.default nodes.
         for n in graph_module.graph.nodes:
             n = cast(Node, n)
             if n.target != exir_ops.edge.aten.full.default:
diff --git a/backends/arm/_passes/meandim_to_averagepool_pass.py b/backends/arm/_passes/meandim_to_averagepool_pass.py
index 0974eac740c..9a755191504 100644
--- a/backends/arm/_passes/meandim_to_averagepool_pass.py
+++ b/backends/arm/_passes/meandim_to_averagepool_pass.py
@@ -1,4 +1,4 @@
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
@@ -16,7 +16,7 @@
 Argument = Any

-class ConvertMeanDimToAveragePool(ExportPass):
+class ConvertMeanDimToAveragePoolPass(ExportPass):
     """
     Replace a mean operation with dim = [-1, -2] and keep_dim = True with an
     average pool operation.
     """
diff --git a/backends/arm/_passes/remove_clone_pass.py b/backends/arm/_passes/remove_clone_pass.py
index ac992ce2a0c..9542a4097af 100644
--- a/backends/arm/_passes/remove_clone_pass.py
+++ b/backends/arm/_passes/remove_clone_pass.py
@@ -1,4 +1,4 @@
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
@@ -11,6 +11,7 @@


 class RemoveClonePass(ExportPass):
+    """Remove all clones from graph_module"""

     def call_operator(self, op, args, kwargs, meta):
         if op != exir_ops.edge.aten.clone.default:
diff --git a/backends/arm/arm_backend.py b/backends/arm/arm_backend.py
index 601cac3692d..7bdbdf39474 100644
--- a/backends/arm/arm_backend.py
+++ b/backends/arm/arm_backend.py
@@ -50,7 +50,7 @@ def __init__(self):
         self.output_format = None
         self.path_for_intermediates = None
         self.quantize_io = False
-        self.tosa_version = None
+        self.tosa_spec = None
         self.input_order = None

     def ethosu_compile_spec(
@@ -92,11 +92,13 @@ def ethosu_compile_spec(
         if "u55" in config:
             # Add the Ethos-U55 extension marker
            base_tosa_version += "+u55"
-        self.tosa_version = TosaSpecification.create_from_string(base_tosa_version)
+        self.tosa_spec = TosaSpecification.create_from_string(base_tosa_version)

         return self

-    def tosa_compile_spec(self, tosa_version: str) -> "ArmCompileSpecBuilder":
+    def tosa_compile_spec(
+        self, tosa_spec: str | TosaSpecification
+    ) -> "ArmCompileSpecBuilder":
         """
         Generate compile spec for TOSA flatbuffer output
         """
@@ -104,7 +106,12 @@ def tosa_compile_spec(self, tosa_version: str) -> "ArmCompileSpecBuilder":
             self.output_format is None
         ), f"Output format already set: {self.output_format}"
         self.output_format = "tosa"
-        self.tosa_version = TosaSpecification.create_from_string(tosa_version)
+        if isinstance(tosa_spec, TosaSpecification):
+            self.tosa_spec = tosa_spec
+        elif isinstance(tosa_spec, str):
+            self.tosa_spec = TosaSpecification.create_from_string(tosa_spec)
+        else:
+            raise RuntimeError(f"Invalid type for {tosa_spec}!")
         return self

     def dump_intermediate_artifacts_to(
@@ -138,12 +145,10 @@ def build(self) -> List[CompileSpec]:
         """
         Generate a list of compile spec objects from the builder
         """
-        assert self.tosa_version
+        assert self.tosa_spec

         # Always supply a TOSA version
-        self.compile_spec = [
-            CompileSpec("tosa_version", str(self.tosa_version).encode())
-        ]
+        self.compile_spec = [CompileSpec("tosa_version", str(self.tosa_spec).encode())]

         if self.output_format == "vela":
             self.compile_spec += [
@@ -253,7 +258,7 @@ def preprocess( # noqa: C901
         # Converted output for this subgraph, serializer needs path early as it emits
         # const data directly. Path created and data written only in debug builds.
         tosa_graph = ts.TosaSerializer(artifact_path)
-        graph_module = ArmPassManager().transform_to_backend_pipeline(
+        graph_module = ArmPassManager(tosa_spec).transform_to_backend_pipeline(
             exported_program=edge_program
         )
diff --git a/backends/arm/quantizer/arm_quantizer.py b/backends/arm/quantizer/arm_quantizer.py
index fe104db972b..cba66cfe561 100644
--- a/backends/arm/quantizer/arm_quantizer.py
+++ b/backends/arm/quantizer/arm_quantizer.py
@@ -1,5 +1,5 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
@@ -24,6 +24,7 @@

 from executorch.backends.arm.quantizer.quantization_annotator import annotate_graph
 from executorch.backends.arm.quantizer.quantization_config import QuantizationConfig
+from executorch.backends.arm.tosa_specification import TosaSpecification
 from torch.ao.quantization.fake_quantize import (
     FakeQuantize,
     FusedMovingAvgObsFakeQuantize,
@@ -205,8 +206,10 @@ def not_module_type_or_name_filter(n: Node) -> bool:


 class ArmQuantizer(Quantizer):
-    def __init__(self) -> None:
+
+    def __init__(self, tosa_spec: TosaSpecification) -> None:
         super().__init__()
+        self.tosa_spec = tosa_spec
         self.global_config: Optional[QuantizationConfig] = None
         self.io_config: Optional[QuantizationConfig] = None
         self.module_type_config: Dict[Callable, Optional[QuantizationConfig]] = {}
@@ -250,7 +253,9 @@ def transform_for_annotation(self, model: GraphModule) -> GraphModule:

         Currently transforms scalar values to tensor attributes.
         """
-        return ArmPassManager().transform_for_annotation_pipeline(graph_module=model)
+        return ArmPassManager(self.tosa_spec).transform_for_annotation_pipeline(
+            graph_module=model
+        )

     def annotate(self, model: GraphModule) -> GraphModule:
         """Performs the quantization annotation on the graph.
diff --git a/backends/arm/test/common.py b/backends/arm/test/common.py
index bcd68cb1737..c0f81bbe2e4 100644
--- a/backends/arm/test/common.py
+++ b/backends/arm/test/common.py
@@ -12,6 +12,7 @@
 from pathlib import Path

 from executorch.backends.arm.arm_backend import ArmCompileSpecBuilder
+from executorch.backends.arm.tosa_specification import TosaSpecification
 from executorch.exir.backend.compile_spec_schema import CompileSpec

@@ -53,15 +54,17 @@ def maybe_get_tosa_collate_path() -> str | None:
     return None


-def get_tosa_compile_spec(tosa_version: str, custom_path=None) -> list[CompileSpec]:
+def get_tosa_compile_spec(
+    tosa_spec: str | TosaSpecification, custom_path=None
+) -> list[CompileSpec]:
     """
     Default compile spec for TOSA tests.
     """
-    return get_tosa_compile_spec_unbuilt(tosa_version, custom_path).build()
+    return get_tosa_compile_spec_unbuilt(tosa_spec, custom_path).build()


 def get_tosa_compile_spec_unbuilt(
-    tosa_version: str, custom_path=None
+    tosa_spec: str | TosaSpecification, custom_path=None
 ) -> ArmCompileSpecBuilder:
     """Get the ArmCompileSpecBuilder for the default TOSA tests, to modify
     the compile spec before calling .build() to finalize it.
@@ -73,7 +76,7 @@ def get_tosa_compile_spec_unbuilt(
         os.makedirs(custom_path, exist_ok=True)
     compile_spec_builder = (
         ArmCompileSpecBuilder()
-        .tosa_compile_spec(tosa_version)
+        .tosa_compile_spec(tosa_spec)
         .dump_intermediate_artifacts_to(custom_path)
         .set_quantize_io(True)
     )
diff --git a/backends/arm/test/ops/test_avg_pool.py b/backends/arm/test/ops/test_avg_pool.py
index bc37fbb1364..16396950dc4 100644
--- a/backends/arm/test/ops/test_avg_pool.py
+++ b/backends/arm/test/ops/test_avg_pool.py
@@ -18,6 +18,7 @@
 )
 from executorch.backends.arm.test import common, conftest
 from executorch.backends.arm.test.tester.arm_tester import ArmTester
+from executorch.backends.arm.tosa_specification import TosaSpecification
 from executorch.backends.xnnpack.test.tester.tester import Quantize
 from executorch.exir.backend.backend_details import CompileSpec
 from parameterized import parameterized
@@ -73,14 +74,14 @@ def _test_avgpool2d_tosa_MI_pipeline(
     def _test_avgpool2d_tosa_BI_pipeline(
         self, module: torch.nn.Module, test_data: Tuple[torch.tensor]
     ):
-        quantizer = ArmQuantizer().set_io(get_symmetric_quantization_config())
+        tosa_spec = TosaSpecification.create_from_string("TOSA-0.80+BI")
+        compile_spec = common.get_tosa_compile_spec(tosa_spec)
+        quantizer = ArmQuantizer(tosa_spec).set_io(get_symmetric_quantization_config())
         (
             ArmTester(
                 module,
                 example_inputs=test_data,
-                compile_spec=common.get_tosa_compile_spec(
-                    "TOSA-0.80+BI",
-                ),
+                compile_spec=compile_spec,
             )
             .quantize(Quantize(quantizer, get_symmetric_quantization_config()))
             .export()
@@ -100,7 +101,8 @@ def _test_avgpool2d_tosa_ethos_BI_pipeline(
         compile_spec: CompileSpec,
         test_data: Tuple[torch.tensor],
     ):
-        quantizer = ArmQuantizer().set_io(get_symmetric_quantization_config())
+        tosa_spec = TosaSpecification.create_from_compilespecs(compile_spec)
+        quantizer = ArmQuantizer(tosa_spec).set_io(get_symmetric_quantization_config())
         tester = (
             ArmTester(
                 module,
diff --git a/backends/arm/test/ops/test_clone.py b/backends/arm/test/ops/test_clone.py
index 300ebb6f37d..1d46173a689 100644
--- a/backends/arm/test/ops/test_clone.py
+++ b/backends/arm/test/ops/test_clone.py
@@ -1,4 +1,4 @@
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
@@ -19,6 +19,7 @@
 )
 from executorch.backends.arm.test import common
 from executorch.backends.arm.test.tester.arm_tester import ArmTester
+from executorch.backends.arm.tosa_specification import TosaSpecification
 from executorch.backends.xnnpack.test.tester.tester import Quantize

@@ -60,13 +61,11 @@ def _test_clone_tosa_MI_pipeline(
     def _test_clone_tosa_BI_pipeline(
         self, module: torch.nn.Module, test_data: Tuple[torch.Tensor]
     ):
-        quantizer = ArmQuantizer().set_io(get_symmetric_quantization_config())
+        tosa_spec = TosaSpecification.create_from_string("TOSA-0.80+BI")
+        compile_spec = common.get_tosa_compile_spec(tosa_spec)
+        quantizer = ArmQuantizer(tosa_spec).set_io(get_symmetric_quantization_config())
         (
-            ArmTester(
-                module,
-                example_inputs=test_data,
-                compile_spec=common.get_tosa_compile_spec("TOSA-0.80+BI"),
-            )
+            ArmTester(module, example_inputs=test_data, compile_spec=compile_spec)
             .quantize(Quantize(quantizer, get_symmetric_quantization_config()))
             .export()
             .check_count({"torch.ops.aten.clone.default": 1})
diff --git a/backends/arm/test/ops/test_expand.py b/backends/arm/test/ops/test_expand.py
index 915b1fe7e00..116f5d64e87 100644
--- a/backends/arm/test/ops/test_expand.py
+++ b/backends/arm/test/ops/test_expand.py
@@ -1,4 +1,4 @@
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
@@ -22,6 +22,7 @@
 )
 from executorch.backends.arm.test import common, conftest
 from executorch.backends.arm.test.tester.arm_tester import ArmTester
+from executorch.backends.arm.tosa_specification import TosaSpecification
 from executorch.backends.xnnpack.test.tester.tester import Quantize
 from executorch.exir.backend.backend_details import CompileSpec

@@ -64,13 +65,11 @@ def _test_expand_tosa_MI_pipeline(self, module: torch.nn.Module, test_data: Tupl
         )

     def _test_expand_tosa_BI_pipeline(self, module: torch.nn.Module, test_data: Tuple):
-        quantizer = ArmQuantizer().set_io(get_symmetric_quantization_config())
+        tosa_spec = TosaSpecification.create_from_string("TOSA-0.80+BI")
+        compile_spec = common.get_tosa_compile_spec(tosa_spec)
+        quantizer = ArmQuantizer(tosa_spec).set_io(get_symmetric_quantization_config())
         (
-            ArmTester(
-                module,
-                example_inputs=test_data,
-                compile_spec=common.get_tosa_compile_spec("TOSA-0.80+BI"),
-            )
+            ArmTester(module, example_inputs=test_data, compile_spec=compile_spec)
             .quantize(Quantize(quantizer, get_symmetric_quantization_config()))
             .export()
             .check_count({"torch.ops.aten.expand.default": 1})
@@ -85,7 +84,8 @@ def _test_expand_tosa_BI_pipeline(self, module: torch.nn.Module, test_data: Tupl
     def _test_expand_ethosu_BI_pipeline(
         self, compile_spec: CompileSpec, module: torch.nn.Module, test_data: Tuple
     ):
-        quantizer = ArmQuantizer().set_io(get_symmetric_quantization_config())
+        tosa_spec = TosaSpecification.create_from_compilespecs(compile_spec)
+        quantizer = ArmQuantizer(tosa_spec).set_io(get_symmetric_quantization_config())
         tester = (
             ArmTester(
                 module,
diff --git a/backends/arm/test/ops/test_hardtanh.py b/backends/arm/test/ops/test_hardtanh.py
index 7125920c8c9..cf0a49827a8 100644
--- a/backends/arm/test/ops/test_hardtanh.py
+++ b/backends/arm/test/ops/test_hardtanh.py
@@ -1,5 +1,5 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
@@ -17,9 +17,10 @@
     ArmQuantizer,
     get_symmetric_quantization_config,
 )
-
 from executorch.backends.arm.test import common, conftest
 from executorch.backends.arm.test.tester.arm_tester import ArmTester
+
+from executorch.backends.arm.tosa_specification import TosaSpecification
 from executorch.backends.xnnpack.test.tester.tester import Quantize

 from parameterized import parameterized
@@ -71,13 +72,11 @@ def _test_hardtanh_tosa_MI_pipeline(
     def _test_hardtanh_tosa_BI_pipeline(
         self, module: torch.nn.Module, test_data: Tuple[torch.tensor]
     ):
-        quantizer = ArmQuantizer().set_io(get_symmetric_quantization_config())
+        tosa_spec = TosaSpecification.create_from_string("TOSA-0.80+BI")
+        compile_spec = common.get_tosa_compile_spec(tosa_spec)
+        quantizer = ArmQuantizer(tosa_spec).set_io(get_symmetric_quantization_config())
         (
-            ArmTester(
-                module,
-                example_inputs=test_data,
-                compile_spec=common.get_tosa_compile_spec("TOSA-0.80+BI"),
-            )
+            ArmTester(module, example_inputs=test_data, compile_spec=compile_spec)
             .quantize(Quantize(quantizer, get_symmetric_quantization_config()))
             .export()
             .check_count({"torch.ops.aten.hardtanh.default": 1})
@@ -93,7 +92,8 @@ def _test_hardtanh_tosa_BI_pipeline(
     def _test_hardtanh_tosa_ethosu_BI_pipeline(
         self, compile_spec, module: torch.nn.Module, test_data: Tuple[torch.tensor]
     ):
-        quantizer = ArmQuantizer().set_io(get_symmetric_quantization_config())
+        tosa_spec = TosaSpecification.create_from_compilespecs(compile_spec)
+        quantizer = ArmQuantizer(tosa_spec).set_io(get_symmetric_quantization_config())
         tester = (
             ArmTester(
                 module,
diff --git a/backends/arm/test/ops/test_max_pool.py b/backends/arm/test/ops/test_max_pool.py
index 81f27beab45..e3502baf2c7 100644
--- a/backends/arm/test/ops/test_max_pool.py
+++ b/backends/arm/test/ops/test_max_pool.py
@@ -19,6 +19,7 @@
 )
 from executorch.backends.arm.test import common, conftest
 from executorch.backends.arm.test.tester.arm_tester import ArmTester
+from executorch.backends.arm.tosa_specification import TosaSpecification
 from executorch.backends.xnnpack.test.tester.tester import Quantize
 from executorch.exir.backend.backend_details import CompileSpec

@@ -86,15 +87,11 @@ def _test_maxpool2d_tosa_MI_pipeline(
     def _test_maxpool2d_tosa_BI_pipeline(
         self, module: torch.nn.Module, test_data: Tuple[torch.tensor]
     ):
-        quantizer = ArmQuantizer().set_io(get_symmetric_quantization_config())
+        tosa_spec = TosaSpecification.create_from_string("TOSA-0.80+BI")
+        compile_spec = common.get_tosa_compile_spec(tosa_spec)
+        quantizer = ArmQuantizer(tosa_spec).set_io(get_symmetric_quantization_config())
         (
-            ArmTester(
-                module,
-                example_inputs=test_data,
-                compile_spec=common.get_tosa_compile_spec(
-                    "TOSA-0.80+BI",
-                ),
-            )
+            ArmTester(module, example_inputs=test_data, compile_spec=compile_spec)
             .quantize(Quantize(quantizer, get_symmetric_quantization_config()))
             .export()
             .check_count({"torch.ops.aten.max_pool2d.default": 1})
@@ -118,7 +115,8 @@ def _test_maxpool2d_tosa_ethos_BI_pipeline(
         compile_spec: CompileSpec,
         test_data: Tuple[torch.tensor],
     ):
-        quantizer = ArmQuantizer().set_io(get_symmetric_quantization_config())
+        tosa_spec = TosaSpecification.create_from_compilespecs(compile_spec)
+        quantizer = ArmQuantizer(tosa_spec).set_io(get_symmetric_quantization_config())
         tester = (
             ArmTester(
                 module,
diff --git a/backends/arm/test/ops/test_permute.py b/backends/arm/test/ops/test_permute.py
index ec7ecaa81b3..f0bfe23cffa 100644
--- a/backends/arm/test/ops/test_permute.py
+++ b/backends/arm/test/ops/test_permute.py
@@ -17,9 +17,9 @@
     ArmQuantizer,
     get_symmetric_quantization_config,
 )
-
 from executorch.backends.arm.test import common, conftest
 from executorch.backends.arm.test.tester.arm_tester import ArmTester
+from executorch.backends.arm.tosa_specification import TosaSpecification
 from executorch.backends.xnnpack.test.tester.tester import Quantize
 from executorch.exir.backend.compile_spec_schema import CompileSpec
 from parameterized import parameterized
@@ -74,13 +74,11 @@ def _test_permute_tosa_MI_pipeline(
     def _test_permute_tosa_BI_pipeline(
         self, module: torch.nn.Module, test_data: Tuple[torch.tensor]
     ):
-        quantizer = ArmQuantizer().set_io(get_symmetric_quantization_config())
+        tosa_spec = TosaSpecification.create_from_string("TOSA-0.80+BI")
+        compile_spec = common.get_tosa_compile_spec(tosa_spec)
+        quantizer = ArmQuantizer(tosa_spec).set_io(get_symmetric_quantization_config())
         (
-            ArmTester(
-                module,
-                example_inputs=test_data,
-                compile_spec=common.get_tosa_compile_spec("TOSA-0.80+BI"),
-            )
+            ArmTester(module, example_inputs=test_data, compile_spec=compile_spec)
             .quantize(Quantize(quantizer, get_symmetric_quantization_config()))
             .export()
             .check_count({"torch.ops.aten.permute.default": 1})
@@ -99,7 +97,8 @@ def _test_permute_ethos_BI_pipeline(
         compile_spec: CompileSpec,
         test_data: Tuple[torch.Tensor],
     ):
-        quantizer = ArmQuantizer().set_io(get_symmetric_quantization_config())
+        tosa_spec = TosaSpecification.create_from_compilespecs(compile_spec)
+        quantizer = ArmQuantizer(tosa_spec).set_io(get_symmetric_quantization_config())
         tester = (
             ArmTester(
                 module,
diff --git a/backends/arm/test/ops/test_relu.py b/backends/arm/test/ops/test_relu.py
index 5a7bd4f5ecf..dd2bc4817e8 100644
--- a/backends/arm/test/ops/test_relu.py
+++ b/backends/arm/test/ops/test_relu.py
@@ -1,5 +1,5 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
@@ -16,6 +16,7 @@
 )
 from executorch.backends.arm.test import common
 from executorch.backends.arm.test.tester.arm_tester import ArmTester
+from executorch.backends.arm.tosa_specification import TosaSpecification
 from executorch.backends.xnnpack.test.tester.tester import Quantize
 from executorch.exir.backend.backend_details import CompileSpec
 from parameterized import parameterized
@@ -64,13 +65,11 @@ def _test_relu_tosa_MI_pipeline(
     def _test_relu_tosa_BI_pipeline(
         self, module: torch.nn.Module, test_data: Tuple[torch.tensor]
     ):
-        quantizer = ArmQuantizer().set_io(get_symmetric_quantization_config())
+        tosa_spec = TosaSpecification.create_from_string("TOSA-0.80+BI")
+        compile_spec = common.get_tosa_compile_spec(tosa_spec)
+        quantizer = ArmQuantizer(tosa_spec).set_io(get_symmetric_quantization_config())
         (
-            ArmTester(
-                module,
-                example_inputs=test_data,
-                compile_spec=common.get_tosa_compile_spec("TOSA-0.80+BI"),
-            )
+            ArmTester(module, example_inputs=test_data, compile_spec=compile_spec)
             .quantize(Quantize(quantizer, get_symmetric_quantization_config()))
             .export()
             .check_count({"torch.ops.aten.relu.default": 1})
@@ -89,7 +88,8 @@ def _test_relu_ethosu_BI_pipeline(
         module: torch.nn.Module,
         test_data: Tuple[torch.tensor],
     ):
-        quantizer = ArmQuantizer().set_io(get_symmetric_quantization_config())
+        tosa_spec = TosaSpecification.create_from_compilespecs(compile_spec)
+        quantizer = ArmQuantizer(tosa_spec).set_io(get_symmetric_quantization_config())
         (
             ArmTester(
                 module,
diff --git a/backends/arm/test/ops/test_repeat.py b/backends/arm/test/ops/test_repeat.py
index bad872792be..d35f699b720 100644
--- a/backends/arm/test/ops/test_repeat.py
+++ b/backends/arm/test/ops/test_repeat.py
@@ -19,6 +19,7 @@
 )
 from executorch.backends.arm.test import common
 from executorch.backends.arm.test.tester.arm_tester import ArmTester
+from executorch.backends.arm.tosa_specification import TosaSpecification
 from executorch.backends.xnnpack.test.tester.tester import Quantize
 from executorch.exir.backend.backend_details import CompileSpec

@@ -61,13 +62,11 @@ def _test_repeat_tosa_MI_pipeline(self, module: torch.nn.Module, test_data: Tupl

     def _test_repeat_tosa_BI_pipeline(self, module: torch.nn.Module, test_data: Tuple):
-        quantizer = ArmQuantizer().set_io(get_symmetric_quantization_config())
+        tosa_spec = TosaSpecification.create_from_string("TOSA-0.80+BI")
+        compile_spec = common.get_tosa_compile_spec(tosa_spec)
+        quantizer = ArmQuantizer(tosa_spec).set_io(get_symmetric_quantization_config())
         (
-            ArmTester(
-                module,
-                example_inputs=test_data,
-                compile_spec=common.get_tosa_compile_spec("TOSA-0.80+BI"),
-            )
+            ArmTester(module, example_inputs=test_data, compile_spec=compile_spec)
             .quantize(Quantize(quantizer, get_symmetric_quantization_config()))
             .export()
             .check_count({"torch.ops.aten.repeat.default": 1})
@@ -82,7 +81,8 @@ def _test_repeat_tosa_BI_pipeline(self, module: torch.nn.Module, test_data: Tupl
     def _test_repeat_ethosu_pipeline(
         self, compile_spec: CompileSpec, module: torch.nn.Module, test_data: Tuple
     ):
-        quantizer = ArmQuantizer().set_io(get_symmetric_quantization_config())
+        tosa_spec = TosaSpecification.create_from_compilespecs(compile_spec)
+        quantizer = ArmQuantizer(tosa_spec).set_io(get_symmetric_quantization_config())
         (
             ArmTester(
                 module,
diff --git a/backends/arm/test/ops/test_var.py b/backends/arm/test/ops/test_var.py
index e1fed058177..fd45c2d83fc 100644
--- a/backends/arm/test/ops/test_var.py
+++ b/backends/arm/test/ops/test_var.py
@@ -15,9 +15,10 @@
     ArmQuantizer,
     get_symmetric_quantization_config,
 )
-
 from executorch.backends.arm.test import common
 from executorch.backends.arm.test.tester.arm_tester import ArmTester
+
+from executorch.backends.arm.tosa_specification import TosaSpecification
 from executorch.backends.xnnpack.test.tester.tester import Quantize
 from executorch.exir.backend.backend_details import CompileSpec

@@ -112,13 +113,11 @@ def _test_var_tosa_BI_pipeline(
         test_data: torch.Tensor,
         target_str: str = None,
     ):
-        quantizer = ArmQuantizer().set_io(get_symmetric_quantization_config())
+        tosa_spec = TosaSpecification.create_from_string("TOSA-0.80+BI")
+        compile_spec = common.get_tosa_compile_spec(tosa_spec)
+        quantizer = ArmQuantizer(tosa_spec).set_io(get_symmetric_quantization_config())
         (
-            ArmTester(
-                module,
-                example_inputs=test_data,
-                compile_spec=common.get_tosa_compile_spec("TOSA-0.80+BI"),
-            )
+            ArmTester(module, example_inputs=test_data, compile_spec=compile_spec)
             .quantize(Quantize(quantizer, get_symmetric_quantization_config()))
             .export()
             .to_edge()
@@ -135,7 +134,8 @@ def _test_var_ethosu_BI_pipeline(
         test_data: torch.Tensor,
         target_str: str = None,
     ):
-        quantizer = ArmQuantizer().set_io(get_symmetric_quantization_config())
+        tosa_spec = TosaSpecification.create_from_compilespecs(compile_spec)
+        quantizer = ArmQuantizer(tosa_spec).set_io(get_symmetric_quantization_config())
         (
             ArmTester(
                 module,
diff --git a/backends/arm/test/passes/test_fold_qdq_pass.py b/backends/arm/test/passes/test_fold_qdq_pass.py
index cd7cf751391..ebb96faf906 100644
--- a/backends/arm/test/passes/test_fold_qdq_pass.py
+++ b/backends/arm/test/passes/test_fold_qdq_pass.py
@@ -1,4 +1,4 @@
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
@@ -16,8 +16,6 @@

 from executorch.backends.xnnpack.test.tester.tester import RunPasses

-from executorch.exir.dialects._ops import ops as exir_ops
-

 class SimpleQuantizeModel(torch.nn.Module):
     def forward(self, x, y):
@@ -27,16 +25,6 @@ def get_inputs(self):
         return (torch.rand(1, 1280, 7, 7), torch.rand(1, 1280, 7, 7))


-class FoldAndAnnotateQParamsPassTestClass(FoldAndAnnotateQParamsPass):
-    def __init__(self):
-        super(FoldAndAnnotateQParamsPassTestClass, self).__init__(
-            [
-                exir_ops.edge.aten.add.Tensor,
-                exir_ops.edge.aten.maximum.default,
-            ]
-        )
-
-
 class TestFoldAndAnnotateQParamsPass(unittest.TestCase):
     """
     Tests the FoldAndAnnotateQParamsPass which folds dq/q nodes into
@@ -49,7 +37,7 @@ def test_fold_qdq_pass(self):
         is removed from the representation.
         """
         module = SimpleQuantizeModel()
-        test_pass_stage = RunPasses([FoldAndAnnotateQParamsPassTestClass])
+        test_pass_stage = RunPasses([FoldAndAnnotateQParamsPass])
         (
             ArmTester(
                 module,
diff --git a/backends/arm/test/passes/test_meandim_to_averagepool2d.py b/backends/arm/test/passes/test_meandim_to_averagepool2d.py
index 93badc64356..e07e91ed727 100644
--- a/backends/arm/test/passes/test_meandim_to_averagepool2d.py
+++ b/backends/arm/test/passes/test_meandim_to_averagepool2d.py
@@ -1,4 +1,4 @@
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
@@ -8,7 +8,7 @@
 import torch

 from executorch.backends.arm._passes.meandim_to_averagepool_pass import (
-    ConvertMeanDimToAveragePool,
+    ConvertMeanDimToAveragePoolPass,
 )
 from executorch.backends.arm.test import common
@@ -41,7 +41,7 @@ class TestMeandimToAveragePool2dPass(unittest.TestCase):
     def test_tosa_BI_meandim_to_averagepool(self):
         module = MeanDim()
-        test_pass_stage = RunPasses([ConvertMeanDimToAveragePool])
+        test_pass_stage = RunPasses([ConvertMeanDimToAveragePoolPass])
         (
             ArmTester(
                 module,
@@ -58,7 +58,7 @@ def test_tosa_BI_meandim_to_averagepool(self):
     def test_tosa_BI_meandim_no_modification(self):
         module = MeanDim2()
-        test_pass_stage = RunPasses([ConvertMeanDimToAveragePool])
+        test_pass_stage = RunPasses([ConvertMeanDimToAveragePoolPass])
         (
             ArmTester(
                 module,
diff --git a/backends/arm/test/tester/arm_tester.py b/backends/arm/test/tester/arm_tester.py
index abb192e3089..e5c700ec3c6 100644
--- a/backends/arm/test/tester/arm_tester.py
+++ b/backends/arm/test/tester/arm_tester.py
@@ -33,6 +33,7 @@
     print_error_diffs,
 )
 from executorch.backends.arm.tosa_mapping import extract_tensor_meta
+from executorch.backends.arm.tosa_specification import TosaSpecification
 from executorch.backends.xnnpack.test.tester import Tester
 from executorch.devtools.backend_debug import get_delegation_info

@@ -184,8 +185,11 @@ def __init__(

     def quantize(self, quantize_stage: Optional[tester.Quantize] = None):
         if quantize_stage is None:
+            tosa_spec: TosaSpecification = TosaSpecification.create_from_compilespecs(
+                compile_specs=self.compile_spec
+            )
             quantize_stage = tester.Quantize(
-                ArmQuantizer(),
+                ArmQuantizer(tosa_spec),
                 get_symmetric_quantization_config(is_per_channel=False),
             )
         return super().quantize(quantize_stage)
diff --git a/examples/arm/aot_arm_compiler.py b/examples/arm/aot_arm_compiler.py
index 1208d79b061..bf7bbd87efd 100644
--- a/examples/arm/aot_arm_compiler.py
+++ b/examples/arm/aot_arm_compiler.py
@@ -16,12 +16,13 @@
 from typing import Any, Dict, Optional, Tuple

 import torch
-from executorch.backends.arm.arm_backend import ArmCompileSpecBuilder
+from executorch.backends.arm.arm_backend import ArmCompileSpecBuilder, CompileSpec
 from executorch.backends.arm.arm_partitioner import ArmPartitioner
 from executorch.backends.arm.quantizer.arm_quantizer import (
     ArmQuantizer,
     get_symmetric_quantization_config,
 )
+from executorch.backends.arm.tosa_specification import TosaSpecification

 from executorch.backends.arm.util.arm_model_evaluator import (
     GenericModelEvaluator,
@@ -88,6 +89,7 @@ def get_model_and_inputs_from_name(model_name: str) -> Tuple[torch.nn.Module, An
 def quantize(
     model: torch.nn.Module,
     model_name: str,
+    tosa_spec: TosaSpecification,
     example_inputs: Tuple[torch.Tensor],
     evaluator_name: str | None,
     evaluator_config: Dict[str, Any] | None,
@@ -95,7 +97,7 @@ def quantize(
     """This is the official recommended flow for quantization in pytorch 2.0 export"""
     logging.info("Quantizing Model...")
     logging.debug(f"Original model: {model}")
-    quantizer = ArmQuantizer()
+    quantizer = ArmQuantizer(tosa_spec)
     # if we set is_per_channel to True, we also need to add out_variant of quantize_per_channel/dequantize_per_channel
     operator_config = get_symmetric_quantization_config(is_per_channel=False)
@@ -260,7 +262,7 @@ def get_compile_spec(
     reorder_inputs: Optional[str] = None,
     system_config: Optional[str] = None,
     memory_mode: Optional[str] = None,
-) -> ArmCompileSpecBuilder:
+) -> list[CompileSpec]:
     spec_builder = None
     if target == "TOSA":
         spec_builder = ArmCompileSpecBuilder().tosa_compile_spec("TOSA-0.80+BI")
@@ -513,17 +515,6 @@ def get_args():

     # Quantize if required
     model_int8 = None
-    if args.quantize:
-        model = quantize(
-            model, args.model_name, example_inputs, args.evaluate, args.evaluate_config
-        )
-        model_int8 = model
-        # Wrap quantized model back into an exported_program
-        exported_program = torch.export.export_for_training(model, example_inputs)
-
-    if args.intermediates:
-        os.makedirs(args.intermediates, exist_ok=True)
-
     if args.delegate:
         # As we can target multiple output encodings from ArmBackend, one must
         # be specified.
@@ -534,6 +525,23 @@ def get_args():
             args.system_config,
             args.memory_mode,
         )
+        if args.quantize:
+            tosa_spec = TosaSpecification.create_from_compilespecs(compile_spec)
+            model = quantize(
+                model,
+                args.model_name,
+                tosa_spec,
+                example_inputs,
+                args.evaluate,
+                args.evaluate_config,
+            )
+            model_int8 = model
+            # Wrap quantized model back into an exported_program
+            exported_program = torch.export.export_for_training(model, example_inputs)
+
+            if args.intermediates:
+                os.makedirs(args.intermediates, exist_ok=True)
+
         edge = to_edge_transform_and_lower(
             exported_program,
             partitioner=[ArmPartitioner(compile_spec)],
             compile_config=EdgeCompileConfig(
                 _skip_dim_order=True,
             ),
         )
+    else:
+        if args.quantize:
+            tosa_spec = TosaSpecification.create_from_string("TOSA-0.80.0+BI")
+            model = quantize(
+                model,
+                args.model_name,
+                tosa_spec,
+                example_inputs,
+                args.evaluate,
+                args.evaluate_config,
+            )
+            model_int8 = model
+            # Wrap quantized model back into an exported_program
+            exported_program = torch.export.export_for_training(model, example_inputs)
+
+            if args.intermediates:
+                os.makedirs(args.intermediates, exist_ok=True)
+
         edge = to_edge_transform_and_lower(
             exported_program,
             compile_config=EdgeCompileConfig(