diff --git a/backends/arm/quantizer/arm_quantizer.py b/backends/arm/quantizer/arm_quantizer.py
index 6a68eb2eb99..e61fbc5bbee 100644
--- a/backends/arm/quantizer/arm_quantizer.py
+++ b/backends/arm/quantizer/arm_quantizer.py
@@ -268,7 +268,6 @@ class ArmQuantizer(Quantizer):
         "sub",
         "mul",
         "mm",
-        "cat",
         "one_to_one",
         "generic",
         "sum",
diff --git a/backends/arm/quantizer/arm_quantizer_utils.py b/backends/arm/quantizer/arm_quantizer_utils.py
index 4a910611bcb..a1d7bfe296d 100644
--- a/backends/arm/quantizer/arm_quantizer_utils.py
+++ b/backends/arm/quantizer/arm_quantizer_utils.py
@@ -144,21 +144,10 @@ def is_share_obs_or_fq_op(op: Callable) -> bool:
         torch.ops.aten.mean.dim,
         torch.ops.aten.permute.default,
         torch.ops.aten.permute_copy.default,
-        torch.ops.aten.squeeze.dim,
-        torch.ops.aten.squeeze.dims,
-        torch.ops.aten.squeeze.default,
-        torch.ops.aten.squeeze_copy.dim,
-        torch.ops.aten.unsqueeze.default,
-        torch.ops.aten.unsqueeze_copy.default,
         # TODO: remove?
         torch.ops.aten.adaptive_avg_pool2d.default,
         torch.ops.aten.avg_pool2d.default,
-        torch.ops.aten.view_copy.default,
-        torch.ops.aten.view.default,
         torch.ops.aten.full.default,
-        torch.ops.aten.slice.Tensor,
-        torch.ops.aten.split.Tensor,
-        torch.ops.aten.split_with_sizes.default,
         torch.ops.aten.flatten.using_ints,
         torch.ops.aten.dropout.default,
         operator.getitem,
diff --git a/backends/arm/quantizer/quantization_annotation/__init__.py b/backends/arm/quantizer/quantization_annotation/__init__.py
index bc3184298f3..7eaa837c5bd 100644
--- a/backends/arm/quantizer/quantization_annotation/__init__.py
+++ b/backends/arm/quantizer/quantization_annotation/__init__.py
@@ -51,7 +51,6 @@ def decorator(annotator: AnnotatorType):
 from . import (  # noqa
     adaptive_ang_pool2d_annotator,
     add_annotator,
-    cat_annotator,
     conv_annotator,
     generic_annotator,
     linear_annotator,
diff --git a/backends/arm/quantizer/quantization_annotation/cat_annotator.py b/backends/arm/quantizer/quantization_annotation/cat_annotator.py
deleted file mode 100644
index 6e138cd9def..00000000000
--- a/backends/arm/quantizer/quantization_annotation/cat_annotator.py
+++ /dev/null
@@ -1,68 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# Copyright 2024 Arm Limited and/or its affiliates.
-#
-# This source code is licensed under the BSD-style license found in the
-# LICENSE file in the root directory of this source tree.
-
-# pyre-unsafe
-
-import itertools
-from typing import Callable, cast, List, Optional
-
-import torch.fx
-from executorch.backends.arm.quantizer import arm_quantizer_utils
-from executorch.backends.arm.quantizer.quantization_annotation import register_annotator
-from executorch.backends.arm.quantizer.quantization_config import QuantizationConfig
-from torch.ao.quantization.quantizer import (
-    QuantizationAnnotation,
-    SharedQuantizationSpec,
-)
-from torch.fx import Node
-from torch.fx.passes.utils.source_matcher_utils import get_source_partitions
-
-
-@register_annotator("cat")
-def _annotate_cat(
-    gm: torch.fx.GraphModule,
-    quantization_config: QuantizationConfig,
-    filter_fn: Optional[Callable[[Node], bool]] = None,
-) -> Optional[List[List[Node]]]:
-    cat_partitions = get_source_partitions(gm.graph, [torch.cat], filter_fn)
-    cat_partitions = list(itertools.chain.from_iterable(cat_partitions.values()))
-    annotated_partitions = []
-    for cat_partition in cat_partitions:
-        annotated_partitions.append(cat_partition.nodes)
-        cat_node = cat_partition.output_nodes[0]
-        if arm_quantizer_utils.is_annotated(cat_node):
-            continue
-
-        input_acts = cast(list[torch.fx.Node], cat_node.args[0])
-        input_act0 = input_acts[0]
-
-        input_act_qspec = quantization_config.get_input_act_qspec()
-        shared_with_input0_qspec = SharedQuantizationSpec((input_act0, cat_node))
-
-        input_qspec_map = {}
-
-        # First input is set to input qspec from the quantization config.
-        if isinstance(input_act0, Node):
-            if not arm_quantizer_utils.is_input_ok_for_quantization(input_act0, gm):
-                continue
-            input_qspec_map[input_act0] = input_act_qspec
-
-        # For the rest of the inputs, share qspec with first.
-        # If we can't quantize any of the inputs, abort annotation.
-        for input_act in input_acts[1:]:
-            if isinstance(input_act, Node):
-                if not arm_quantizer_utils.is_input_ok_for_quantization(input_act, gm):
-                    continue
-                if input_act is not input_act0:
-                    input_qspec_map[input_act] = shared_with_input0_qspec
-
-        if input_qspec_map is not None:
-            cat_node.meta["quantization_annotation"] = QuantizationAnnotation(
-                input_qspec_map=input_qspec_map,
-                output_qspec=shared_with_input0_qspec,
-                _annotated=True,
-            )
-    return annotated_partitions
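With the dedicated annotator deleted, `torch.cat` is picked up by the generic annotator below. A minimal sketch of exercising that path outside the test suite; the capture API and the `set_global` call are assumptions and vary across torch/executorch versions:

```python
import torch
from torch._export import capture_pre_autograd_graph  # assumption: capture API for this torch version
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e

from executorch.backends.arm.quantizer.arm_quantizer import (
    ArmQuantizer,
    get_symmetric_quantization_config,
)


class CatModule(torch.nn.Module):
    def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
        return torch.cat([x, y], dim=1)


example_inputs = (torch.randn(1, 4), torch.randn(1, 4))
graph_module = capture_pre_autograd_graph(CatModule(), example_inputs)

# Assumption: set_global routes the config to all annotators, including "generic".
quantizer = ArmQuantizer().set_global(get_symmetric_quantization_config())
prepared = prepare_pt2e(graph_module, quantizer)
prepared(*example_inputs)  # calibrate so observers record cat's input ranges
quantized = convert_pt2e(prepared)
```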
diff --git a/backends/arm/quantizer/quantization_annotation/generic_annotator.py b/backends/arm/quantizer/quantization_annotation/generic_annotator.py
index f91df1398e8..126051f158f 100644
--- a/backends/arm/quantizer/quantization_annotation/generic_annotator.py
+++ b/backends/arm/quantizer/quantization_annotation/generic_annotator.py
@@ -4,7 +4,6 @@
 # LICENSE file in the root directory of this source tree.
 
 # pyre-unsafe
-
 from typing import Callable, List, Optional
 
 import torch
@@ -24,6 +23,9 @@
     # DATA LAYOUT OPS
     torch.ops.aten.squeeze.default,
     torch.ops.aten.squeeze_copy.default,
+    torch.ops.aten.squeeze_copy.dim,
+    torch.ops.aten.squeeze.dim,
+    torch.ops.aten.squeeze.dims,
     torch.ops.aten.unsqueeze.default,
    torch.ops.aten.unsqueeze_copy.default,
     torch.ops.aten.reshape.default,
@@ -33,19 +35,21 @@
     # torch.ops.aten.view_as_complex_copy.default,
     # torch.ops.aten.view_as_real.default,
     # torch.ops.aten.view_as_real_copy.default,
+    torch.ops.aten.view.default,
     torch.ops.aten.view_copy.default,
     torch.ops.aten.select.int,
     torch.ops.aten.select_copy.int,
     torch.ops.aten.slice.Tensor,
     torch.ops.aten.slice_copy.Tensor,
-    # 'concat' should be handled separately as it has a sequence of inputs and
-    # makes the implementation unnecessary complicated.
-    # torch.ops.aten.concat.default,
+    torch.ops.aten.split.Tensor,
+    torch.ops.aten.split_with_sizes.default,
     torch.ops.aten.transpose.Dimname,
     torch.ops.aten.transpose.int,
     torch.ops.aten.transpose_copy.int,
     torch.ops.aten.tile.default,
     torch.ops.aten.flip.default,
+    torch.ops.aten.cat.default,
+    torch.ops.aten.stack.default,
 ]
@@ -66,15 +70,31 @@ def _annotate_generic(
         if arm_quantizer_utils.is_annotated(node):
             continue
 
-        input_node = node.args[0]
+        input_acts = node.args[0]
+
+        # Check to see if there are multiple inputs.
+        # This allows for stack/cat ops to be annotated
+        # in a similar way.
+        has_multi_inputs = isinstance(input_acts, list)
+
+        input_act0 = input_acts[0] if has_multi_inputs else input_acts
 
         # Using a non-shared quantization spec here as a SharedQuantizationSpec
         # can lead to a recursion.
         _annotate_input_qspec_map(
-            node, input_node, quantization_config.get_input_act_qspec()
+            node, input_act0, quantization_config.get_input_act_qspec()
         )
-        _annotate_output_qspec(node, SharedQuantizationSpec((input_node, node)))
+        shared_with_input0_qspec = SharedQuantizationSpec((input_act0, node))
+
+        if has_multi_inputs:
+            # For the rest of the inputs, share qspec with first.
+            for input_act in input_acts[1:]:
+                if input_act is not input_act0:
+                    node.meta["quantization_annotation"].input_qspec_map[
+                        input_act
+                    ] = shared_with_input0_qspec
+        _annotate_output_qspec(node, shared_with_input0_qspec)
 
         arm_quantizer_utils.mark_nodes_as_annotated([node])
         annotated_partitions.append([node])
 
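The heart of the change: for ops whose first argument is a list (cat, stack), the generic annotator now gives the first input a concrete qspec from the config and ties every remaining input, and the output, to it via `SharedQuantizationSpec`, so all concatenated tensors share one scale and zero-point. Condensed to its essentials (the function name is illustrative, not part of the patch):

```python
from torch.ao.quantization.quantizer import (
    QuantizationAnnotation,
    SharedQuantizationSpec,
)


def annotate_multi_input(node, quantization_config):
    """Illustrative condensation of the generic annotator's cat/stack handling."""
    input_acts = node.args[0]  # cat/stack take a list of input tensors
    input_act0 = input_acts[0]
    shared_with_input0_qspec = SharedQuantizationSpec((input_act0, node))

    # First input gets the concrete qspec; the rest share its observer.
    input_qspec_map = {input_act0: quantization_config.get_input_act_qspec()}
    for input_act in input_acts[1:]:
        if input_act is not input_act0:
            input_qspec_map[input_act] = shared_with_input0_qspec

    node.meta["quantization_annotation"] = QuantizationAnnotation(
        input_qspec_map=input_qspec_map,
        output_qspec=shared_with_input0_qspec,  # output also shares input0's qparams
        _annotated=True,
    )
```

Note that unlike the deleted cat annotator, this path no longer runs `is_input_ok_for_quantization` on each input; once the op matches, annotation is unconditional.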
diff --git a/backends/arm/test/ops/test_slice.py b/backends/arm/test/ops/test_slice.py
index 0bab21f907b..18db358fdf4 100644
--- a/backends/arm/test/ops/test_slice.py
+++ b/backends/arm/test/ops/test_slice.py
@@ -8,13 +8,9 @@
 from typing import Tuple
 
 import torch
-from executorch.backends.arm.quantizer.arm_quantizer import (
-    ArmQuantizer,
-    get_symmetric_quantization_config,
-)
+
 from executorch.backends.arm.test import common
 from executorch.backends.arm.test.tester.arm_tester import ArmTester
-from executorch.backends.xnnpack.test.tester.tester import Quantize
 from executorch.exir.backend.compile_spec_schema import CompileSpec
 from parameterized import parameterized
 
@@ -59,7 +55,6 @@
     def _test_slice_tosa_BI_pipeline(
         self, module: torch.nn.Module, test_data: Tuple[torch.Tensor], permute: bool
     ):
-        quantizer = ArmQuantizer().set_io(get_symmetric_quantization_config())
         (
             ArmTester(
                 module,
@@ -68,7 +63,7 @@
                     permute_memory_to_nhwc=permute
                 ),
             )
-            .quantize(Quantize(quantizer, get_symmetric_quantization_config()))
+            .quantize()
             .export()
             .check(["torch.ops.aten.slice.Tensor"])
             .to_edge()
@@ -84,14 +79,13 @@
         module: torch.nn.Module,
         test_data: Tuple[torch.Tensor],
     ):
-        quantizer = ArmQuantizer().set_io(get_symmetric_quantization_config())
         (
             ArmTester(
                 module,
                 example_inputs=test_data,
                 compile_spec=common.get_u55_compile_spec(),
             )
-            .quantize(Quantize(quantizer, get_symmetric_quantization_config()))
+            .quantize()
             .export()
             .check(["torch.ops.aten.slice.Tensor"])
             .to_edge()
diff --git a/backends/arm/test/ops/test_split.py b/backends/arm/test/ops/test_split.py
index 3f6edc0c2b8..8ed0e723f18 100644
--- a/backends/arm/test/ops/test_split.py
+++ b/backends/arm/test/ops/test_split.py
@@ -7,13 +7,9 @@
 import unittest
 
 import torch
-from executorch.backends.arm.quantizer.arm_quantizer import (
-    ArmQuantizer,
-    get_symmetric_quantization_config,
-)
+
 from executorch.backends.arm.test import common
 from executorch.backends.arm.test.tester.arm_tester import ArmTester
-from executorch.backends.xnnpack.test.tester.tester import Quantize
 from executorch.exir.backend.compile_spec_schema import CompileSpec
 from parameterized import parameterized
 
@@ -79,14 +75,13 @@
     def _test_split_tosa_BI_pipeline(
         self, module: torch.nn.Module, test_data: test_data_t
     ):
-        quantizer = ArmQuantizer().set_io(get_symmetric_quantization_config())
         (
             ArmTester(
                 module,
                 example_inputs=test_data,
                 compile_spec=common.get_tosa_compile_spec(),
             )
-            .quantize(Quantize(quantizer, get_symmetric_quantization_config()))
+            .quantize()
             .export()
             .to_edge()
             .partition()
@@ -98,14 +93,13 @@
     def _test_split_ethosu_BI_pipeline(
         self, compile_spec: CompileSpec, module: torch.nn.Module, test_data: test_data_t
     ):
-        quantizer = ArmQuantizer().set_io(get_symmetric_quantization_config())
         (
             ArmTester(
                 module,
                 example_inputs=test_data,
                 compile_spec=compile_spec,
             )
-            .quantize(Quantize(quantizer, get_symmetric_quantization_config()))
+            .quantize()
             .export()
             .check(["torch.ops.aten.split.Tensor"])
             .to_edge()
diff --git a/backends/arm/test/ops/test_squeeze.py b/backends/arm/test/ops/test_squeeze.py
index c9d7d421956..c3f1edf37be 100644
--- a/backends/arm/test/ops/test_squeeze.py
+++ b/backends/arm/test/ops/test_squeeze.py
@@ -13,14 +13,9 @@
 
 import torch
 
-from executorch.backends.arm.quantizer.arm_quantizer import (
-    ArmQuantizer,
-    get_symmetric_quantization_config,
-)
 from executorch.backends.arm.test import common
 from executorch.backends.arm.test.tester.arm_tester import ArmTester
 
-from executorch.backends.xnnpack.test.tester.tester import Quantize
 from executorch.exir.backend.compile_spec_schema import CompileSpec
 from parameterized import parameterized
 
@@ -83,14 +78,13 @@ def _test_squeeze_tosa_BI_pipeline(
         test_data: Tuple[torch.Tensor, Optional[tuple[int]]],
         export_target: str,
     ):
-        quantizer = ArmQuantizer().set_io(get_symmetric_quantization_config())
         (
             ArmTester(
                 module,
                 example_inputs=test_data,
                 compile_spec=common.get_tosa_compile_spec(),
             )
-            .quantize(Quantize(quantizer, get_symmetric_quantization_config()))
+            .quantize()
             .export()
             .check_count({export_target: 1})
             .to_edge()
@@ -107,10 +101,9 @@ def _test_squeeze_ethosu_BI_pipeline(
         test_data: Tuple[torch.Tensor, Optional[tuple[int]]],
         export_target: str,
     ):
-        quantizer = ArmQuantizer().set_io(get_symmetric_quantization_config())
         (
             ArmTester(module, example_inputs=test_data, compile_spec=compile_spec)
-            .quantize(Quantize(quantizer, get_symmetric_quantization_config()))
+            .quantize()
             .export()
             .check_count({export_target: 1})
             .to_edge()
diff --git a/backends/arm/test/ops/test_unsqueeze.py b/backends/arm/test/ops/test_unsqueeze.py
index 1cc597c0661..36bb93b7960 100644
--- a/backends/arm/test/ops/test_unsqueeze.py
+++ b/backends/arm/test/ops/test_unsqueeze.py
@@ -13,14 +13,9 @@
 
 import torch
 
-from executorch.backends.arm.quantizer.arm_quantizer import (
-    ArmQuantizer,
-    get_symmetric_quantization_config,
-)
 from executorch.backends.arm.test import common
 from executorch.backends.arm.test.tester.arm_tester import ArmTester
 
-from executorch.backends.xnnpack.test.tester.tester import Quantize
 from executorch.exir.backend.compile_spec_schema import CompileSpec
 from parameterized import parameterized
 
@@ -54,14 +49,13 @@ def _test_unsqueeze_tosa_MI_pipeline(
     def _test_unsqueeze_tosa_BI_pipeline(
         self, module: torch.nn.Module, test_data: Tuple[torch.Tensor, int]
     ):
-        quantizer = ArmQuantizer().set_io(get_symmetric_quantization_config())
         (
             ArmTester(
                 module,
                 example_inputs=test_data,
                 compile_spec=common.get_tosa_compile_spec(),
             )
-            .quantize(Quantize(quantizer, get_symmetric_quantization_config()))
+            .quantize()
             .export()
             .check_count({"torch.ops.aten.unsqueeze.default": 1})
             .to_edge()
@@ -77,14 +71,13 @@ def _test_unsqueeze_ethosu_BI_pipeline(
         module: torch.nn.Module,
         test_data: Tuple[torch.Tensor, int],
     ):
-        quantizer = ArmQuantizer().set_io(get_symmetric_quantization_config())
         (
             ArmTester(
                 module,
                 example_inputs=test_data,
                 compile_spec=compile_spec,
             )
-            .quantize(Quantize(quantizer, get_symmetric_quantization_config()))
+            .quantize()
             .export()
             .check_count({"torch.ops.aten.unsqueeze.default": 1})
             .to_edge()
diff --git a/backends/arm/test/ops/test_view.py b/backends/arm/test/ops/test_view.py
index fe1f2981da9..54e80702e39 100644
--- a/backends/arm/test/ops/test_view.py
+++ b/backends/arm/test/ops/test_view.py
@@ -13,14 +13,9 @@
 
 import torch
 
-from executorch.backends.arm.quantizer.arm_quantizer import (
-    ArmQuantizer,
-    get_symmetric_quantization_config,
-)
 from executorch.backends.arm.test import common
 from executorch.backends.arm.test.tester.arm_tester import ArmTester
 
-from executorch.backends.xnnpack.test.tester.tester import Quantize
 from executorch.exir.backend.compile_spec_schema import CompileSpec
 from parameterized import parameterized
 
@@ -74,14 +69,13 @@ def _test_view_tosa_MI_pipeline(
     def _test_view_tosa_BI_pipeline(
         self, module: torch.nn.Module, test_data: Tuple[torch.Tensor]
     ):
-        quantizer = ArmQuantizer().set_io(get_symmetric_quantization_config())
         (
             ArmTester(
                 module,
                 example_inputs=test_data,
                 compile_spec=common.get_tosa_compile_spec(),
             )
-            .quantize(Quantize(quantizer, get_symmetric_quantization_config()))
+            .quantize()
             .export()
             .check_count({"torch.ops.aten.view.default": 1})
             .to_edge()
@@ -97,10 +91,13 @@ def _test_view_ethos_BI_pipeline(
         module: torch.nn.Module,
         test_data: Tuple[torch.Tensor],
     ):
-        quantizer = ArmQuantizer().set_io(get_symmetric_quantization_config())
         (
-            ArmTester(module, example_inputs=test_data, compile_spec=compile_spec)
-            .quantize(Quantize(quantizer, get_symmetric_quantization_config()))
+            ArmTester(
+                module,
+                example_inputs=test_data,
+                compile_spec=compile_spec,
+            )
+            .quantize()
             .export()
             .check_count({"torch.ops.aten.view.default": 1})
             .to_edge()
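With cat, stack, slice, split, squeeze, unsqueeze, and view all routed through the generic annotator, the tests can rely on the tester's default quantization stage instead of hand-building an `ArmQuantizer`. A hypothetical BI pipeline for one of the newly covered ops, following the updated pattern (the module and op string are illustrative, not part of the patch):

```python
import torch

from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.arm_tester import ArmTester


class StackModule(torch.nn.Module):
    def forward(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
        return torch.stack([x, y], dim=0)


(
    ArmTester(
        StackModule(),
        example_inputs=(torch.randn(2, 4), torch.randn(2, 4)),
        compile_spec=common.get_tosa_compile_spec(),
    )
    .quantize()  # default quantizer config; no explicit Quantize(...) stage
    .export()
    .check(["torch.ops.aten.stack.default"])
    .to_edge()
    .partition()
)
```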