diff --git a/backends/arm/_passes/__init__.py b/backends/arm/_passes/__init__.py
index 37d3e4278df..e4a1526f573 100644
--- a/backends/arm/_passes/__init__.py
+++ b/backends/arm/_passes/__init__.py
@@ -24,6 +24,7 @@
 from .decompose_gelu_pass import DecomposeGeluPass  # noqa
 from .decompose_layernorm_pass import DecomposeLayerNormPass  # noqa
 from .decompose_leaky_relu_pass import DecomposeLeakyReLUPass  # noqa
+from .decompose_linalg_vector_norm_pass import DecomposeLinearVectorNormPass  # noqa
 from .decompose_linear_pass import DecomposeLinearPass  # noqa
 from .decompose_meandim_pass import DecomposeMeanDimPass  # noqa
 from .decompose_ne_pass import DecomposeNotEqualPass  # noqa
diff --git a/backends/arm/_passes/arm_pass_manager.py b/backends/arm/_passes/arm_pass_manager.py
index 4123d217e94..5f79757f212 100644
--- a/backends/arm/_passes/arm_pass_manager.py
+++ b/backends/arm/_passes/arm_pass_manager.py
@@ -29,6 +29,7 @@
     DecomposeLayerNormPass,
     DecomposeLeakyReLUPass,
     DecomposeLinearPass,
+    DecomposeLinearVectorNormPass,
     DecomposeMeanDimPass,
     DecomposeNotEqualPass,
     DecomposeSelectPass,
@@ -86,6 +87,7 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
         self.add_pass(ConvertSplitToSlicePass())
         self.add_pass(ConvertMmToBmmPass())
         self.add_pass(DecomposeLinearPass())
+        self.add_pass(DecomposeLinearVectorNormPass())
         self.add_pass(DecomposeMeanDimPass())
         self.add_pass(ConvertFullLikeToFullPass())
         self.add_pass(ConvertToClampPass())
@@ -133,6 +135,7 @@ def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
         self.add_pass(FuseBatchnorm2DPass(exported_program))
         self.add_pass(ConvertMmToBmmPass())
         self.add_pass(DecomposeLinearPass())
+        self.add_pass(DecomposeLinearVectorNormPass())
         self.add_pass(DecomposeLeakyReLUPass())
         self.add_pass(DecomposeBatchNormPass())
         self.add_pass(DecomposeLayerNormPass())
@@ -207,6 +210,7 @@ def transform_for_annotation_pipeline(self, graph_module: GraphModule):
         self.add_pass(DecomposeCosineSimilarityPass())
         self.add_pass(DecomposeDivPass())
         self.add_pass(DecomposeLeakyReLUPass())
+        self.add_pass(DecomposeLinearVectorNormPass())
         self.add_pass(DecomposeSqrtPass())
         self.add_pass(DecomposeSiluPass())
 
diff --git a/backends/arm/_passes/decompose_linalg_vector_norm_pass.py b/backends/arm/_passes/decompose_linalg_vector_norm_pass.py
new file mode 100644
index 00000000000..78cb0deae62
--- /dev/null
+++ b/backends/arm/_passes/decompose_linalg_vector_norm_pass.py
@@ -0,0 +1,88 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+from executorch.exir.pass_base import ExportPass
+
+
+class DecomposeLinearVectorNormPass(ExportPass):
+    """
+    This pass decomposes aten.linalg_vector_norm.default into more primitive ops.
+    We need to add this pass before quantization for graph annotation.
+    By default, aten.linalg_vector_norm op is decomposed during legalization to Edge IR.
+
+    The decomposition is as follows:
+
+    For p == 1:
+        out = REDUCE_SUM(ABS(x), dims, keepdim)
+
+    For p == 2:
+        out = SQRT(REDUCE_SUM(MUL(x, x), dims, keepdim))
+
+    For arbitrary p:
+        We don't support arbitrary p, because our decomposition looks like
+        out = POW(REDUCE_SUM(POW(ABS(x), p), dims, keepdim), 1/p)
+        In this case we need to wrap p into Tensor and we need to know
+        dtype prior, but we don't know this from FX graph.
+    """
+
+    torch_linalg_vector_norm = (torch.ops.aten.linalg_vector_norm.default,)
+
+    def call_operator(self, op, args, kwargs, meta):
+        """
+        Replace a linalg_vector_norm call with abs/sum (p == 1) or
+        mul/sum/sqrt (p == 2); any other op is forwarded unchanged.
+
+        The kwargs of the matched call are not forwarded to the emitted ops.
+
+        Raises:
+            ValueError: if the norm order is not 1 or 2, or if no reduction
+                dims were passed.
+        """
+        if op not in self.torch_linalg_vector_norm:
+            return super().call_operator(op, args, kwargs, meta)
+
+        # Extract inputs and optional arguments.
+        # Expected args:
+        #   args[0]: input tensor
+        #   args[1]: norm order 'p' (optional, default: 2.0)
+        #   args[2]: dimensions to reduce (should be provided)
+        #   args[3]: keepdim flag (optional, default: False)
+        input_tensor = args[0]
+        norm_order = args[1] if len(args) > 1 else 2.0
+        norm_dim = args[2] if len(args) > 2 else None
+        keepdim = args[3] if len(args) > 3 else False
+
+        if norm_order not in (1, 2):
+            raise ValueError(
+                f"The order of {norm_order}\n"
+                f"is not supported for linalg_vector_norm operator"
+            )
+
+        if norm_dim is None:
+            raise ValueError("The norm_dim for linalg_vector_norm is None.")
+
+        dims = [norm_dim] if isinstance(norm_dim, int) else list(norm_dim)
+
+        # Decomposition based on norm order.
+        if norm_order == 1:
+            op1 = super().call_operator(
+                torch.ops.aten.abs.default, (input_tensor,), {}, meta
+            )
+            op2 = super().call_operator(
+                torch.ops.aten.sum.dim_IntList, (op1, dims, keepdim), {}, meta
+            )
+            return op2
+
+        elif norm_order == 2:
+            # For p == 2, decomposition is sqrt(sum(x * x, dims, keepdim))
+            op1 = super().call_operator(
+                torch.ops.aten.mul.Tensor, (input_tensor, input_tensor), {}, meta
+            )
+            op2 = super().call_operator(
+                torch.ops.aten.sum.dim_IntList, (op1, dims, keepdim), {}, meta
+            )
+            op3 = super().call_operator(torch.ops.aten.sqrt.default, (op2,), {}, meta)
+            return op3
diff --git a/backends/arm/scripts/parse_test_names.py b/backends/arm/scripts/parse_test_names.py
index 46cf3e17a73..c50f5520b49 100644
--- a/backends/arm/scripts/parse_test_names.py
+++ b/backends/arm/scripts/parse_test_names.py
@@ -8,6 +8,7 @@
 CUSTOM_EDGE_OPS = [
     "linspace.default",
     "eye.default",
+    "vector_norm.default",
     "hardsigmoid.default",
     "hardswish.default",
     "linear.default",
diff --git a/backends/arm/test/ops/test_linalg_vector_norm.py b/backends/arm/test/ops/test_linalg_vector_norm.py
new file mode 100644
index 00000000000..36533d786dd
--- /dev/null
+++ b/backends/arm/test/ops/test_linalg_vector_norm.py
@@ -0,0 +1,131 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Tuple
+
+import torch
+
+from executorch.backends.arm.test import common
+from executorch.backends.arm.test.tester.test_pipeline import (
+    EthosU55PipelineBI,
+    EthosU85PipelineBI,
+    TosaPipelineBI,
+    TosaPipelineMI,
+)
+
+input_t = Tuple[torch.Tensor]
+
+aten_op_q_decomposed_q = "torch.ops.quantized_decomposed.quantize_per_tensor.default"
+exir_op_q_decomposed = "executorch_exir_dialects_edge__ops_quantized_decomposed_quantize_per_tensor_default"
+
+
+class VectorNormModel(torch.nn.Module):
+    def __init__(
+        self,
+        ord=None,
+        dim=1,
+        keepdim=False,
+    ):
+        """
+        A simple module that applies torch.linalg.vector_norm to its input.
+        Ord is 2 by default.
+        """
+        super().__init__()
+        self.ord = ord
+        self.dim = dim
+        self.keepdim = keepdim
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        if self.ord is None and self.dim is None:
+            return torch.linalg.vector_norm(x, keepdim=self.keepdim)
+        elif self.ord is None:
+            return torch.linalg.vector_norm(x, dim=self.dim, keepdim=self.keepdim)
+        elif self.dim is None:
+            return torch.linalg.vector_norm(x, ord=self.ord, keepdim=self.keepdim)
+        else:
+            return torch.linalg.vector_norm(
+                x, ord=self.ord, dim=self.dim, keepdim=self.keepdim
+            )
+
+
+test_modules = {
+    "default": (VectorNormModel(dim=1), (torch.rand(10, 4),)),
+    "ord1": (VectorNormModel(ord=1, dim=1), (torch.rand(10, 4),)),
+    "ord2": (VectorNormModel(ord=2, dim=1), (torch.rand(10, 20),)),
+    # Norm computed along a specific dimension of a 3D tensor
+    "dim_3d": (VectorNormModel(dim=2), (torch.rand(4, 5, 6),)),
+}
+
+
+@common.parametrize("test_module", test_modules)
+def test_vector_norm_tosa_MI(test_module):
+    model, input_tensor = test_module
+
+    # We decompose LinalgVectorNorm before quantize stage to have annotations
+    # with q/dq nodes. In case of MI, this operator will be decomposed
+    # by global decompositions.
+    aten_op = "torch.ops.aten.linalg_vector_norm.default"
+    # Should not find this op
+    exir_op = "executorch_exir_dialects_edge__ops_aten_linalg_vector_norm_default"
+
+    pipeline = TosaPipelineMI[input_t](model, input_tensor, aten_op, exir_op)
+
+    pipeline.change_args("run_method_and_compare_outputs", qtol=1, atol=1e-4, rtol=1e-4)
+    pipeline.run()
+
+
+@common.parametrize("test_module", test_modules)
+def test_vector_norm_tosa_BI(test_module):
+    model, input_tensor = test_module
+
+    # Should not find this op
+    exir_op = "executorch_exir_dialects_edge__ops_aten_linalg_vector_norm_default"
+
+    pipeline = TosaPipelineBI[input_t](
+        model,
+        input_tensor,
+        aten_op_q_decomposed_q,
+        exir_op,
+        symmetric_io_quantization=True,
+    )
+    pipeline.change_args("run_method_and_compare_outputs", qtol=1, atol=1, rtol=1)
+    pipeline.run()
+
+
+@common.parametrize("test_module", test_modules)
+@common.XfailIfNoCorstone300
+def test_vector_norm_u55_BI_fvp(test_module):
+    model, input_tensor = test_module
+
+    pipeline = EthosU55PipelineBI[input_t](
+        model,
+        input_tensor,
+        aten_op_q_decomposed_q,
+        exir_op_q_decomposed,
+        run_on_fvp=True,
+        symmetric_io_quantization=True,
+    )
+    pipeline.change_args("run_method_and_compare_outputs", qtol=1, atol=1, rtol=1)
+    pipeline.pop_stage("check_not.exir")
+    pipeline.run()
+
+
+@common.parametrize("test_module", test_modules)
+@common.XfailIfNoCorstone300
+def test_vector_norm_u85_BI_fvp(test_module):
+    model, input_tensor = test_module
+
+    # The op should be decomposed and annotated in DecomposeLinearVectorNormPass.
+    pipeline = EthosU85PipelineBI[input_t](
+        model,
+        input_tensor,
+        aten_op_q_decomposed_q,
+        exir_op_q_decomposed,
+        run_on_fvp=True,
+        symmetric_io_quantization=True,
+    )
+    pipeline.change_args("run_method_and_compare_outputs", qtol=1, atol=1, rtol=1)
+    pipeline.pop_stage("check_not.exir")
+    pipeline.run()
diff --git a/backends/arm/test/passes/test_decompose_linalg_vector_norm_pass.py b/backends/arm/test/passes/test_decompose_linalg_vector_norm_pass.py
new file mode 100644
index 00000000000..de605f666ac
--- /dev/null
+++ b/backends/arm/test/passes/test_decompose_linalg_vector_norm_pass.py
@@ -0,0 +1,91 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Tuple
+
+import torch
+
+from executorch.backends.arm._passes.decompose_linalg_vector_norm_pass import (
+    DecomposeLinearVectorNormPass,
+)
+from executorch.backends.arm.test import common
+from executorch.backends.arm.test.tester.test_pipeline import PassPipeline
+
+input_t = Tuple[torch.Tensor]
+
+
+class VectorNormModel(torch.nn.Module):
+    """
+    A test module with torch.linalg.vector_norm.
+    https://pytorch.org/docs/stable/generated/torch.linalg.vector_norm.html
+
+    We support only order 1 or 2.
+    """
+
+    def __init__(self, ord: float = None, dim=None, keepdim: bool = False):
+        super().__init__()
+        self.ord = ord
+        self.dim = dim
+        self.keepdim = keepdim
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        if self.ord is None and self.dim is None:
+            return torch.linalg.vector_norm(x, keepdim=self.keepdim)
+        elif self.ord is None:
+            return torch.linalg.vector_norm(x, dim=self.dim, keepdim=self.keepdim)
+        elif self.dim is None:
+            return torch.linalg.vector_norm(x, ord=self.ord, keepdim=self.keepdim)
+        else:
+            return torch.linalg.vector_norm(
+                x, ord=self.ord, dim=self.dim, keepdim=self.keepdim
+            )
+
+    def get_inputs(self) -> input_t:
+        return (torch.rand(4, 4),)
+
+
+modules = {
+    # Default uses p=2 (l2 vector norm)
+    "default_p2": VectorNormModel(dim=1),
+    # p = 1: L1 norm along dim 1
+    "p1": VectorNormModel(ord=1, dim=1),
+}
+
+
+@common.parametrize("module", modules)
+def test_decompose_vector_norm_tosa_BI(module):
+    """
+    This test creates a PassPipeline that applies the DecomposeLinearVectorNormPass.
+    The op counts asserted after the pass depend on the norm order:
+    - p == 1: expects one ABS and one SUM.
+    - p == 2 (default): expects two POW (Tensor_Scalar) ops and one SUM.
+    - Other p: not covered here; the pass rejects orders other than 1 and 2.
+    """
+    ord_val = module.ord if module.ord is not None else 2.0
+
+    if ord_val == 1:
+        ops_after_pass = {
+            "executorch_exir_dialects_edge__ops_aten_abs_default": 1,
+            "executorch_exir_dialects_edge__ops_aten_sum_dim_IntList": 1,
+        }
+    elif ord_val == 2:
+        ops_after_pass = {
+            "executorch_exir_dialects_edge__ops_aten_pow_Tensor_Scalar": 2,
+            "executorch_exir_dialects_edge__ops_aten_sum_dim_IntList": 1,
+        }
+
+    pipeline = PassPipeline[input_t](
+        module,
+        module.get_inputs(),
+        # The op is decomposed in legalization aten -> edge, so we are not able to check ops before
+        ops_before_pass=None,
+        ops_not_before_pass=None,
+        ops_after_pass=ops_after_pass,
+        ops_not_after_pass=[
+            "executorch_exir_dialects_edge__ops_aten_linalg_vector_norm_default",
+        ],
+        pass_list=[DecomposeLinearVectorNormPass],
+    )
+    pipeline.run()