Commit 10eac2d

Arm Backend: Add support for ELU.default operator
Signed-off-by: Agrima Khare <[email protected]>
Change-Id: I032414e7454d5e2cada05b788e9eed0f7b2dc97c
1 parent 275adee commit 10eac2d

8 files changed: +254 additions, −0 deletions

backends/arm/_passes/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -15,6 +15,7 @@
 from .cast_to_int32_pass import CastToInt32Pass  # noqa
 from .conv1d_unsqueeze_pass import Conv1dUnsqueezePass  # noqa
 from .convert_any_default_dim_dims_pass import ConvertAnyDefaultDimDimsPass  # noqa
+from .convert_elu_params import ConvertELUParamsPass  # noqa
 from .convert_expand_copy_to_repeat import ConvertExpandCopyToRepeatPass  # noqa
 from .convert_full_like_to_full_pass import ConvertFullLikeToFullPass  # noqa
 from .convert_int_pow_to_mul import ConvertIntPowToMuls  # noqa
@@ -32,6 +33,7 @@
 from .decompose_batch_norm_no_stats import DecomposeBatchNormNoStatsPass  # noqa
 from .decompose_cosine_similarity_pass import DecomposeCosineSimilarityPass  # noqa
 from .decompose_div_pass import DecomposeDivPass  # noqa
+from .decompose_elu_pass import DecomposeEluPass  # noqa
 from .decompose_embedding_pass import DecomposeEmbeddingPass  # noqa  # noqa
 from .decompose_gelu_pass import DecomposeGeluPass  # noqa
 from .decompose_grouped_conv import DecomposeGroupedConv  # noqa

backends/arm/_passes/arm_pass_manager.py

Lines changed: 4 additions & 0 deletions
@@ -19,6 +19,7 @@
     ComputeConstantOpsAOT,
     Conv1dUnsqueezePass,
     ConvertAnyDefaultDimDimsPass,
+    ConvertELUParamsPass,
     ConvertExpandCopyToRepeatPass,
     ConvertFullLikeToFullPass,
     ConvertIntPowToMuls,
@@ -37,6 +38,7 @@
     DecomposeBatchNormNoStatsPass,
     DecomposeCosineSimilarityPass,
     DecomposeDivPass,
+    DecomposeEluPass,
     DecomposeEmbeddingPass,
     DecomposeGeluPass,
     DecomposeGroupedConv,
@@ -127,6 +129,7 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
         self.add_pass(ReplaceScalarWithTensorArgPassTOSABI())
         self.add_pass(AnnotateDecomposedMatmulPass())
         self.add_pass(QuantizeOperatorArguments())
+        self.add_pass(ConvertELUParamsPass())
         self.add_pass(FoldAndAnnotateQParamsPass(exported_program))  # type: ignore[call-arg]
         self.add_pass(RetraceFoldedDtypesPass())
         self.add_pass(UnsqueezeScalarPlaceholdersPass(exported_program))
@@ -171,6 +174,7 @@ def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
         self.add_pass(DecomposeAtanPass())
         self.add_pass(DecomposeAtanhPass())
         self.add_pass(DecomposeAddmmPass())
+        self.add_pass(DecomposeEluPass())
         self.add_pass(ConvertIntPowToMuls())
         self.add_pass(CastBoolToInt8Pass())
         self.add_pass(DecomposeSinhPass())

backends/arm/_passes/convert_elu_params.py

Lines changed: 46 additions & 0 deletions
@@ -0,0 +1,46 @@
# Copyright 2025 Arm Limited and/or its affiliates.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import torch
from executorch.backends.arm._passes.arm_pass_utils import create_node
from executorch.exir.dialects._ops import ops as exir_ops
from executorch.exir.pass_base import ExportPass, PassResult


class ConvertELUParamsPass(ExportPass):
    """
    Pass to convert the input_scale kwarg of the ELU operator from float
    to int.

    input_scale is set to 2 because the outputs appear to stay the same
    for any value of input_scale other than 1.
    """

    def call(self, graph_module: torch.fx.GraphModule):
        modified_graph = False
        graph = graph_module.graph
        node_list = graph.find_nodes(
            op="call_function", target=exir_ops.edge.aten.elu.default
        )
        for node in node_list:
            with graph.inserting_after(node):
                replace_node = create_node(graph, exir_ops.edge.aten.elu.default)
                replace_node.args = (
                    node.args[0],
                    int(node.args[1]) if len(node.args) > 1 else 1,
                )
                updated_kwargs = dict(node.kwargs)
                updated_kwargs["input_scale"] = int(2)
                replace_node.kwargs = updated_kwargs

                node.replace_all_uses_with(replace_node)
                graph.erase_node(node)

                modified_graph = True
        if modified_graph:
            graph_module.recompile()
            graph_module = super().call(graph_module).graph_module

        return PassResult(graph_module, modified_graph)
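
For context: this pass rewrites the edge-dialect ELU node, but core aten exposes the same signature, elu(Tensor self, Scalar alpha=1, Scalar scale=1, Scalar input_scale=1), which makes the rewrite easy to illustrate outside the compiler. A minimal sketch, not part of this commit:

import torch

x = torch.linspace(-2.0, 2.0, steps=5)

# Before the pass: alpha and input_scale may arrive as floats.
y_before = torch.ops.aten.elu.default(x, 1.0, 1.0, 1.0)

# After the pass: alpha is cast to int and input_scale is pinned to 2.
y_after = torch.ops.aten.elu.default(x, 1, 1.0, 2)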

backends/arm/_passes/decompose_elu_pass.py

Lines changed: 100 additions & 0 deletions
@@ -0,0 +1,100 @@
# Copyright 2025 Arm Limited and/or its affiliates.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import torch
from executorch.backends.arm._passes import ArmPass
from executorch.exir.dialects._ops import ops as exir_ops

edge_elu_ops = (exir_ops.edge.aten.elu.default,)
aten_elu_ops = (torch.ops.aten.elu.default, torch.ops.aten.elu_.default)


def get_elu_decomposition(op) -> tuple:
    """
    Returns the decomposition of the given aten.elu operation into
    its equivalent TOSA-supported operations.

    This handles both edge dialect ops and core PyTorch ops. The
    decomposition strategy is:
        elu(x, alpha) → where(greater_or_eq(x, 0), x, alpha*(exp(x)-1))

    Returns:
        A tuple (add_op, exp_op, ge_op, where_op, mul_op) corresponding to
        the appropriate operator overloads for the input op.

    Raises:
        RuntimeError: If the provided operator is not a supported elu variant.
    """

    if op in edge_elu_ops:
        return (
            exir_ops.edge.aten.add.Scalar,
            exir_ops.edge.aten.exp.default,
            exir_ops.edge.aten.ge.Scalar,
            exir_ops.edge.aten.where.self,
            exir_ops.edge.aten.mul.Scalar,
        )

    if op in aten_elu_ops:
        return (
            torch.ops.aten.add.Scalar,
            torch.ops.aten.exp.default,
            torch.ops.aten.ge.Scalar,
            torch.ops.aten.where.self,
            torch.ops.aten.mul.Scalar,
        )

    raise RuntimeError(f"Can't get elu decomposition for op {op}")


class DecomposeEluPass(ArmPass):
    """
    A transformation pass that decomposes unsupported 'aten.elu' operations
    into a combination of supported TOSA-equivalent operations.

    Since TOSA does not provide a native ELU operator, this pass rewrites:
        elu(x) → where(greater_or_eq(x, 0), x, alpha*(exp(x)-1))

    Supported input ops:
        - aten.elu(x)
        - aten.elu_(x)
        - exir_ops.edge.aten.elu.default(x)

    These are replaced with:
        - aten.exp.default or exir_ops.edge.aten.exp.default
        - aten.add.Scalar or exir_ops.edge.aten.add.Scalar
        - aten.ge.Scalar or exir_ops.edge.aten.ge.Scalar
        - aten.where.self or exir_ops.edge.aten.where.self
        - aten.mul.Scalar or exir_ops.edge.aten.mul.Scalar
    """

    def call_operator(self, op, args, kwargs, meta):
        if op not in (edge_elu_ops + aten_elu_ops):
            return super().call_operator(op, args, kwargs, meta, updated=False)

        (
            add_op,
            exp_op,
            ge_op,
            where_op,
            mul_op,
        ) = get_elu_decomposition(op)

        input = args[0]
        alpha = int(args[1]) if len(args) > 1 else 1

        exp_node = super().call_operator(exp_op, (input,), {}, meta, updated=True)
        sub_node = super().call_operator(
            add_op, (exp_node, -1.0), {}, meta, updated=True
        )
        mul_node = super().call_operator(
            mul_op, (sub_node, alpha), {}, meta, updated=True
        )
        ge_node = super().call_operator(ge_op, (input, 0.0), {}, meta, updated=True)
        where_node = super().call_operator(
            where_op, (ge_node, input, mul_node), {}, meta, updated=True
        )

        return where_node
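
The decomposition is easy to sanity-check in eager mode against PyTorch's reference ELU; a small self-contained sketch (not part of this commit):

import torch

def elu_decomposed(x: torch.Tensor, alpha: float = 1.0) -> torch.Tensor:
    # Mirrors the pass's rewrite: where(x >= 0, x, alpha * (exp(x) - 1))
    return torch.where(x >= 0, x, alpha * (torch.exp(x) - 1.0))

x = torch.linspace(-4.0, 4.0, steps=101)
assert torch.allclose(elu_decomposed(x, alpha=2.0), torch.nn.functional.elu(x, alpha=2.0))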

backends/arm/_passes/insert_table_ops.py

Lines changed: 6 additions & 0 deletions
@@ -64,6 +64,7 @@ class TableOps:
     special_table_ops: Set[EdgeOpOverload] = {
         exir_ops.edge.aten.pow.Tensor_Scalar,
         exir_ops.edge.aten.gelu.default,
+        exir_ops.edge.aten.elu.default,
     }

     def __init__(self, exported_program: ExportedProgram):
@@ -97,6 +98,11 @@ def __getitem__(self, node: Node):
                 return lambda x: torch.nn.functional.gelu(
                     x, approximate=approximate
                 ).flatten()
+            case exir_ops.edge.aten.elu.default:
+                input_alpha = cast(int, node.args[1]) if len(node.args) > 1 else 1
+                return lambda x: torch.nn.functional.elu(
+                    x, alpha=input_alpha
+                ).flatten()
             case _:
                 # Op must be handled if it's inside self.special_ops
                 raise AssertionError("Unhandled table operation")
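
In the quantized (BI) flow, the table-op machinery in insert_table_ops.py uses the returned lambda to evaluate the operator once for every representable input value and bakes the results into a lookup table. A hypothetical, self-contained sketch of that idea over the int8 domain (the scale and zero-point are made-up values, and output re-quantization is omitted):

import torch

def build_elu_table(input_scale: float, input_zp: int, alpha: float = 1.0) -> torch.Tensor:
    # Evaluate the reference function for every possible int8 input value.
    q = torch.arange(-128, 128, dtype=torch.float32)
    x = (q - input_zp) * input_scale  # dequantize each quantized value
    return torch.nn.functional.elu(x, alpha=alpha).flatten()

table = build_elu_table(input_scale=0.05, input_zp=0, alpha=1.0)
assert table.numel() == 256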

backends/arm/operator_support/tosa_supported_operators.py

Lines changed: 1 addition & 0 deletions
@@ -258,6 +258,7 @@ def is_node_supported(
             exir_ops.edge.aten.atanh.default,
             exir_ops.edge.aten.addmm.default,
             exir_ops.edge.aten.masked_fill.Scalar,
+            exir_ops.edge.aten.elu.default,
         ]

         return supported

backends/arm/quantizer/quantization_annotator.py

Lines changed: 1 addition & 0 deletions
@@ -198,6 +198,7 @@ def _match_pattern(
     torch.ops.aten.ceil.default,
     torch.ops.aten.erf.default,
     torch.ops.aten.exp.default,
+    torch.ops.aten.elu.default,
     torch.ops.aten.floor.default,
     torch.ops.aten.log.default,
     torch.ops.aten.reciprocal.default,

backends/arm/test/ops/test_elu.py

Lines changed: 94 additions & 0 deletions
@@ -0,0 +1,94 @@
# Copyright 2025 Arm Limited and/or its affiliates.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from typing import Tuple

import torch
import torch.nn as nn

from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
    EthosU55PipelineBI,
    EthosU85PipelineBI,
    TosaPipelineBI,
    TosaPipelineMI,
)

test_data_suite = {
    # (test_name, test_data)
    "zeros_default": lambda: (1.0, torch.zeros(1, 10, 10, 10)),
    "ones_default": lambda: (1.0, torch.ones(10, 10, 10)),
    "rand_default": lambda: (1.0, torch.rand(10, 10) - 0.5),
    "randn_pos_default": lambda: (1.0, torch.randn(1, 2, 3, 3) + 10),
    "randn_neg_default": lambda: (1.0, torch.randn(2, 4, 3) - 10),
    "ramp_default": lambda: (1.0, torch.arange(-16, 16, 0.2)),
    "large_pos_default": lambda: (1.0, torch.randn(3, 3) * 1e6 + 1e7),
    "large_neg_default": lambda: (1.0, -torch.empty(5).uniform_(1e5, 1e8)),
    "small_pos_default": lambda: (1.0, torch.empty(5).uniform_(1e-8, 1e-5)),
    "small_neg_default": lambda: (1.0, -torch.empty(5).uniform_(1e-8, 1e-5)),
    "zeros_custom": lambda: (2.0, torch.zeros(1, 10, 10, 10)),
    "ones_custom": lambda: (2.0, torch.ones(10, 10, 10)),
    "rand_custom": lambda: (2.0, torch.rand(10, 10) - 0.5),
    "randn_pos_custom": lambda: (2.0, torch.randn(1, 3, 3) + 10),
    "randn_neg_custom": lambda: (2.0, torch.randn(1, 2, 4, 3) - 10),
    "ramp_custom": lambda: (2.0, torch.arange(-16, 16, 0.2)),
    "large_pos_custom": lambda: (2.0, torch.randn(3, 3) * 1e6 + 1e7),
    "large_neg_custom": lambda: (2, -torch.empty(5).uniform_(1e5, 1e8)),
    "small_pos_custom": lambda: (2.0, torch.empty(5).uniform_(1e-8, 1e-5)),
    "small_neg_custom": lambda: (2.0, -torch.empty(5).uniform_(1e-8, 1e-5)),
}


class Elu(nn.Module):
    aten_op = "torch.ops.aten.elu.default"
    exir_op = "executorch_exir_dialects_edge__ops_aten__elu_default"

    def __init__(self, input_alpha: float = 1.0):
        super().__init__()
        self.elu = torch.nn.ELU(alpha=input_alpha)

    def forward(self, input_: torch.Tensor):
        return self.elu(input_)


input_t1 = Tuple[torch.Tensor]


@common.parametrize("test_module", test_data_suite)
def test_elu_tosa_MI(test_module: input_t1):
    alpha, test_data = test_module()
    pipeline = TosaPipelineMI[input_t1](
        Elu(alpha), (test_data,), aten_op=Elu.aten_op, exir_op=Elu.exir_op
    )
    pipeline.run()


@common.parametrize("test_module", test_data_suite)
def test_elu_tosa_BI(test_module: input_t1):
    alpha, test_data = test_module()
    pipeline = TosaPipelineBI[input_t1](
        Elu(alpha), (test_data,), aten_op=Elu.aten_op, exir_op=Elu.exir_op
    )
    pipeline.run()


@common.XfailIfNoCorstone300
@common.parametrize("test_module", test_data_suite)
def test_elu_u55_BI(test_module: input_t1):
    alpha, test_data = test_module()
    pipeline = EthosU55PipelineBI[input_t1](
        Elu(alpha), (test_data,), aten_ops=Elu.aten_op, exir_ops=Elu.exir_op
    )
    pipeline.run()


@common.XfailIfNoCorstone320
@common.parametrize("test_module", test_data_suite)
def test_elu_u85_BI(test_module: input_t1):
    alpha, test_data = test_module()
    pipeline = EthosU85PipelineBI[input_t1](
        Elu(alpha), (test_data,), aten_ops=Elu.aten_op, exir_ops=Elu.exir_op
    )
    pipeline.run()
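
The reference these pipelines compare against is plain eager PyTorch; a standalone sanity check mirroring the "ramp_custom" suite entry (a sketch, not part of this commit):

import torch

alpha, data = 2.0, torch.arange(-16, 16, 0.2)
elu = torch.nn.ELU(alpha=alpha)
expected = torch.where(data >= 0, data, alpha * (torch.exp(data) - 1.0))
assert torch.allclose(elu(data), expected)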
