
Commit cea5f03

Merge branch 'main' into jz/fix-rpath-apple
2 parents ec0d1f1 + 0d0769a commit cea5f03

Note: this is a large commit; only a subset of the changed files is shown below.

50 files changed: +1445, -956 lines

backends/arm/_passes/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -14,6 +14,7 @@
 from .cast_to_int32_pass import CastToInt32Pass  # noqa
 from .conv1d_unsqueeze_pass import Conv1dUnsqueezePass  # noqa
 from .convert_any_default_dim_dims_pass import ConvertAnyDefaultDimDimsPass  # noqa
+from .convert_elu_params import ConvertELUParamsPass  # noqa
 from .convert_expand_copy_to_repeat import ConvertExpandCopyToRepeatPass  # noqa
 from .convert_full_like_to_full_pass import ConvertFullLikeToFullPass  # noqa
 from .convert_int64_const_ops_to_int32 import ConvertInt64ConstOpsToInt32Pass  # noqa
@@ -36,6 +37,7 @@
 from .decompose_cosine_similarity_pass import DecomposeCosineSimilarityPass  # noqa
 from .decompose_cumsum_pass import DecomposeCumsumPass  # noqa
 from .decompose_div_pass import DecomposeDivPass  # noqa
+from .decompose_elu_pass import DecomposeEluPass  # noqa
 from .decompose_embedding_pass import DecomposeEmbeddingPass  # noqa # noqa
 from .decompose_expm1_pass import DecomposeExpm1Pass  # noqa
 from .decompose_gelu_pass import DecomposeGeluPass  # noqa

backends/arm/_passes/arm_pass_manager.py

Lines changed: 5 additions & 0 deletions
@@ -18,6 +18,7 @@
     ComputeConstantOpsAOT,
     Conv1dUnsqueezePass,
     ConvertAnyDefaultDimDimsPass,
+    ConvertELUParamsPass,
     ConvertExpandCopyToRepeatPass,
     ConvertFullLikeToFullPass,
     ConvertInt64ConstOpsToInt32Pass,
@@ -41,6 +42,7 @@
     DecomposeCosineSimilarityPass,
     DecomposeCumsumPass,
     DecomposeDivPass,
+    DecomposeEluPass,
     DecomposeEmbeddingPass,
     DecomposeExpm1Pass,
     DecomposeGeluPass,
@@ -135,6 +137,7 @@ def _tosa_INT_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
         self.add_pass(ReplaceScalarWithTensorArgPassTOSABI())
         self.add_pass(AnnotateDecomposedMatmulPass())
         self.add_pass(QuantizeOperatorArguments())
+        self.add_pass(ConvertELUParamsPass())
         self.add_pass(FoldAndAnnotateQParamsPass(exported_program))  # type: ignore[call-arg]
         self.add_pass(RetraceFoldedDtypesPass())
         self.add_pass(UnsqueezeScalarPlaceholdersPass(exported_program))
@@ -183,6 +186,8 @@ def _tosa_FP_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
         self.add_pass(DecomposeAtanPass())
         self.add_pass(DecomposeAtanhPass())
         self.add_pass(DecomposeAddmmPass())
+        self.add_pass(DecomposeEluPass())
+        self.add_pass(DecomposeExpm1Pass())
         self.add_pass(ConvertIntPowToMuls())
         self.add_pass(CastBoolToInt8Pass())
         self.add_pass(DecomposeSinhPass())
backends/arm/_passes/convert_elu_params.py

Lines changed: 53 additions & 0 deletions
@@ -0,0 +1,53 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+from executorch.backends.arm._passes.arm_pass_utils import create_node
+from executorch.exir.dialects._ops import ops as exir_ops
+from executorch.exir.pass_base import ExportPass, PassResult
+
+
+class ConvertELUParamsPass(ExportPass):
+    """
+    Pass to convert the input_scale kwarg of ELU operator from float to
+    int.
+
+    It has been set to 2 as the outputs seem to stay the same regardless of what
+    the value of input_scale is, as long as that value is not 1.
+    """
+
+    def call(self, graph_module: torch.fx.GraphModule):
+        modified_graph = False
+        graph = graph_module.graph
+        node_list = graph.find_nodes(
+            op="call_function", target=exir_ops.edge.aten.elu.default
+        )
+        for node in node_list:
+            with graph.inserting_after(node):
+                replace_node = create_node(graph, exir_ops.edge.aten.elu.default)
+                old_args = list(node.args)
+
+                alpha = old_args[1] if len(old_args) > 1 else 1.0
+                scale = 1.0
+                input_scale = 2.0
+
+                replace_node.args = (old_args[0],)
+
+                updated_kwargs = dict(node.kwargs)
+                updated_kwargs["alpha"] = int(alpha)
+                updated_kwargs["scale"] = int(scale)
+                updated_kwargs["input_scale"] = int(input_scale)
+
+                replace_node.kwargs = updated_kwargs
+
+                node.replace_all_uses_with(replace_node)
+                graph.erase_node(node)
+
+                modified_graph = True
+        if modified_graph:
+            graph_module.recompile()
+            graph_module = super().call(graph_module).graph_module
+
+        return PassResult(graph_module, modified_graph)
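
For reference, a minimal numerical sketch (not part of this commit) of the ATen ELU signature this pass rewrites. It assumes the standard ATen formula elu(x, alpha, scale, input_scale) = scale * where(x > 0, x, alpha * (exp(x * input_scale) - 1)):

import torch

x = torch.linspace(-3, 3, steps=7)
alpha, scale, input_scale = 1.0, 1.0, 2.0

# Manual reference for aten.elu's three-parameter form.
reference = scale * torch.where(x > 0, x, alpha * torch.expm1(x * input_scale))
actual = torch.ops.aten.elu(x, alpha, scale, input_scale)

assert torch.allclose(actual, reference)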
backends/arm/_passes/decompose_elu_pass.py

Lines changed: 85 additions & 0 deletions
@@ -0,0 +1,85 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from executorch.backends.arm._passes import ArmPass
+from executorch.exir.dialects._ops import ops as exir_ops
+
+edge_elu_ops = (exir_ops.edge.aten.elu.default,)
+
+
+def get_elu_decomposition(op) -> tuple:
+    """
+    Returns the decomposition of the given aten.elu operation into
+    its equivalent TOSA-supported operations
+
+    This handles both edge dialect ops and core PyTorch ops. The decomposition strategy
+    is:
+        elu(x, y) → where(greater_or_eq(x, 0), (exp(x)-1), x)
+
+    Returns:
+        A tuple (expm1_op, ge_op, where_op, mul_op) corresponding to the appropriate operator
+        overloads for the input op.
+
+    Raises:
+        RuntimeError: If the provided operator is not a supported elu variant.
+    """
+
+    if op in edge_elu_ops:
+        return (
+            exir_ops.edge.aten.expm1.default,
+            exir_ops.edge.aten.ge.Scalar,
+            exir_ops.edge.aten.where.self,
+            exir_ops.edge.aten.mul.Scalar,
+        )
+
+    raise RuntimeError(f"Can't get elu decomposition for op {op}")
+
+
+class DecomposeEluPass(ArmPass):
+    """
+    A transformation pass that decomposes unsupported 'aten.elu' operations
+    into a combination of supported TOSA-equivalent operations.
+
+    Since TOSA does not provide a native ELU operator, this pass rewrites:
+        elu(x) → where(greater_or_eq(x, 0), (alpha*(exp(x)-1)), x)
+
+    Supported input ops:
+    - exir_ops.edge.aten.elu.Tensor(x)
+
+    These are replaced with:
+    - exir_ops.edge.aten.expm1.default
+    - exir_ops.edge.aten.ge.Scalar
+    - exir_ops.edge.aten.where.self
+    - exir_ops.edge.aten.mul.Scalar
+    """
+
+    def call_operator(self, op, args, kwargs, meta):
+        if op not in edge_elu_ops:
+            return super().call_operator(op, args, kwargs, meta, updated=False)
+
+        (
+            expm1_op,
+            ge_op,
+            where_op,
+            mul_op,
+        ) = get_elu_decomposition(op)
+
+        input = args[0]
+        alpha = args[1] if len(args) > 1 else 1.0
+
+        if alpha == 0:
+            relu_op = exir_ops.edge.aten.relu.default
+            return super().call_operator(relu_op, (input,), {}, meta, updated=True)
+
+        expm1_node = super().call_operator(expm1_op, (input,), {}, meta, updated=True)
+        mul_node = super().call_operator(
+            mul_op, (expm1_node, alpha), {}, meta, updated=True
+        )
+        ge_node = super().call_operator(ge_op, (input, 0.0), {}, meta, updated=True)
+        where_node = super().call_operator(
+            where_op, (ge_node, input, mul_node), {}, meta, updated=True
+        )
+
+        return where_node
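
A quick numerical check (illustrative only, not part of the commit) that the decomposition emitted above, where(x >= 0, x, alpha * expm1(x)), matches PyTorch's reference ELU:

import torch

x = torch.randn(1024)
alpha = 0.5

# Decomposition produced by DecomposeEluPass: ge -> expm1 -> mul -> where.
decomposed = torch.where(x >= 0, x, alpha * torch.expm1(x))
reference = torch.nn.functional.elu(x, alpha=alpha)

assert torch.allclose(decomposed, reference)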

backends/arm/_passes/insert_table_ops.py

Lines changed: 6 additions & 0 deletions
@@ -59,6 +59,7 @@ class TableOps:
     special_table_ops: Set[EdgeOpOverload] = {
         exir_ops.edge.aten.pow.Tensor_Scalar,
         exir_ops.edge.aten.gelu.default,
+        exir_ops.edge.aten.elu.default,
     }

     def __init__(self, exported_program: ExportedProgram):
@@ -92,6 +93,11 @@ def __getitem__(self, node: Node):
                 return lambda x: torch.nn.functional.gelu(
                     x, approximate=approximate
                 ).flatten()
+            case exir_ops.edge.aten.elu.default:
+                input_alpha = cast(int, node.kwargs["alpha"])
+                return lambda x: torch.nn.functional.elu(
+                    x, alpha=input_alpha
+                ).flatten()
             case _:
                 # Op must be handled if it's inside self.special_ops
                 raise AssertionError("Unhandled table operation")
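
Illustrative sketch (not from the diff) of the callable registered above: it applies ELU with the node's integer alpha and flattens the result, mirroring the existing gelu entry. The int8-range sample input here is only an assumption about how the table machinery evaluates the function.

import torch

input_alpha = 1  # ConvertELUParamsPass has already cast alpha to int at this point
table_fn = lambda x: torch.nn.functional.elu(x, alpha=input_alpha).flatten()

# Example evaluation over a dequantized int8 value range (assumed for illustration).
x = torch.arange(-128, 128, dtype=torch.float32)
print(table_fn(x).shape)  # torch.Size([256])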

backends/arm/_passes/to_tosa_memory_format_pass.py

Lines changed: 14 additions & 2 deletions
@@ -12,7 +12,6 @@
     get_first_fake_tensor,
     is_param_node,
 )
-from executorch.backends.arm.tosa_utils import is_consumer_node_depthwise_conv2d
 from executorch.exir import ExportedProgram
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass, PassResult
@@ -43,6 +42,19 @@ def __init__(self, exported_program: ExportedProgram) -> None:
         self.exported_program = exported_program
         super().__init__()

+    @staticmethod
+    def _is_consumer_node_depthwise_conv2d(node: torch.fx.Node):
+        consumer_node = list(node.users)[0]
+        if consumer_node.target == exir_ops.edge.aten.convolution.default:
+            consumer_node_inputs = consumer_node.all_input_nodes
+            groups = consumer_node.args[-1]
+            in_channels = consumer_node_inputs[0].meta["val"].shape[1]
+            out_channels = consumer_node_inputs[1].meta["val"].shape[0]
+            if (in_channels == groups) and (out_channels % in_channels) == 0:
+                return True
+
+        return False
+
     def is_weight_node_for_depthwise_conv2d(self, node: torch.fx.Node):
         """
         returns True for w in the following sequence;
@@ -53,7 +65,7 @@ def is_weight_node_for_depthwise_conv2d(self, node: torch.fx.Node):
         consumer_node = list(node.users)[0]
         if self.is_weight_node_for_depthwise_conv2d(consumer_node):
             return True
-        if is_consumer_node_depthwise_conv2d(node):
+        if self._is_consumer_node_depthwise_conv2d(node):
            # Check that node is the weight-argument and not input or bias
            return consumer_node.args[1] == node

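
For context, a small sketch (not part of the commit) of the condition the new _is_consumer_node_depthwise_conv2d helper encodes: a convolution is treated as depthwise when groups equals the input channel count and the output channel count is a multiple of it.

import torch

in_channels, channel_multiplier = 8, 2
dw = torch.nn.Conv2d(
    in_channels,
    in_channels * channel_multiplier,  # out_channels % in_channels == 0
    kernel_size=3,
    groups=in_channels,  # groups == in_channels marks the conv as depthwise
)
assert dw.groups == dw.in_channels and dw.out_channels % dw.in_channels == 0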

backends/arm/operator_support/tosa_supported_operators.py

Lines changed: 1 addition & 0 deletions
@@ -263,6 +263,7 @@ def is_node_supported(
             exir_ops.edge.aten.glu.default,
             exir_ops.edge.aten.logit.default,
             exir_ops.edge.aten.acos.default,
+            exir_ops.edge.aten.elu.default,
         ]

         return supported

backends/arm/operators/op_add.py

Lines changed: 7 additions & 3 deletions
@@ -53,10 +53,9 @@ def define_node(
             [ts.DType.INT8, ts.DType.INT32],
             output.tosa_spec,
         )
-
         scale_back = 1.0
         if inputs[0].dtype == ts.DType.INT8:
-            rescaled_inputs, scale_back = tqutils.insert_rescale_ops_to_int32(
+            rescaled_inputs, scale_back = tqutils.insert_rescale_ops_to_int32_maxscale(
                 tosa_graph, inputs, node, self.tosa_spec
             )
         else:
@@ -85,7 +84,12 @@ def define_node(
         # Scale output back to 8 bit
         # pyre-ignore
         tqutils.insert_rescale_op_to_int8(
-            tosa_graph, add_output, scale_back, node, self.tosa_spec
+            tosa_graph,
+            add_output,
+            scale_back,
+            node,
+            compute_rescale=False,
+            tosa_spec=self.tosa_spec,
         )  # type: ignore[possibly-undefined]


backends/arm/operators/op_sub.py

Lines changed: 7 additions & 2 deletions
@@ -56,7 +56,7 @@ def define_node(

         scale_back = 1.0
         if inputs[0].dtype == ts.DType.INT8:
-            rescaled_inputs, scale_back = tqutils.insert_rescale_ops_to_int32(
+            rescaled_inputs, scale_back = tqutils.insert_rescale_ops_to_int32_maxscale(
                 tosa_graph, inputs, node, self.tosa_spec
             )
         else:
@@ -86,7 +86,12 @@ def define_node(
         # Scale output back to 8 bit
         # pyre-ignore
         tqutils.insert_rescale_op_to_int8(
-            tosa_graph, sub_output, scale_back, node, self.tosa_spec
+            tosa_graph,
+            sub_output,
+            scale_back,
+            node,
+            compute_rescale=False,
+            tosa_spec=self.tosa_spec,
        )  # type: ignore[possibly-undefined]

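
Both op_add.py and op_sub.py above follow the same int8 scheme: rescale the int8 operands into int32, accumulate, then rescale the result back to the output's int8 scale. The following self-contained sketch illustrates that general pattern only; the helper names and scale handling are simplifications and do not reflect the tqutils implementation:

import torch

def quantize(x, scale):
    # Symmetric int8 quantization with zero-point 0, for illustration only.
    return torch.clamp(torch.round(x / scale), -128, 127).to(torch.int8)

a = torch.tensor([0.5, -1.25])
b = torch.tensor([0.75, 0.25])
scale_a, scale_b, scale_out = 0.02, 0.03, 0.04

qa, qb = quantize(a, scale_a), quantize(b, scale_b)

# Bring both operands to a shared scale in int32 before the add.
common_scale = max(scale_a, scale_b)
ia = torch.round(qa.to(torch.int32) * (scale_a / common_scale)).to(torch.int32)
ib = torch.round(qb.to(torch.int32) * (scale_b / common_scale)).to(torch.int32)

acc = ia + ib  # int32 accumulation avoids int8 overflow

# Rescale the int32 result back to the output's int8 quantization scale.
out_q = torch.clamp(torch.round(acc * (common_scale / scale_out)), -128, 127).to(torch.int8)
print(out_q.to(torch.float32) * scale_out)  # approximately a + b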

backends/arm/quantizer/quantization_annotator.py

Lines changed: 5 additions & 4 deletions
@@ -266,6 +266,7 @@ def _match_pattern(
    torch.ops.aten.erf.default,
    torch.ops.aten.exp.default,
    torch.ops.aten.expm1.default,
+   torch.ops.aten.elu.default,
    torch.ops.aten.floor.default,
    torch.ops.aten.log.default,
    torch.ops.aten.reciprocal.default,
@@ -472,6 +473,10 @@ def any_or_hardtanh_min_zero(n: Node):
        ]
        quant_properties.quant_output = _QuantProperty(0, output_act_qspec)
    elif node.target in (
+       torch.ops.aten.add.Tensor,
+       torch.ops.aten.add_.Tensor,
+       torch.ops.aten.sub.Tensor,
+       torch.ops.aten.sub_.Tensor,
        torch.ops.aten.matmul.default,
        torch.ops.aten.mm.default,
        torch.ops.aten.bmm.default,
@@ -484,10 +489,6 @@ def any_or_hardtanh_min_zero(n: Node):
        ]
        quant_properties.quant_output = _QuantProperty(0, output_act_qspec)
    elif node.target in (
-       torch.ops.aten.add.Tensor,
-       torch.ops.aten.add_.Tensor,
-       torch.ops.aten.sub.Tensor,
-       torch.ops.aten.sub_.Tensor,
        torch.ops.aten.minimum.default,
        torch.ops.aten.maximum.default,
    ):
