From 24994b5fc92ead23da6023e31f657aebdb17898a Mon Sep 17 00:00:00 2001
From: Emma Kujala <emma.kujala@arm.com>
Date: Tue, 17 Jun 2025 16:12:55 +0200
Subject: [PATCH] Arm backend: Add decomposition pass and test for asin

Change-Id: I8b4a23da4ecda88376ddae22da21d2c8ccc04796
Signed-off-by: Emma Kujala <emma.kujala@arm.com>
---
 backends/arm/_passes/__init__.py              |   1 +
 backends/arm/_passes/arm_pass_manager.py      |   2 +
 backends/arm/_passes/decompose_asin_pass.py   | 201 ++++++++++++++++++
 backends/arm/_passes/insert_table_ops.py      |   1 +
 .../tosa_supported_operators.py               |   1 +
 .../arm/quantizer/quantization_annotator.py   |   1 +
 backends/arm/test/ops/test_asin.py            |  80 +++++++
 7 files changed, 287 insertions(+)
 create mode 100644 backends/arm/_passes/decompose_asin_pass.py
 create mode 100644 backends/arm/test/ops/test_asin.py

diff --git a/backends/arm/_passes/__init__.py b/backends/arm/_passes/__init__.py
index f79dd61008d..02b0ba8e386 100644
--- a/backends/arm/_passes/__init__.py
+++ b/backends/arm/_passes/__init__.py
@@ -24,6 +24,7 @@
 from .convert_to_clamp import ConvertToClampPass  # noqa
 from .decompose_acosh_pass import DecomposeAcoshPass  # noqa
 from .decompose_adaptive_avg_pool2d_pass import DecomposeAdaptiveAvgPool2dPass  # noqa
+from .decompose_asin_pass import DecomposeAsinPass  # noqa
 from .decompose_atan_pass import DecomposeAtanPass  # noqa
 from .decompose_avg_pool2d import DecomposeAvgPool2d  # noqa
 from .decompose_batch_norm_no_stats import DecomposeBatchNormNoStatsPass  # noqa
diff --git a/backends/arm/_passes/arm_pass_manager.py b/backends/arm/_passes/arm_pass_manager.py
index f0f4f76769c..6570891509f 100644
--- a/backends/arm/_passes/arm_pass_manager.py
+++ b/backends/arm/_passes/arm_pass_manager.py
@@ -29,6 +29,7 @@
     ConvertToClampPass,
     DecomposeAcoshPass,
     DecomposeAdaptiveAvgPool2dPass,
+    DecomposeAsinPass,
     DecomposeAtanPass,
     DecomposeAvgPool2d,
     DecomposeBatchNormNoStatsPass,
@@ -158,6 +159,7 @@ def _tosa_080_BI_pipeline(self, exported_program: ExportedProgram) -> GraphModul
     def _tosa_080_MI_pipeline(self, exported_program: ExportedProgram) -> GraphModule:
         self.add_pass(DecomposeRoundPass())
         self.add_pass(DecomposeAcoshPass())
+        self.add_pass(DecomposeAsinPass())
         self.add_pass(DecomposeSqrtPass())
         self.add_pass(DecomposeAtanPass())
         self.add_pass(ConvertIntPowToMuls())
diff --git a/backends/arm/_passes/decompose_asin_pass.py b/backends/arm/_passes/decompose_asin_pass.py
new file mode 100644
index 00000000000..1330ca89264
--- /dev/null
+++ b/backends/arm/_passes/decompose_asin_pass.py
@@ -0,0 +1,201 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# pyre-unsafe
+
+import logging
+from math import pi
+
+import torch
+
+from executorch.backends.arm._passes import ArmPass
+from executorch.exir.dialects._ops import ops as exir_ops
+
+# Edge ops that DecomposeAsinPass replaces (the pass is added to the MI pipeline)
+edge_asin_op = (exir_ops.edge.aten.asin.default,)
+
+
+def get_asin_decomposition(op) -> tuple:
+    """Return the edge ops used to decompose asin; RuntimeError for other ops."""
+    if op not in edge_asin_op:
+        raise RuntimeError(f"Can't get asin decomposition for op {op}")
+    return (
+        exir_ops.edge.aten.mul.Tensor,
+        exir_ops.edge.aten.add.Tensor,
+        exir_ops.edge.aten.mul.Scalar,
+        exir_ops.edge.aten.sqrt.default,
+        exir_ops.edge.aten.abs.default,
+        exir_ops.edge.aten.sub.Scalar,
+        exir_ops.edge.aten.div.Tensor,
+        exir_ops.edge.aten.gt.Scalar,
+        exir_ops.edge.aten.lt.Scalar,
+        exir_ops.edge.aten.sub.Tensor,
+        exir_ops.edge.aten.full_like.default,
+        exir_ops.edge.aten.where.self,
+        exir_ops.edge.aten.neg.default,
+    )
+
+
+class DecomposeAsinPass(ArmPass):
+    """
+    This pass decomposes asin into a rational approximation for small values
+    and a transformed rational approximation for large values.
+    Example:
+        y = asin(x)
+    Becomes:
+        if abs(x) <= 0.5:
+            y = |x| + |x|^3 * P(x^2) / Q(x^2)
+        else:
+            y = π/2 - 2 * (s + s^3 * P(z) / Q(z)), z = (1 - |x|) / 2, s = sqrt(z)
+    and y is negated for x < 0. P and Q are polynomials defined in call_operator.
+    """
+
+    def _build_polynomial(
+        self, coefficients: list[float], variable: torch.Tensor, meta: dict[str, str]
+    ) -> torch.Tensor:
+        """
+        Helper function to build the polynomial
+            c[0] + c[1] * v + c[2] * v^2 + ... + c[n] * v^n
+        from coefficients c (non-empty, lowest order first) and variable v.
+        Returns the value of the last op created; meta is passed to every op.
+        """
+        full_like_op, add_op, mul_op_scalar, mul_op = (
+            exir_ops.edge.aten.full_like.default,
+            exir_ops.edge.aten.add.Tensor,
+            exir_ops.edge.aten.mul.Scalar,
+            exir_ops.edge.aten.mul.Tensor,
+        )
+        result = super().call_operator(
+            full_like_op, (variable, coefficients[0]), {}, meta, True
+        )
+        power = None
+        for coeff in coefficients[1:]:
+            # Step v^(k-1) -> v^k by multiplying with v. Multiplying power by
+            # itself would give the exponents 1, 2, 4, 8 instead of 1, 2, 3, 4.
+            if power is None:
+                power = variable
+            else:
+                power = super().call_operator(
+                    mul_op, (power, variable), {}, meta, True
+                )
+            term = super().call_operator(mul_op_scalar, (power, coeff), {}, meta, True)
+            result = super().call_operator(add_op, (result, term), {}, meta, True)
+        return result
+
+    def call_operator(self, op, args, kwargs, meta):
+        """Decompose edge asin ops as described on the class; forward the rest."""
+        if op not in edge_asin_op:
+            return super().call_operator(op, args, kwargs, meta)
+        # Logged after the check so that only asin ops produce this message.
+        logging.info(
+            f"Approximating asin. This may introduce small numerical errors. For details, see {__file__}."
+        )
+
+        x = args[0]
+        half = 0.5
+        one = 1.0
+        neg_half = -0.5
+        two = 2.0
+        pi_over_2 = pi / 2.0
+        zero = 0.0
+
+        (
+            mul_op,
+            add_op,
+            mul_op_scalar,
+            sqrt_op,
+            abs_op,
+            sub_op_scalar,
+            div_op,
+            gt_op,
+            lt_op,
+            sub_op,
+            full_like_op,
+            where_op,
+            neg_op,
+        ) = get_asin_decomposition(op)
+
+        # Coefficients for the rational approximation, calculated with the Minimax (Remez) method
+        # Both lists are ordered from the constant term upwards.
+        p_coefficients = [
+            1.6666667163e-01,
+            -3.2556581497e-01,
+            2.0121252537e-01,
+            -4.0055535734e-02,
+            7.9153501429e-04,
+        ]
+
+        q_coefficients = [1.0, -2.4033949375e00, 2.0209457874e00, -6.8828397989e-01]
+
+        x_abs = super().call_operator(abs_op, (x,), {}, meta, True)
+
+        # Step 1: compute asin_small - rational approximation for [0,0.5]
+        # P and Q are evaluated at y = x^2, matching the class docstring.
+        y = super().call_operator(mul_op, (x_abs, x_abs), {}, meta, True)
+        x3 = super().call_operator(mul_op, (x_abs, y), {}, meta, True)
+
+        P = self._build_polynomial(p_coefficients, y, meta)
+        Q = self._build_polynomial(q_coefficients, y, meta)
+        numer = super().call_operator(mul_op, (x3, P), {}, meta, True)
+        r_small = super().call_operator(div_op, (numer, Q), {}, meta, True)
+        asin_small = super().call_operator(add_op, (x_abs, r_small), {}, meta, True)
+
+        # Step 2: Compute the transformed approximation for large values
+        # Calculate z = -0.5 * (|x| - 1)
+        tmp_ones = super().call_operator(full_like_op, (x_abs, one), {}, meta, True)
+        tmp = super().call_operator(sub_op, (x_abs, tmp_ones), {}, meta, True)
+        z = super().call_operator(mul_op_scalar, (tmp, neg_half), {}, meta, True)
+
+        # Calculate s-terms
+        s = super().call_operator(sqrt_op, (z,), {}, meta, True)
+        s2 = super().call_operator(mul_op, (s, s), {}, meta, True)
+        s3 = super().call_operator(mul_op, (s2, s), {}, meta, True)
+
+        Pz = self._build_polynomial(p_coefficients, z, meta)
+        Qz = self._build_polynomial(q_coefficients, z, meta)
+
+        # Calculate r_large = s^3 * P(z) / Q(z)
+        numer = super().call_operator(mul_op, (s3, Pz), {}, meta, True)
+        r_large = super().call_operator(div_op, (numer, Qz), {}, meta, True)
+
+        # Calculate asin_large = pi/2 - 2 * (s + r_large). It is formed as
+        # -(2 * (s + r_large) - pi/2) because sub_op_scalar is tensor minus scalar.
+        t1 = super().call_operator(add_op, (s, r_large), {}, meta, True)
+        t2 = super().call_operator(mul_op_scalar, (t1, two), {}, meta, True)
+        diff = super().call_operator(sub_op_scalar, (t2, pi_over_2), {}, meta, True)
+        asin_large = super().call_operator(neg_op, (diff,), {}, meta, True)
+
+        # Combine branches
+        is_large = super().call_operator(gt_op, (x_abs, half), {}, meta, True)
+        asin_unsigned = super().call_operator(
+            where_op,
+            (
+                is_large,
+                asin_large,
+                asin_small,
+            ),
+            {},
+            meta,
+            True,
+        )
+
+        # Handle x < 0
+        is_neg = super().call_operator(lt_op, (x, zero), {}, meta, True)
+        # Compute -asin_unsigned
+        negated_asin = super().call_operator(neg_op, (asin_unsigned,), {}, meta, True)
+        # Combine branches for signed asin
+        asin_signed = super().call_operator(
+            where_op,
+            (
+                is_neg,
+                negated_asin,
+                asin_unsigned,
+            ),
+            {},
+            meta,
+            True,
+        )
+
+        return asin_signed
diff --git a/backends/arm/_passes/insert_table_ops.py b/backends/arm/_passes/insert_table_ops.py
index 28b4700ce39..4127197e58d 100644
--- a/backends/arm/_passes/insert_table_ops.py
+++ b/backends/arm/_passes/insert_table_ops.py
@@ -56,6 +56,7 @@ class TableOps:
         exir_ops.edge.aten.hardswish.default: torch.nn.functional.hardswish,
         exir_ops.edge.aten.sinh.default: torch.sinh,
         exir_ops.edge.aten.acosh.default: torch.acosh,
+        exir_ops.edge.aten.asin.default: torch.asin,
     }
 
     # Targets that must be treated explicitly
diff --git a/backends/arm/operator_support/tosa_supported_operators.py b/backends/arm/operator_support/tosa_supported_operators.py
index d58cbfef524..2c158ca81e7 100644
--- a/backends/arm/operator_support/tosa_supported_operators.py
+++ b/backends/arm/operator_support/tosa_supported_operators.py
@@ -251,6 +251,7 @@ def is_node_supported(
             exir_ops.edge.aten.acosh.default,
             exir_ops.edge.aten._adaptive_avg_pool2d.default,
             exir_ops.edge.aten.sign.default,
+            exir_ops.edge.aten.asin.default,
         ]
 
         return supported
diff --git a/backends/arm/quantizer/quantization_annotator.py b/backends/arm/quantizer/quantization_annotator.py
index 76209aff83f..cc580e01563 100644
--- a/backends/arm/quantizer/quantization_annotator.py
+++ b/backends/arm/quantizer/quantization_annotator.py
@@ -217,6 +217,7 @@ def _match_pattern(
     torch.ops.aten.atan.default,
     torch.ops.aten.acosh.default,
     torch.ops.aten.sign.default,
+    torch.ops.aten.asin.default,
 ]
 
 _one_to_one_shared_input_qspec = [
diff --git a/backends/arm/test/ops/test_asin.py b/backends/arm/test/ops/test_asin.py
new file mode 100644
index 00000000000..ccb1b3bfc30
--- /dev/null
+++ b/backends/arm/test/ops/test_asin.py
@@ -0,0 +1,80 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from typing import Tuple
+
+import torch
+
+from executorch.backends.arm.test import common
+from executorch.backends.arm.test.tester.test_pipeline import (
+    EthosU55PipelineBI,
+    EthosU85PipelineBI,
+    TosaPipelineBI,
+    TosaPipelineMI,
+)
+
+input_t = Tuple[torch.Tensor]  # Input x
+aten_op = "torch.ops.aten.asin.default"
+
+test_data_suite = {  # case name -> zero-argument factory building the input
+    "zeros": lambda: torch.zeros(1, 5, 3, 2),  # valid: asin(0) = 0
+    "ones": lambda: torch.ones(10, 5, 15),  # edge case: asin(1) = pi/2
+    "neg_ones": lambda: -torch.ones(10, 5, 15),  # edge case: asin(-1) = -pi/2
+    "rand": lambda: (torch.rand(10, 10, 5) * 2) - 1,  # uniform random in [-1, 1)
+    "ramp": lambda: torch.linspace(-1.0, 1.0, steps=160),  # full domain coverage
+    "near_bounds": lambda: torch.tensor(
+        [-0.999, -0.9, -0.5, 0.0, 0.5, 0.9, 0.999]
+    ),  # precision edge values
+    "pos_rand": lambda: torch.rand(7, 10, 2),  # non-negative random values in [0, 1)
+}
+
+
+class Asin(torch.nn.Module):
+    def forward(self, x):  # elementwise arcsine of the input tensor
+        return x.asin()
+
+
+@common.parametrize("test_data", test_data_suite)
+def test_asin_tosa_MI(test_data: Tuple):
+    """Run the Asin module through TosaPipelineMI for one suite entry.
+
+    test_data is a zero-argument callable that returns the input tensor.
+    """
+    inputs = (test_data(),)
+    tester = TosaPipelineMI[input_t](Asin(), inputs, aten_op, exir_op=[])
+    tester.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+def test_asin_tosa_BI(test_data: Tuple):
+    """Run the Asin module through TosaPipelineBI for one suite entry.
+
+    test_data is a zero-argument callable that returns the input tensor.
+    """
+    inputs = (test_data(),)
+    tester = TosaPipelineBI[input_t](Asin(), inputs, aten_op=[], exir_op=[])
+    tester.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.XfailIfNoCorstone300
+def test_asin_u55_BI(test_data: Tuple):
+    """Run the Asin module through EthosU55PipelineBI for one suite entry.
+
+    test_data is a zero-argument callable that returns the input tensor.
+    """
+    inputs = (test_data(),)
+    EthosU55PipelineBI[input_t](Asin(), inputs, aten_ops=[]).run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.XfailIfNoCorstone320
+def test_asin_u85_BI(test_data: Tuple):
+    """Run the Asin module through EthosU85PipelineBI for one suite entry.
+
+    test_data is a zero-argument callable that returns the input tensor.
+    """
+    inputs = (test_data(),)
+    EthosU85PipelineBI[input_t](Asin(), inputs, aten_ops=[]).run()