From 6433646010718e3d0d273206e2601bba4f996743 Mon Sep 17 00:00:00 2001 From: Saoirse Stewart Date: Tue, 30 Jul 2024 12:31:50 +0100 Subject: [PATCH 1/2] Add missing unit tests for operators * hardtanh * permute Change-Id: Ia1802bdc37d365af382835b3c14174d841892927 --- backends/arm/test/ops/test_hardtanh.py | 125 ++++++++++++++++++++++++ backends/arm/test/ops/test_permute.py | 129 +++++++++++++++++++++++++ 2 files changed, 254 insertions(+) create mode 100644 backends/arm/test/ops/test_hardtanh.py create mode 100644 backends/arm/test/ops/test_permute.py diff --git a/backends/arm/test/ops/test_hardtanh.py b/backends/arm/test/ops/test_hardtanh.py new file mode 100644 index 00000000000..c7c3736e37b --- /dev/null +++ b/backends/arm/test/ops/test_hardtanh.py @@ -0,0 +1,125 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright 2024 Arm Limited and/or its affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +import unittest +from typing import Tuple + +import torch + +from executorch.backends.arm.quantizer.arm_quantizer import ( + ArmQuantizer, + get_symmetric_quantization_config, +) + +from executorch.backends.arm.test import common +from executorch.backends.arm.test.tester.arm_tester import ArmTester +from executorch.backends.xnnpack.test.tester.tester import Quantize +from parameterized import parameterized + + +test_data_suite = [ + # (test_name, test_data) + ("zeros", torch.zeros(1, 10, 10, 10)), + ("ones", torch.ones(10, 10, 10)), + ("rand", torch.rand(10, 10) - 0.5), + ("randn_pos", torch.randn(10) + 10), + ("randn_neg", torch.randn(10) - 10), + ("ramp", torch.arange(-16, 16, 0.2)), +] + + +class TestHardTanh(unittest.TestCase): + """Tests HardTanh Operator.""" + + class HardTanh(torch.nn.Module): + + def __init__(self): + super().__init__() + + self.hardTanh = torch.nn.Hardtanh() + + def forward(self, x): + return self.hardTanh(x) + + def _test_hardtanh_tosa_MI_pipeline( + self, module: torch.nn.Module, test_data: Tuple[torch.tensor] + ): + ( + ArmTester( + module, + example_inputs=test_data, + compile_spec=common.get_tosa_compile_spec(), + ) + .export() + .check(["torch.ops.aten.hardtanh.default"]) + .check_not(["torch.ops.quantized_decomposed"]) + .to_edge() + .partition() + .check_not(["executorch_exir_dialects_edge__ops_aten_hardtanh_default"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + .run_method_and_compare_outputs(inputs=test_data) + ) + + def _test_hardtanh_tosa_BI_pipeline( + self, module: torch.nn.Module, test_data: Tuple[torch.tensor] + ): + quantizer = ArmQuantizer().set_io(get_symmetric_quantization_config()) + ( + ArmTester( + module, + example_inputs=test_data, + compile_spec=common.get_tosa_compile_spec(), + ) + .quantize(Quantize(quantizer, get_symmetric_quantization_config())) + .export() + .check_count({"torch.ops.aten.hardtanh.default": 1}) + .check(["torch.ops.quantized_decomposed"]) + .to_edge() + .partition() + .check_not(["executorch_exir_dialects_edge__ops_aten_hardtanh_default"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + .run_method_and_compare_outputs(inputs=test_data) + ) + + def _test_hardtanh_tosa_u55_BI_pipeline( + self, module: torch.nn.Module, test_data: Tuple[torch.tensor] + ): + quantizer = ArmQuantizer().set_io(get_symmetric_quantization_config()) + ( + ArmTester( + module, + example_inputs=test_data, + 
compile_spec=common.get_u55_compile_spec(), + ) + .quantize(Quantize(quantizer, get_symmetric_quantization_config())) + .export() + .check_count({"torch.ops.aten.hardtanh.default": 1}) + .check(["torch.ops.quantized_decomposed"]) + .to_edge() + .partition() + .check_not(["executorch_exir_dialects_edge__ops_aten_hardtanh_default"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + ) + + @parameterized.expand(test_data_suite) + def test_hardtanh_tosa_MI( + self, + test_name: str, + test_data: torch.Tensor, + ): + self._test_hardtanh_tosa_MI_pipeline(self.HardTanh(), (test_data,)) + + @parameterized.expand(test_data_suite) + def test_hardtanh_tosa_BI(self, test_name: str, test_data: torch.Tensor): + self._test_hardtanh_tosa_BI_pipeline(self.HardTanh(), (test_data,)) + + @parameterized.expand(test_data_suite) + def test_hardtanh_tosa_u55_BI(self, test_name: str, test_data: torch.Tensor): + self._test_hardtanh_tosa_u55_BI_pipeline(self.HardTanh(), (test_data,)) diff --git a/backends/arm/test/ops/test_permute.py b/backends/arm/test/ops/test_permute.py new file mode 100644 index 00000000000..6cc76aa44ed --- /dev/null +++ b/backends/arm/test/ops/test_permute.py @@ -0,0 +1,129 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright 2024 Arm Limited and/or its affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +import unittest +from typing import Tuple + +import torch + +from executorch.backends.arm.quantizer.arm_quantizer import ( + ArmQuantizer, + get_symmetric_quantization_config, +) + +from executorch.backends.arm.test import common +from executorch.backends.arm.test.tester.arm_tester import ArmTester +from executorch.backends.xnnpack.test.tester.tester import Quantize +from parameterized import parameterized +from torchvision.ops import Permute + +test_data_suite = [ + # (test_name,test_data,dims) + ("zeros", torch.zeros(10, 10, 10, 10), [1, 0, 3, 2]), + ("ones", torch.ones(10, 10, 10, 10), [3, 1, 0, 2]), + ("rand", torch.rand(10, 10, 10, 10) - 0.5, [0, 2, 3, 1]), + ("randn_pos", torch.randn(10, 10, 10) + 10, [2, 0, 1]), + ("randn_neg", torch.randn(10, 10, 10) - 10, [1, 2, 0]), + ("ramp", torch.arange(-16, 16, 0.2), [0]), +] + + +class TestPermute(unittest.TestCase): + """Tests Permute Operator.""" + + class Permute(torch.nn.Module): + + def __init__(self, dims: list[int]): + super().__init__() + + self.permute = Permute(dims=dims) + + def forward(self, x): + return self.permute(x) + + def _test_permute_tosa_MI_pipeline( + self, module: torch.nn.Module, test_data: Tuple[torch.tensor] + ): + ( + ArmTester( + module, + example_inputs=test_data, + compile_spec=common.get_tosa_compile_spec(), + ) + .export() + .check(["torch.ops.aten.permute.default"]) + .check_not(["torch.ops.quantized_decomposed"]) + .to_edge() + .partition() + .check_not(["executorch_exir_dialects_edge__ops_aten_permute_default"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + .run_method_and_compare_outputs(inputs=test_data) + ) + + def _test_permute_tosa_BI_pipeline( + self, module: torch.nn.Module, test_data: Tuple[torch.tensor] + ): + quantizer = ArmQuantizer().set_io(get_symmetric_quantization_config()) + ( + ArmTester( + module, + example_inputs=test_data, + compile_spec=common.get_tosa_compile_spec(), + ) + .quantize(Quantize(quantizer, get_symmetric_quantization_config())) + .export() + 
.check_count({"torch.ops.aten.permute.default": 1}) + .check(["torch.ops.quantized_decomposed"]) + .to_edge() + .partition() + .check_not(["executorch_exir_dialects_edge__ops_aten_permute_default"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + .run_method_and_compare_outputs(inputs=test_data) + ) + + def _test_permute_tosa_u55_BI_pipeline( + self, module: torch.nn.Module, test_data: Tuple[torch.tensor] + ): + quantizer = ArmQuantizer().set_io(get_symmetric_quantization_config()) + ( + ArmTester( + module, + example_inputs=test_data, + compile_spec=common.get_u55_compile_spec(), + ) + .quantize(Quantize(quantizer, get_symmetric_quantization_config())) + .export() + .check_count({"torch.ops.aten.permute.default": 1}) + .check(["torch.ops.quantized_decomposed"]) + .to_edge() + .partition() + .check_not(["executorch_exir_dialects_edge__ops_aten_permute_default"]) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + ) + + @parameterized.expand(test_data_suite) + def test_permute_tosa_MI( + self, test_name: str, test_data: torch.Tensor, dims: list[int] + ): + self._test_permute_tosa_MI_pipeline(self.Permute(dims=dims), (test_data,)) + + @parameterized.expand(test_data_suite) + def test_permute_tosa_BI( + self, test_name: str, test_data: torch.Tensor, dims: list[int] + ): + self._test_permute_tosa_BI_pipeline(self.Permute(dims=dims), (test_data,)) + + # Expected to fail as Permute is not supported by the NPU + @parameterized.expand(test_data_suite) + @unittest.expectedFailure + def test_permute_tosa_u55_BI( + self, test_name: str, test_data: torch.Tensor, dims: list[int] + ): + self._test_permute_tosa_u55_BI_pipeline(self.Permute(dims=dims), (test_data,)) From b86ae3304fb21bcb2ba2de247f5abc3554af149f Mon Sep 17 00:00:00 2001 From: Oscar Andersson Date: Mon, 30 Sep 2024 11:27:59 +0200 Subject: [PATCH 2/2] Permute permutation vector for op_permute Permute vector needs to be permuted when dim_order != (0, 1, 2, 3) Change-Id: I2a35c6852376f9a57deeedd4fc38bda870e453a4 Signed-off-by: Oscar Andersson Signed-off-by: Erik Lundell --- backends/arm/operators/op_permute.py | 77 ++++++++++++++++++++++++++- backends/arm/test/ops/test_permute.py | 55 +++++++++++++------ 2 files changed, 114 insertions(+), 18 deletions(-) diff --git a/backends/arm/operators/op_permute.py b/backends/arm/operators/op_permute.py index 167a0c382f4..69f6f6506c6 100644 --- a/backends/arm/operators/op_permute.py +++ b/backends/arm/operators/op_permute.py @@ -1,4 +1,4 @@ -# Copyright 2023 Arm Limited and/or its affiliates. +# Copyright 2023-2024 Arm Limited and/or its affiliates. # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. @@ -18,6 +18,54 @@ from serializer.tosa_serializer import TosaOp +def permutation_vector_to_matrix(permutation_vector: list[int]) -> torch.Tensor: + """ + Converts a permutation vector of length N to a NxN matrix that describes the same permutation. + for example: + (1,0,2) + -> + [0 1 0] + |1 0 0| + [0 0 1] + """ + N = len(permutation_vector) + P = torch.zeros(N, N) + for row_index, col_index in enumerate(permutation_vector): + P[row_index][col_index] = 1 + return P + + +def permutation_matrix_to_vector(permutation_matrix: torch.Tensor) -> list[int]: + """ + Converts a NxN permutation matrix to a permutation vector of length N that describes the same permutation. 
+    [0 1 0]
+    |1 0 0|
+    [0 0 1]
+    ->
+    (1,0,2)
+    """
+    N = len(permutation_matrix)
+    assert N == len(
+        permutation_matrix[0]
+    ), f"A permutation matrix must be square, got shape {permutation_matrix.shape}"
+
+    p = [0] * N
+    for row_index, row in enumerate(permutation_matrix):
+        saw_one = False
+        for col_index, value in enumerate(row):
+            if value == 1:
+                assert (
+                    not saw_one
+                ), f"A permutation matrix can only have one 1 per row, got row {row}."
+                p[row_index] = col_index
+                saw_one = True
+            else:
+                assert (
+                    value == 0
+                ), f"A permutation matrix only contains 1's and 0's, got value {value}."
+    return p
+
+
 @register_node_visitor
 class PermuteVisitor(NodeVisitor):
     target = "aten.permute_copy.default"
@@ -40,8 +88,33 @@ def define_node(
             )
             return

+        # The permutation vector describes a permutation P in the default PyTorch dim_order.
+        # For rank 4, the default dim_order is NCHW.
+        # E.g. (2,3,0,1) -> permute (n,c,h,w) to (h,w,n,c)
+        permutation_vector = inputs[1].special
+
+        if output.dim_order != tuple(range(len(output.dim_order))):
+            # The permutation vector can't be used directly if we are not in NCHW dim_order.
+            # We need to first transform to NCHW, apply P,
+            # and then transform back to the original dim_order.
+            # This transformation, S, is also a permutation, with the dim_order as its permutation vector.
+
+            # To do this, represent P and S with permutation matrices.
+            # Matrices can handle chained transformations and inversion easily.
+            S = permutation_vector_to_matrix(output.dim_order)
+            # The inverse of a permutation matrix is its transpose.
+            S_inverse = S.transpose(1, 0)
+            P = permutation_vector_to_matrix(permutation_vector)
+
+            # The complete transformation is S * P * S_inverse.
+            transformation_matrix = S.matmul(P.matmul(S_inverse))
+
+            # Luckily, since it is just a combination of permutations, the result is also a permutation
+            # that can again be described by a new permutation vector.
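+            # As an illustration (example values chosen here for clarity): with dim_order
+            # (0, 2, 3, 1) (NHWC) and permutation vector (0, 1, 3, 2) (swap H and W in NCHW
+            # terms), the combined vector becomes (0, 2, 1, 3), i.e. swap H and W in NHWC terms.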
+ permutation_vector = permutation_matrix_to_vector(transformation_matrix) + attr = ts.TosaSerializerAttribute() - attr.TransposeAttribute(inputs[1].special) + attr.TransposeAttribute(permutation_vector) tosa_graph.addOperator( TosaOp.Op().TRANSPOSE, [inputs[0].name], [output.name], attr ) diff --git a/backends/arm/test/ops/test_permute.py b/backends/arm/test/ops/test_permute.py index 6cc76aa44ed..6346e847c98 100644 --- a/backends/arm/test/ops/test_permute.py +++ b/backends/arm/test/ops/test_permute.py @@ -18,17 +18,18 @@ from executorch.backends.arm.test import common from executorch.backends.arm.test.tester.arm_tester import ArmTester from executorch.backends.xnnpack.test.tester.tester import Quantize +from executorch.exir.backend.compile_spec_schema import CompileSpec from parameterized import parameterized from torchvision.ops import Permute test_data_suite = [ # (test_name,test_data,dims) - ("zeros", torch.zeros(10, 10, 10, 10), [1, 0, 3, 2]), - ("ones", torch.ones(10, 10, 10, 10), [3, 1, 0, 2]), - ("rand", torch.rand(10, 10, 10, 10) - 0.5, [0, 2, 3, 1]), - ("randn_pos", torch.randn(10, 10, 10) + 10, [2, 0, 1]), - ("randn_neg", torch.randn(10, 10, 10) - 10, [1, 2, 0]), - ("ramp", torch.arange(-16, 16, 0.2), [0]), + ("rank_2", torch.rand(10, 10), [1, 0]), + ("rank_3", torch.rand(10, 10, 10), [2, 0, 1]), + ("rank_3", torch.rand(10, 10, 10), [1, 2, 0]), + ("rank_4", torch.rand(1, 5, 1, 10), [0, 2, 3, 1]), + ("rank_4", torch.rand(1, 2, 5, 10), [1, 0, 2, 3]), + ("rank_4", torch.rand(1, 10, 10, 5), [2, 0, 1, 3]), ] @@ -46,13 +47,18 @@ def forward(self, x): return self.permute(x) def _test_permute_tosa_MI_pipeline( - self, module: torch.nn.Module, test_data: Tuple[torch.tensor] + self, + module: torch.nn.Module, + test_data: Tuple[torch.tensor], + permute_memory_to_nhwc: bool, ): ( ArmTester( module, example_inputs=test_data, - compile_spec=common.get_tosa_compile_spec(), + compile_spec=common.get_tosa_compile_spec( + permute_memory_to_nhwc=permute_memory_to_nhwc + ), ) .export() .check(["torch.ops.aten.permute.default"]) @@ -87,15 +93,18 @@ def _test_permute_tosa_BI_pipeline( .run_method_and_compare_outputs(inputs=test_data) ) - def _test_permute_tosa_u55_BI_pipeline( - self, module: torch.nn.Module, test_data: Tuple[torch.tensor] + def _test_permute_ethos_BI_pipeline( + self, + module: torch.nn.Module, + compile_spec: CompileSpec, + test_data: Tuple[torch.Tensor], ): quantizer = ArmQuantizer().set_io(get_symmetric_quantization_config()) ( ArmTester( module, example_inputs=test_data, - compile_spec=common.get_u55_compile_spec(), + compile_spec=compile_spec, ) .quantize(Quantize(quantizer, get_symmetric_quantization_config())) .export() @@ -106,13 +115,17 @@ def _test_permute_tosa_u55_BI_pipeline( .check_not(["executorch_exir_dialects_edge__ops_aten_permute_default"]) .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) .to_executorch() + .serialize() ) @parameterized.expand(test_data_suite) def test_permute_tosa_MI( self, test_name: str, test_data: torch.Tensor, dims: list[int] ): - self._test_permute_tosa_MI_pipeline(self.Permute(dims=dims), (test_data,)) + self._test_permute_tosa_MI_pipeline(self.Permute(dims=dims), (test_data,), True) + self._test_permute_tosa_MI_pipeline( + self.Permute(dims=dims), (test_data,), False + ) @parameterized.expand(test_data_suite) def test_permute_tosa_BI( @@ -120,10 +133,20 @@ def test_permute_tosa_BI( ): self._test_permute_tosa_BI_pipeline(self.Permute(dims=dims), (test_data,)) - # Expected to fail as Permute is not supported by the NPU - 
@parameterized.expand(test_data_suite) + # Expected to fail as TOSA.Transpose is not supported by Ethos-U55. + @parameterized.expand(test_data_suite[0:1]) @unittest.expectedFailure - def test_permute_tosa_u55_BI( + def test_permute_u55_BI( self, test_name: str, test_data: torch.Tensor, dims: list[int] ): - self._test_permute_tosa_u55_BI_pipeline(self.Permute(dims=dims), (test_data,)) + self._test_permute_ethos_BI_pipeline( + self.Permute(dims=dims), common.get_u55_compile_spec(), (test_data,) + ) + + @parameterized.expand(test_data_suite) + def test_permute_u85_BI( + self, test_name: str, test_data: torch.Tensor, dims: list[int] + ): + self._test_permute_ethos_BI_pipeline( + self.Permute(dims=dims), common.get_u85_compile_spec(), (test_data,) + )