NXP backend: Add support for aten_permute_copy_default operator

roman-janik-nxp · MartinPavella · commit 72376bf1517e · 2025-10-14T10:03:10.000+02:00
diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/permute_copy_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/permute_copy_converter.py
@@ -4,28 +4,55 @@
 # LICENSE file in the root directory of this source tree.
 
 import numpy as np
+import torch
 
 from executorch.backends.nxp.backend.ir.converter import quantization_utils
 from executorch.backends.nxp.backend.ir.converter.conversion.common import OpsList
 from executorch.backends.nxp.backend.ir.converter.node_converter import (
     CustomDelegationOptions,
+    NeutronTargetSpec,
     NodeConverter,
 )
 from executorch.backends.nxp.backend.ir.tflite_generator.builtin_options import (
     transpose_options,
 )
+from executorch.backends.nxp.backend.neutron_operator_support import (
+    transposition_is_supported_on_neutron,
+)
 from torch.fx import Node
 from torch.nn import Parameter
 
 
+def _get_shape(node: torch.fx.Node) -> list[int]:
+    return list(node.meta["val"].shape)
+
+
 class PermuteCopyConverter(NodeConverter):
+    @staticmethod
+    def _is_supported_on_target(
+        node: Node,
+        neutron_target_spec: NeutronTargetSpec,
+        parameters_mapping: dict[str, Parameter],
+        custom_delegation_options: CustomDelegationOptions,
+    ) -> bool:
+        input_shape = _get_shape(node.args[0])
+        permutation = list(node.args[1])
+
+        # TODO Handle tensor formats properly.
+
+        return transposition_is_supported_on_neutron(
+            input_shape, permutation, neutron_target_spec
+        )
 
     @staticmethod
     def _is_supported_in_IR(
         node: Node,
         parameters_mapping: dict[str, Parameter],
         custom_delegation_options: CustomDelegationOptions,
     ) -> bool:
+        if not NodeConverter._has_shared_q_params_if_quantized(node):
+            return False
+
         return True
 
     def convert(self, node: Node):
diff --git a/backends/nxp/neutron_partitioner.py b/backends/nxp/neutron_partitioner.py
@@ -210,12 +210,13 @@ def tag_qdq_clusters(self, nodes: list[torch.fx.Node]):
     exir_ops.edge.aten.max_pool2d_with_indices.default: MaxPool2dConverter,  # noqa F405
     exir_ops.edge.aten.mean.dim: MeanDimConverter,  # noqa F405
     exir_ops.edge.aten.mm.default: MMConverter,  # noqa F405
+    exir_ops.edge.aten.permute_copy.default: PermuteCopyConverter,  # noqa F405
     exir_ops.edge.aten.relu.default: ReLUConverter,  # noqa F405
     exir_ops.edge.aten._softmax.default: SoftmaxConverter,  # noqa F405
+    exir_ops.edge.aten.sigmoid.default: SigmoidConverter,  # noqa F405
     exir_ops.edge.aten.sub.Tensor: SubTensorConverter,  # noqa F405
     exir_ops.edge.aten.tanh.default: TanhConverter,  # noqa F405
     exir_ops.edge.aten.view_copy.default: ViewCopyConverter,  # noqa F405
-    exir_ops.edge.aten.sigmoid.default: SigmoidConverter,  # noqa F405
 }
 
 
diff --git a/backends/nxp/quantizer/neutron_quantizer.py b/backends/nxp/quantizer/neutron_quantizer.py
@@ -39,6 +39,7 @@
     SubTensorPattern,
     TanhInPlacePattern,
     TanhPattern,
+    TransposeIntPattern,
     ViewPattern,
 )
 from executorch.backends.nxp.quantizer.utils import (
@@ -212,6 +213,7 @@ def __init__(self):
                 NeutronAtenQuantizer(SubTensorPattern(), static_qconfig),
                 NeutronAtenQuantizer(TanhPattern(), static_qconfig),
                 NeutronAtenQuantizer(TanhInPlacePattern(), static_qconfig),
+                NeutronAtenQuantizer(TransposeIntPattern(), static_qconfig),
                 NeutronAtenQuantizer(ViewPattern(), static_qconfig),
             ]
         )
diff --git a/backends/nxp/quantizer/patterns.py b/backends/nxp/quantizer/patterns.py
@@ -513,6 +513,15 @@ def partition_types(self):
         return [torch.ops.aten.permute.default]
 
 
+class TransposeIntPattern(SharedSpecPattern):
+    """
+    Quantizer for Transpose Int operator.
+    """
+
+    def partition_types(self) -> list[OpOverload]:
+        return [torch.ops.aten.transpose.int]
+
+
 class ReluPattern(SharedSpecPattern):
     """
     Quantizer for Relu operator. Shared quantization spec is selected, as ReLU usually follows computation layer.
diff --git a/backends/nxp/tests/ir/converter/node_converter/test_permute_copy_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_permute_copy_converter.py
@@ -3,8 +3,10 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
+import unittest
+
+import kgb
 import numpy as np
-import pytest
 import torch
 
 from executorch.backends.nxp.backend.edge_program_converter import (
@@ -13,52 +15,187 @@
 from executorch.backends.nxp.tests.executorch_pipeline import to_quantized_edge_program
 from executorch.backends.nxp.tests.executors import (
     convert_run_compare,
-    ToNCHWPreprocess,
-    ToNHWCPreprocess,
+    graph_contains_any_of_ops,
+    ToChannelFirstPreprocess,
+    ToChannelLastPreprocess,
 )
 from executorch.backends.nxp.tests.models import Conv2dModule
+from executorch.exir.dialects._ops import ops as exir_ops
+from parameterized import parameterized
 from torch.export import ExportedProgram
 
 
-@pytest.fixture(autouse=True)
-def reseed_model_per_test_run():
-    torch.manual_seed(23)
-    np.random.seed(23)
+class Conv2dTransposeModule(torch.nn.Module):
+    def __init__(self, in_channels: int, dim0: int, dim1: int):
+        super().__init__()
+        self.dim0 = dim0
+        self.dim1 = dim1
+        self.conv = Conv2dModule(
+            in_channels=in_channels, out_channels=in_channels, kernel_size=(1, 1)
+        )
 
+    def forward(self, x):
+        x = self.conv(x)
+        return torch.transpose(x, self.dim0, self.dim1)
 
-class Conv2dPermuteCopyModule(torch.nn.Module):
-    def __init__(self, new_dims: tuple[int, ...]):
+
+class Conv2dPermuteModule(torch.nn.Module):
+    def __init__(self, in_channels: int, new_dims: tuple[int, ...]):
         super().__init__()
         self.new_dims = new_dims
-        self.conv = Conv2dModule()
+        self.conv = Conv2dModule(
+            in_channels=in_channels,
+            out_channels=in_channels,
+            stride=1,
+            kernel_size=3,
+            padding=1,
+        )
 
     def forward(self, x):
         x = self.conv(x)
         return torch.permute(x, self.new_dims)
 
 
-def test_permute_copy_quant_conversion__with_bias(mocker):
-    input_shape = (1, 4, 8, 8)
-    new_dims = (0, 2, 3, 1)
+class LinearPermuteModule(torch.nn.Module):
+    def __init__(self, in_features: int, new_dims: tuple[int, ...]):
+        super().__init__()
+        self.new_dims = new_dims
+        self.fc = torch.nn.Linear(in_features, in_features)
+
+    def forward(self, x):
+        x = self.fc(x)
+        return torch.permute(x, self.new_dims)
+
+
+class TestPermuteCopyConversion(kgb.SpyAgency, unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        torch.manual_seed(23)
+        np.random.seed(42)
+
+    @parameterized.expand(
+        [
+            ["To channel first permutation", (1, 16, 8, 8), (0, 3, 1, 2)],
+            ["To channel last permutation", (1, 16, 8, 8), (0, 2, 3, 1)],
+        ]
+    )
+    def test_permute_copy_conversion__from_permute_4D__quantized(
+        self, _: str, input_shape, new_dims
+    ):
+        with kgb.spy_on(
+            EdgeProgramToIRConverter.convert_program, call_original=True
+        ) as converter_spy:
+            model = Conv2dPermuteModule(input_shape[1], new_dims)
+
+            # Run conversion
+            edge_program = to_quantized_edge_program(
+                model, input_shape
+            ).exported_program()
 
-    converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program")
+            # Make sure the `Permute_copy` was delegated.
+            assert not graph_contains_any_of_ops(
+                graph=edge_program.graph, ops=[exir_ops.edge.aten.permute_copy.default]
+            )
+            assert any(
+                "lowered_module" in node.name for node in edge_program.graph.nodes
+            )
 
-    # Run conversion
-    _ = to_quantized_edge_program(Conv2dPermuteCopyModule(new_dims), input_shape)
+            # Capture generated model
+            tflite_flatbuffers_model, io_formats = converter_spy.calls[-1].return_value
 
-    # Capture generated model
-    tflite_flatbuffers_model, io_formats = converter_spy.spy_return
+            # Capture converted program
+            exported_program: ExportedProgram = converter_spy.calls[-1].args[0]
 
-    # Capture converted program
-    edge_program: ExportedProgram = converter_spy.call_args.args[1]
+            input_data = (np.random.random(input_shape).astype(np.float32) * 50).astype(
+                np.int8
+            )
 
-    input_data = (np.random.random(input_shape).astype(np.float32) * 50).astype(np.int8)
+            convert_run_compare(
+                exported_program,
+                input_data,
+                tfl_model=tflite_flatbuffers_model,
+                atol=1.0,
+                tflite_input_preprocess=ToChannelLastPreprocess(),
+                tflite_output_preprocess=ToChannelFirstPreprocess(),
+            )
 
-    convert_run_compare(
-        edge_program,
-        input_data,
-        tfl_model=tflite_flatbuffers_model,
-        atol=1.0,
-        tflite_input_preprocess=ToNHWCPreprocess(),
-        tflite_output_preprocess=ToNCHWPreprocess(),
+    @parameterized.expand(
+        [
+            ["Permutation can be replaced by reshapes", (10, 1, 8), (0, 2, 1)],
+            ["Permutation can be replaced by reshapes", (10, 1, 1), (2, 1, 0)],
+            ["Permutation is identical and can be removed", (10, 1, 8), (0, 1, 2)],
+        ]
     )
+    def test_permute_copy_conversion__from_permute_3D__quantized(
+        self, _: str, input_shape, new_dims
+    ):
+        with kgb.spy_on(
+            EdgeProgramToIRConverter.convert_program, call_original=True
+        ) as converter_spy:
+            # Run conversion
+            edge_program = to_quantized_edge_program(
+                LinearPermuteModule(input_shape[2], new_dims), input_shape
+            ).exported_program()
+
+            # Make sure the `Permute_copy` was delegated.
+            assert not graph_contains_any_of_ops(
+                graph=edge_program.graph, ops=[exir_ops.edge.aten.permute_copy.default]
+            )
+            assert any(
+                "lowered_module" in node.name for node in edge_program.graph.nodes
+            )
+
+            # Capture generated model
+            tflite_flatbuffers_model, io_formats = converter_spy.calls[-1].return_value
+
+            # Capture converted program
+            exported_program: ExportedProgram = converter_spy.calls[-1].args[0]
+
+            input_data = (np.random.random(input_shape).astype(np.float32) * 50).astype(
+                np.int8
+            )
+
+            convert_run_compare(
+                exported_program,
+                input_data,
+                tfl_model=tflite_flatbuffers_model,
+                atol=1.0,
+            )
+
+    @parameterized.expand(
+        [
+            ["Transpose dims 1 and 2", (1, 16, 8, 8), (0, 2, 1, 3)],
+            ["To (2, 0, 1, 3) permutation", (1, 16, 8, 8), (2, 0, 1, 3)],
+            ["To  (3, 1, 2, 0) permutation", (1, 16, 8, 8), (3, 1, 2, 0)],
+            ["To  (3, 1, 0, 2) permutation", (1, 16, 8, 8), (3, 1, 0, 2)],
+        ]
+    )
+    def test_permute_copy_non_delegated_conversion__from_permute_4D__quantized(
+        self, _: str, input_shape, new_dims
+    ):
+        model = Conv2dPermuteModule(input_shape[1], new_dims)
+        edge_program = to_quantized_edge_program(model, input_shape).exported_program()
+
+        nodes = list(edge_program.graph.nodes)
+        assert len(nodes) == 10
+        assert (
+            nodes[6].target == exir_ops.edge.aten.permute_copy.default
+        )  # PermuteCopy not delegated.
+
+    @parameterized.expand(
+        [
+            ["Transpose dims 1 and 2", (1, 16, 8, 8), 1, 2],
+            ["Transpose dims 2 and 3", (1, 16, 8, 8), 2, 3],
+        ]
+    )
+    def test_permute_copy_non_delegated_conversion__from_transpose_4D__quantized(
+        self, _: str, input_shape, dim0, dim1
+    ):
+        model = Conv2dTransposeModule(input_shape[1], dim0, dim1)
+        edge_program = to_quantized_edge_program(model, input_shape).exported_program()
+
+        nodes = list(edge_program.graph.nodes)
+        assert len(nodes) == 10
+        assert (
+            nodes[6].target == exir_ops.edge.aten.permute_copy.default
+        )  # PermuteCopy not delegated.
diff --git a/backends/nxp/tests/test_neutron_backend.py b/backends/nxp/tests/test_neutron_backend.py

Original file line number	Diff line number	Diff line change
`@@ -39,6 +39,7 @@`
`39`	`39`	`SubTensorPattern,`
`40`	`40`	`TanhInPlacePattern,`
`41`	`41`	`TanhPattern,`
	`42`	`+ TransposeIntPattern,`
`42`	`43`	`ViewPattern,`
`43`	`44`	`)`
`44`	`45`	`from executorch.backends.nxp.quantizer.utils import (`
`@@ -212,6 +213,7 @@ def __init__(self):`
`212`	`213`	`NeutronAtenQuantizer(SubTensorPattern(), static_qconfig),`
`213`	`214`	`NeutronAtenQuantizer(TanhPattern(), static_qconfig),`
`214`	`215`	`NeutronAtenQuantizer(TanhInPlacePattern(), static_qconfig),`
	`216`	`+ NeutronAtenQuantizer(TransposeIntPattern(), static_qconfig),`
`215`	`217`	`NeutronAtenQuantizer(ViewPattern(), static_qconfig),`
`216`	`218`	`]`
`217`	`219`	`)`