Skip to content

Commit 550cf89

Browse files
committed
NXP backend: Move format-related transpositions of partition inputs/outputs to Neutron when possible.
Due to the different tensor formats used by Executorch and Neutron, the inputs/outputs often have to be transposed. This used to be done exclusively by the runtime. Now, the transpositions are done by Neutron when possible.
1 parent c19c8bb commit 550cf89

File tree

5 files changed

+366
-23
lines changed

5 files changed

+366
-23
lines changed

backends/nxp/backend/edge_program_converter.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -87,13 +87,16 @@ def convert_program(
8787
self._convert_qdq_cluster_q_dq_nodes(edge_program.graph.nodes, cc)
8888
self._process_nodes(edge_program.graph.nodes, cc)
8989

90-
# Assign output
91-
io_formats = cc.tflite_builder.assign_model_io_to_subgraph_and_get_io_formats(
92-
edge_program.graph_signature
93-
)
90+
# Assign the model its inputs and outputs.
91+
cc.tflite_builder.assign_model_io_to_subgraph(edge_program.graph_signature)
9492

95-
# TFLite model generation
93+
# Apply optimizations and finalize the model.
9694
internal_tflite_model = cc.tflite_builder.finish()
95+
96+
# Extract the formats of the model's inputs and outputs.
97+
io_formats = cc.tflite_builder.get_io_formats(edge_program.graph_signature)
98+
99+
# TFLite model generation
97100
flatbuffers_builder = flatbuffers.Builder()
98101
internal_tflite_model.gen_tflite(flatbuffers_builder)
99102

backends/nxp/backend/ir/converter/builder/aten_model_builder_director.py

Lines changed: 27 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -88,19 +88,40 @@ def append_operators(self, ops_to_add: list[tflite_model.Operator]):
8888

8989
self.check_and_append_operator(op)
9090

91-
def assign_model_io_to_subgraph_and_get_io_formats(
92-
self, graph_signature
93-
) -> dict[str, dict]:
94-
"""
95-
Assign model's inputs/outputs to SubGraph.
91+
def get_io_formats(self, graph_signature) -> dict[str, dict[str, TensorFormat]]:
92+
"""Get a mapping from tensor names to their formats.
9693
97-
:param graph_signature: Instance of GraphSignature.
94+
:param graph_signature: Instance of GraphSignature.
9895
:returns: Mapping between IO tensors' names and their formats.
9996
"""
10097
io_formats = {
10198
"inputs": {},
10299
"outputs": {},
103100
}
101+
for input_name in graph_signature.user_inputs:
102+
tensor = self.tensor_for_name(input_name)
103+
assert input_name == tensor.name, (
104+
"Program's input name doesn't match with tensor name in TFLite. "
105+
"Input was probably redirected."
106+
)
107+
io_formats["inputs"][tensor.name] = tensor.tensor_format
108+
109+
for output_name in graph_signature.user_outputs:
110+
tensor = self.tensor_for_name(output_name)
111+
assert output_name == tensor.name, (
112+
"Program's output name doesn't match with tensor name in TFLite. "
113+
"Output was probably redirected."
114+
)
115+
io_formats["outputs"][tensor.name] = tensor.tensor_format
116+
117+
return io_formats
118+
119+
def assign_model_io_to_subgraph(self, graph_signature):
120+
"""
121+
Assign model's inputs/outputs to SubGraph.
122+
123+
:param graph_signature: Instance of GraphSignature.
124+
"""
104125

105126
self.get_sub_graph().inputs = tflite_model.SubGraphInputs()
106127
for input_name in graph_signature.user_inputs:
@@ -110,7 +131,6 @@ def assign_model_io_to_subgraph_and_get_io_formats(
110131
"Input was probably redirected."
111132
)
112133
self.get_sub_graph().inputs.tmp_inputs.append(tensor)
113-
io_formats["inputs"][tensor.name] = tensor.tensor_format
114134

115135
self.get_sub_graph().outputs = tflite_model.SubGraphOutputs()
116136
for output_name in graph_signature.user_outputs:
@@ -120,7 +140,3 @@ def assign_model_io_to_subgraph_and_get_io_formats(
120140
"Output was probably redirected."
121141
)
122142
self.get_sub_graph().outputs.tmp_outputs.append(tensor)
123-
124-
io_formats["outputs"][tensor.name] = tensor.tensor_format
125-
126-
return io_formats

backends/nxp/backend/ir/converter/builder/model_builder.py

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,9 @@
4848
FlexTranspose,
4949
)
5050
from executorch.backends.nxp.backend.ir.tflite_optimizer import optimizer
51+
from executorch.backends.nxp.backend.neutron_operator_support import (
52+
transposition_is_supported_on_neutron,
53+
)
5154
from executorch.backends.nxp.backend.neutron_target_spec import NeutronTargetSpec
5255

5356

@@ -355,6 +358,19 @@ def _make_inputs_channels_first(self):
355358
if input_tensor.tensor_format.is_channels_last():
356359
# Create a Transpose operator and replace the graph input
357360

361+
new_input_shape = translator.channels_last_shape_to_channels_first(
362+
input_tensor.shape
363+
)
364+
perm = translator.create_channels_first_to_channels_last_permutation(
365+
input_tensor.rank
366+
)
367+
368+
if not transposition_is_supported_on_neutron(
369+
new_input_shape.vector, list(perm), self.neutron_target_spec
370+
):
371+
new_inputs.append(input_tensor)
372+
continue
373+
358374
if input_tensor.rank > 6:
359375
msg = (
360376
f"Couldn't preserve the shape of input tensor '{input_tensor.name}', because it has "
@@ -365,14 +381,9 @@ def _make_inputs_channels_first(self):
365381
new_input = self.duplicate_tensor(
366382
input_tensor, input_tensor.name + "_channels_first"
367383
)
368-
new_input.shape = translator.channels_last_shape_to_channels_first(
369-
input_tensor.shape
370-
)
384+
new_input.shape = new_input_shape
371385
new_input.tensor_format = input_tensor.tensor_format.to_node_format()
372386

373-
perm = translator.create_channels_first_to_channels_last_permutation(
374-
input_tensor.rank
375-
)
376387
transpose = self._create_transpose_operator(
377388
new_input, input_tensor, perm
378389
)
@@ -397,6 +408,16 @@ def _make_outputs_channels_first(self):
397408
if output_tensor.tensor_format.is_channels_last():
398409
# Add a Transpose operator, to make the output channels first
399410

411+
shape = output_tensor.shape.vector
412+
perm = translator.create_channels_last_to_channels_first_permutation(
413+
len(shape), True
414+
)
415+
if not transposition_is_supported_on_neutron(
416+
shape, perm, self.neutron_target_spec
417+
):
418+
new_outputs.append(output_tensor)
419+
continue
420+
400421
if output_tensor.rank > 6:
401422
logger.e(
402423
logger.Code.IO_PRESERVATION_ERROR,
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
# Copyright 2025 NXP
2+
#
3+
# This source code is licensed under the BSD-style license found in the
4+
# LICENSE file in the root directory of this source tree.
5+
6+
from executorch.backends.nxp.backend.neutron_target_spec import NeutronTargetSpec
7+
8+
9+
def is_tensor_invariant_permutation(
    input_shape: list[int], permutation: list[int]
) -> bool:
    """Determine whether `permutation` leaves the underlying tensor data unchanged.

    Dimensions of size 1 contribute no data movement, so they are ignored. If all
    remaining (size > 1) dimensions keep their relative order, the permutation is
    equivalent to a reshape. This includes the identity permutation.

    :param input_shape: Shape of the tensor being permuted.
    :param permutation: The permutation to analyze.
    :returns: True iff the permutation only moves `1`-sized dimensions around.
    """
    significant_axes = [axis for axis in permutation if input_shape[axis] != 1]
    return significant_axes == sorted(significant_axes)
19+
20+
def transposition_is_supported_on_neutron(
    input_shape: list[int],
    permutation: list[int],
    neutron_target_spec: "NeutronTargetSpec",
) -> bool:
    """Determine if the current NeutronSoftware properly supports a `Transpose`
    operator with the given `input_shape` and `permutation`.

    :param input_shape: The shape of the main input tensor of the `Transpose` operator.
    :param permutation: The permutation the `Transpose` operator is computing.
    :param neutron_target_spec: Object holding some parameters of the target platform.
    :returns: True iff Neutron can execute the transposition correctly.
    """
    num_macs = neutron_target_spec.get_num_macs()

    # A permutation which keeps all non-trivial (size > 1) dimensions in their
    # relative order only shuffles `1`-sized axes, so Neutron will turn the
    # `Transpose` into a `Reshape`. This also covers the identity permutation.
    significant_axes = [axis for axis in permutation if input_shape[axis] != 1]
    if significant_axes == sorted(significant_axes):
        return True

    if permutation == [0, 3, 1, 2]:  # NHWC -> NCHW
        n, h, w, c = input_shape

        if (h * w * c) % num_macs != 0:
            # Official Neutron requirement.
            return False

        if c % num_macs != 0 or (h * w) % num_macs != 0:
            # Neutron would produce incorrect outputs.
            return False

        if n != 1:
            # Neutron only supports `Transpose` operators where the dimensions can
            # be combined into 2 consecutive groups. These 2 groups are then
            # transposed like a matrix, and the result is reshaped. Therefore, for
            # the [0, 3, 1, 2] permutation, when h * w != 1 and c != 1, the batch
            # size must be 1.
            return False

        return True

    if permutation == [0, 2, 3, 1]:  # NCHW -> NHWC
        n, c, h, w = input_shape

        if w % num_macs != 0:
            # Official Neutron requirement.
            return False

        if c % num_macs != 0 or (h * w) % num_macs != 0:
            # Neutron would produce incorrect outputs.
            return False

        if n != 1:
            # Neutron only supports `Transpose` operators where the dimensions can
            # be combined into 2 consecutive groups. These 2 groups are then
            # transposed like a matrix, and the result is reshaped. Therefore, for
            # the [0, 2, 3, 1] permutation, when h * w != 1 and c != 1, the batch
            # size must be 1.
            return False

        return True

    # Any other data-moving permutation is not supported.
    return False

0 commit comments

Comments
 (0)