NXP backend: Improve view_copy delegation (#15270)

MartinPavella · web-flow · commit 488d7619448f · 2025-12-08T08:00:22.000+01:00
### Summary
The conversion of `aten.view_copy` to NeutronIR may require the insertion of extra `Transpose` operations, which may not be supported. This PR makes sure that the `view_copy` is only delegated if the extra operations are supported too.

### Test plan
Unit tests provided.

cc @robert-kalmar
diff --git a/backends/nxp/backend/edge_helper.py b/backends/nxp/backend/edge_helper.py
@@ -136,3 +136,51 @@ def get_quantization_parameters_for(node: Node) -> tuple[Scale, ZeroPoint] | Non
         return None
 
     return node.args[1], node.args[2]  # Scale and zero_point
+
+
+def get_non_qdq_users(node: Node) -> list[Node]:
+    """Return the nodes which consume the output of `node`, skipping over the Quantize/Dequantize nodes of the QDQ
+     cluster in between. That is, the list [<user_1>, ..., <user_N>] from the illustration below is returned.
+
+    An empty list is returned if `node` is not used by exactly 1 Quantize node, or if that Quantize node has a user which is not a Dequantize node.
+
+                │
+            ┌───▼────┐
+            │ `node` │
+            └───┬────┘
+           ┌────▼─────┐
+           │ Quantize │
+           └────┬─────┘
+                ├─────── ... ──────┐
+          ┌─────▼──────┐     ┌─────▼──────┐
+          │ Dequantize │ ... │ Dequantize │
+          └─────┬──────┘     └─────┬──────┘
+           ┌────▼─────┐       ┌────▼─────┐
+           │ <user_1> │  ...  │ <user_N> │
+           └────┬─────┘       └────┬─────┘
+
+    """
+
+    quant_nodes = list(node.users)
+    if len(quant_nodes) != 1 or quant_nodes[0].target not in [
+        exir_ops.edge.quantized_decomposed.quantize_per_tensor.default,
+        exir_ops.edge.quantized_decomposed.quantize_per_channel.default,
+    ]:
+        return []  # `node` is not consumed by exactly one Quantize node.
+
+    dequant_nodes = list(quant_nodes[0].users)
+    if any(
+        dequant_node.target
+        not in [
+            exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default,
+            exir_ops.edge.quantized_decomposed.dequantize_per_channel.default,
+        ]
+        for dequant_node in dequant_nodes
+    ):
+        return []  # At least one consumer of the Quantize node is not a Dequantize node.
+
+    res = []
+    for dequant_node in dequant_nodes:
+        res.extend(list(dequant_node.users))  # Users are concatenated per Dequantize node, without de-duplication.
+
+    return res
diff --git a/backends/nxp/backend/ir/converter/node_converter.py b/backends/nxp/backend/ir/converter/node_converter.py
@@ -125,14 +125,19 @@ def supports_partitioning_result(
         node: Node,
         partition_list: list[Partition],
         custom_delegation_options: CustomDelegationOptions,
-    ):
+        neutron_target_spec: NeutronTargetSpec,
+        parameters_mapping: dict[str, Parameter],
+    ) -> bool:
         """Check if the given `node` supports the assigned partitioning, which is stored in the `partition_list`. Child
             classes can overwrite this method in case they have delegation restrictions based on the context defined by
             the partitioning result.
 
         :param node: torch.Node to check.
         :param partition_list: List of proposed partitions.
         :param custom_delegation_options: Custom user options which affect node delegation.
+        :param neutron_target_spec: NeutronTargetSpec instance.
+        :param parameters_mapping: Dictionary mapping tensor names to their static data.
+        :return: Boolean indicating whether the node supports the current partitioning.
         """
         return True
 
diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/cat_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/cat_converter.py
@@ -156,7 +156,9 @@ def supports_partitioning_result(
         node: Node,
         partition_list: list[Partition],
         custom_delegation_options: CustomDelegationOptions,
-    ):
+        neutron_target_spec: NeutronTargetSpec,
+        parameters_mapping: dict[str, Parameter],
+    ) -> bool:
         # There is a bug in the NeutronConverter, where if none of the input dimensions before the one referenced by
         #  `dim` are `!= 1`, the `Concat` is not delegated.
         # This only happens when the inputs to the `Concat` are model inputs, and not outputs of other
diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/view_copy_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/view_copy_converter.py
@@ -6,12 +6,18 @@
 import numpy as np
 
 from executorch.backends.nxp.backend.edge_helper import (
+    get_non_qdq_users,
     input_tensor,
     output_tensor,
     tensor_rank,
 )
 from executorch.backends.nxp.backend.ir.converter import quantization_utils
 from executorch.backends.nxp.backend.ir.converter.conversion.common import OpsList
+from executorch.backends.nxp.backend.ir.converter.conversion.translator import (
+    apply_permutation_to,
+    create_channels_first_to_channels_last_permutation,
+    create_channels_last_to_channels_first_permutation,
+)
 from executorch.backends.nxp.backend.ir.converter.node_converter import (
     CustomDelegationOptions,
     is_not_qdq_node,
@@ -23,6 +29,12 @@
 from executorch.backends.nxp.backend.ir.tflite_generator.builtin_options import (
     reshape_options,
 )
+from executorch.backends.nxp.backend.neutron_operator_support import (
+    transposition_is_supported_on_neutron,
+)
+from executorch.backends.nxp.backend.neutron_target_spec import NeutronTargetSpec
+from executorch.backends.nxp.backend.node_format import NXP_NODE_FORMAT
+from executorch.exir.dialects._ops import ops as exir_ops
 from torch.fx import Node
 from torch.fx.passes.infra.partitioner import Partition
 from torch.nn import Parameter
@@ -53,6 +65,8 @@ def supports_partitioning_result(
         node: Node,
         partition_list: list[Partition],
         custom_delegation_options: CustomDelegationOptions,
+        neutron_target_spec: NeutronTargetSpec,
+        parameters_mapping: dict[str, Parameter],
     ):
         view_copy_partitions = [
             partition for partition in partition_list if node in partition.nodes
@@ -66,6 +80,76 @@ def supports_partitioning_result(
             # The `view_copy` cannot be the only node in a partition.
             return False
 
+        input_format = node.args[0].meta[NXP_NODE_FORMAT]
+        output_format = node.meta[NXP_NODE_FORMAT]
+        input_shape = list(node.args[0].meta["val"].shape)
+        output_shape = list(node.meta["val"].shape)
+        to_nchw_perm = create_channels_last_to_channels_first_permutation(
+            len(input_shape), True
+        )
+        to_nhwc_perm = create_channels_first_to_channels_last_permutation(
+            len(output_shape), True
+        )
+        channels_last_input_shape = apply_permutation_to(
+            input_shape,
+            create_channels_first_to_channels_last_permutation(len(input_shape), True),
+        )
+
+        if input_format.is_channels_first() and (not output_format.is_channels_first()):
+            # The `view_copy` removes node format. Conversion will require the addition of a `Transpose` operator.
+            # Make sure the `Transpose` will be supported.
+
+            if not transposition_is_supported_on_neutron(
+                channels_last_input_shape, to_nchw_perm, neutron_target_spec
+            ):
+                # The `Transpose` would have to be removed by the `PermuteFullyConnectedWeightsAfterReshape` pass.
+                # Make sure it will be applied.
+                users = get_non_qdq_users(node)
+                if len(users) != 1 or (linear_node := users[0]).target not in [
+                    exir_ops.edge.aten.addmm.default,
+                    exir_ops.edge.aten.mm.default,
+                ]:
+                    return False
+
+                if linear_node not in view_copy_partitions[0].nodes:
+                    # The `mm` / `addmm` node will not be delegated within this partition.
+                    return False
+
+                # Make sure the specific requirements of the `PermuteFullyConnectedWeightsAfterReshape` are satisfied.
+                weights_index = (
+                    2 if linear_node.target == exir_ops.edge.aten.addmm.default else 1
+                )
+                if not (
+                    input_shape[0] == output_shape[0]  # Preserve batch.
+                    and len(output_shape) == 2
+                    and output_shape[1]
+                    == linear_node.args[weights_index].meta["val"].shape[0]
+                ):
+                    return False
+
+        elif (
+            not input_format.is_channels_first()
+        ) and output_format.is_channels_first():
+            # The `view_copy` introduces node format. Conversion will require the addition of a `Transpose` operator.
+            # Make sure the `Transpose` will be supported.
+            if not transposition_is_supported_on_neutron(
+                output_shape, to_nhwc_perm, neutron_target_spec
+            ):
+                return False
+
+        elif input_format.is_channels_first() and output_format.is_channels_first():
+            # The `view_copy` works with the channels first format, so both tensors will end up being transposed.
+            # Make sure these transpositions are supported.
+            if not (
+                transposition_is_supported_on_neutron(
+                    channels_last_input_shape, to_nchw_perm, neutron_target_spec
+                )
+                and transposition_is_supported_on_neutron(
+                    output_shape, to_nhwc_perm, neutron_target_spec
+                )
+            ):
+                return False
+
         return True
 
     @staticmethod
diff --git a/backends/nxp/neutron_partitioner.py b/backends/nxp/neutron_partitioner.py
@@ -317,11 +317,12 @@ def __init__(
         )
         self.neutron_target_spec = neutron_target_spec
 
-    @staticmethod
     def validate_partitioning_result(
+        self,
         graph: Graph,
         partition_list: list[Partition],
         custom_delegation_options: CustomDelegationOptions,
+        parameters_mapping: dict[str, Parameter],
     ) -> bool:
         all_delegated_nodes = {
             node for partition in partition_list for node in partition.nodes
@@ -334,7 +335,11 @@ def validate_partitioning_result(
                 and node.target in supported_ops
             ):
                 if not supported_ops[node.target].supports_partitioning_result(
-                    node, partition_list, custom_delegation_options
+                    node,
+                    partition_list,
+                    custom_delegation_options,
+                    self.neutron_target_spec,
+                    parameters_mapping,
                 ):
                     # This node is not supported within its partition. Exclude it from delegation in the future.
                     partitioning_valid = False
@@ -379,14 +384,21 @@ def partition(self, exported_program: ExportedProgram) -> PartitionResult:
         # This format will be used by the `CapabilityBasedPartitioner` to determine which nodes will be delegated.
         NodeFormatInference(exported_program).identify_node_formats()
 
+        parameters_mapping = EdgeProgramToIRConverter.map_inputs_to_parameters(
+            exported_program
+        )
+
         iteration_limit = len(exported_program.graph.nodes)
         for _ in range(iteration_limit):
             # Run the partitioning.
             partition_list = capability_partitioner.propose_partitions()
 
             # Check if the nodes support the partitioning result. Mark the problematic nodes with `NXP_DO_NOT_DELEGATE`.
             partitioning_valid = self.validate_partitioning_result(
-                exported_program.graph, partition_list, self.custom_delegation_options
+                exported_program.graph,
+                partition_list,
+                self.custom_delegation_options,
+                parameters_mapping,
             )
             if partitioning_valid:
                 # The result of the partitioning is fine
diff --git a/backends/nxp/tests/ir/converter/node_converter/test_view_copy_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_view_copy_converter.py