Skip to content

Commit 2ca11c2

Browse files
committed
NXP backend: Improve cat delegation by using inferred node formats.
1 parent 0b8ffda commit 2ca11c2

File tree

2 files changed

+137
-28
lines changed

2 files changed

+137
-28
lines changed

backends/nxp/backend/ir/converter/node_converters/ops_converters/cat_converter.py

Lines changed: 41 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@
99
CustomDelegationOptions,
1010
)
1111
from executorch.backends.nxp.backend.ir.converter.conversion import translator
12+
from executorch.backends.nxp.backend.ir.converter.conversion.translator import (
13+
create_channels_first_to_channels_last_permutation,
14+
)
1215
from executorch.backends.nxp.backend.ir.converter.node_converter import (
1316
_is_dequant_node,
1417
_is_quant_node,
@@ -18,6 +21,7 @@
1821
Concatenation,
1922
)
2023
from executorch.backends.nxp.backend.neutron_target_spec import NeutronTargetSpec
24+
from executorch.backends.nxp.backend.node_format_inference import NXP_NODE_FORMAT
2125
from torch.fx import Node
2226
from torch.nn import Parameter
2327

@@ -85,39 +89,48 @@ def _is_supported_on_target(
8589
if dim == 0:
8690
return False
8791

88-
# If all input shapes are equal, the neutron is able to pad the last dimension of inputs and outputs.
89-
input_shapes = [_get_shape(input_) for input_ in node.all_input_nodes]
90-
if input_shapes.count(input_shapes[0]) == len(input_shapes):
91-
if dim == len(input_shapes[0]) - 1:
92-
return True
92+
# Neutron requires the channels to be a multiple of `num_macs`. The channels could either be the second or the
93+
# last dimension, depending on the formats of the node.
94+
if node.meta[NXP_NODE_FORMAT].is_channels_first():
95+
# During conversion to IR, the shape will be permuted to channels last, and the dimension on index
96+
# `1` will end up being the channels (last dim in NHWC).
97+
channels_index = 1
98+
to_nhwc_perm = create_channels_first_to_channels_last_permutation(
99+
len(node.meta["val"].shape), True
100+
)
101+
dim = to_nhwc_perm.index(
102+
dim
103+
) # Make sure the dim points to the NHWC dimension.
104+
else:
105+
# The shape will not be permuted during conversion, so the channels will remain the last dimension.
106+
channels_index = -1
93107

94-
# Neutron requires the channels to be a multiple of numMacs. The channels could either be the second or the
95-
# last dimension, depending on the formats of the node. The format, however, cannot be determined
96-
# during conversion, as it depends on what other nodes are delegated.
97108
input_channels = [
98-
# The second dimension is the channels in PyTorch. If the inputs/output are not channels first, it
99-
# will still be the channels in the IR.
100-
_get_shape(input_)[1]
101-
for input_ in node.all_input_nodes
102-
] + [
103-
# If the inputs/outputs are channels first, the last dimension will be the channels.
104-
_get_shape(input_)[-1]
105-
for input_ in node.all_input_nodes
109+
_get_shape(input_)[channels_index] for input_ in node.all_input_nodes
106110
]
107-
if any(
108-
(input_channel % neutron_target_spec.get_num_macs()) != 0
109-
for input_channel in input_channels
110-
):
111+
output_channels = _get_shape(node)[channels_index]
112+
113+
num_macs = neutron_target_spec.get_num_macs()
114+
input_shapes = [_get_shape(input_) for input_ in node.all_input_nodes]
115+
if any((input_channel % num_macs) != 0 for input_channel in input_channels):
111116
# neutron-library/src/utils/NeutronLibraryInterrogation.cpp#1492
112-
return False
113117

114-
output_channels = [_get_shape(node)[1], _get_shape(node)[-1]]
115-
# neutron-library/src/utils/NeutronLibraryInterrogation.cpp#1493
116-
if any(
117-
(out_c % neutron_target_spec.get_num_macs()) != 0
118-
for out_c in output_channels
119-
):
120-
return False
118+
# If all input shapes are equal, the neutron is able to pad the last dimension of the inputs.
119+
if not (
120+
input_shapes.count(input_shapes[0]) == len(input_shapes)
121+
and dim == len(input_shapes[0]) - 1
122+
):
123+
return False
124+
125+
if (output_channels % num_macs) != 0:
126+
# neutron-library/src/utils/NeutronLibraryInterrogation.cpp#1493
127+
128+
# If all input shapes are equal, the neutron is able to pad the last dimension of the output.
129+
if not (
130+
input_shapes.count(input_shapes[0]) == len(input_shapes)
131+
and dim == len(input_shapes[0]) - 1
132+
):
133+
return False
121134

122135
if len(node.all_input_nodes) < 2: # Not supported on Neutron
123136
# TODO Try to skip the operator if this case is realistic.

backends/nxp/tests/ir/converter/node_converter/test_cat_converter.py

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -319,6 +319,27 @@ def test_cat__same_shapes_converter_padding_last_dimension():
319319
assert any("lowered_module" in node.name for node in quantized_program.graph.nodes)
320320

321321

322+
def test_cat__same_shapes__channels_first__padding_channels():
323+
target = "imxrt700"
324+
325+
# The Converter is capable of padding the last dimension of `cat` with the same input shapes.
326+
input_shape = (1, 2, 3, 4)
327+
328+
quantized_program = to_quantized_edge_program(
329+
CatConvModule(1),
330+
[input_shape, input_shape],
331+
target=target,
332+
neutron_converter_flavor="SDK_25_09",
333+
custom_delegation_options=CustomDelegationOptions(),
334+
).exported_program()
335+
336+
# Make sure the `Cat` was delegated.
337+
assert not graph_contains_any_of_ops(
338+
graph=quantized_program.graph, ops=[exir_ops.edge.aten.cat.default]
339+
)
340+
assert any("lowered_module" in node.name for node in quantized_program.graph.nodes)
341+
342+
322343
def test_cat__same_shapes_converter_padding_middle_dimension():
323344
target = "imxrt700"
324345

@@ -339,3 +360,78 @@ def test_cat__same_shapes_converter_padding_middle_dimension():
339360
assert not any(
340361
"lowered_module" in node.name for node in quantized_program.graph.nodes
341362
)
363+
364+
365+
def test_cat__format_specific_support__formatless(mocker):
366+
# The last dim will end up being the channels, as the format is `formatless`.
367+
# Only the last dim satisfies the Neutron requirements for the channels.
368+
input_shape = (3, 3, 3, 8)
369+
num_inputs = 2
370+
dim = 2
371+
372+
input_shapes = [input_shape] * num_inputs
373+
374+
converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program")
375+
376+
quantized_program = to_quantized_edge_program(
377+
CatModule(dim), input_shapes
378+
).exported_program()
379+
380+
# Make sure the `Cat` was delegated.
381+
assert not graph_contains_any_of_ops(
382+
graph=quantized_program.graph, ops=[exir_ops.edge.aten.cat.default]
383+
)
384+
assert any("lowered_module" in node.name for node in quantized_program.graph.nodes)
385+
386+
tflite_flatbuffers_model, io_formats = converter_spy.spy_return
387+
exported_program: ExportedProgram = converter_spy.call_args.args[1]
388+
input_data = {
389+
i: (np.random.random(shape) * 50).astype(np.int8)
390+
for i, shape in enumerate(input_shapes)
391+
}
392+
convert_run_compare(
393+
exported_program,
394+
tfl_model=tflite_flatbuffers_model,
395+
input_data=input_data,
396+
atol=1,
397+
)
398+
399+
400+
def test_cat__format_specific_support__channels_first(mocker):
401+
# The second dim will end up being the channels, as the format is `channels first`.
402+
# Only the second dim satisfies the Neutron requirements for the channels.
403+
input_shape = (3, 8, 3, 3)
404+
num_inputs = 2
405+
dim = 2
406+
407+
input_shapes = [input_shape] * num_inputs
408+
409+
converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program")
410+
411+
channels = (
412+
sum(shape[1] for shape in input_shapes) if dim in [1, -3] else input_shape[1]
413+
)
414+
quantized_program = to_quantized_edge_program(
415+
CatConvModule(dim, channels), input_shapes
416+
).exported_program()
417+
418+
# Make sure the `Cat` was delegated.
419+
assert not graph_contains_any_of_ops(
420+
graph=quantized_program.graph, ops=[exir_ops.edge.aten.cat.default]
421+
)
422+
assert any("lowered_module" in node.name for node in quantized_program.graph.nodes)
423+
424+
tflite_flatbuffers_model, io_formats = converter_spy.spy_return
425+
exported_program: ExportedProgram = converter_spy.call_args.args[1]
426+
input_data = {
427+
i: (np.random.random(shape) * 50).astype(np.int8)
428+
for i, shape in enumerate(input_shapes)
429+
}
430+
convert_run_compare(
431+
exported_program,
432+
tfl_model=tflite_flatbuffers_model,
433+
input_data=input_data,
434+
tflite_input_preprocess=ToNHWCPreprocess(),
435+
tflite_output_preprocess=ToNCHWPreprocess(),
436+
atol=1,
437+
)

0 commit comments

Comments
 (0)