NXP backend: Improve cat delegation by using inferred node formats.

MartinPavella · MartinPavella · commit 58f191b5b6af · 2025-10-14T10:26:16.000+02:00
diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/cat_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/cat_converter.py
@@ -18,6 +18,7 @@
     Concatenation,
 )
 from executorch.backends.nxp.backend.neutron_target_spec import NeutronTargetSpec
+from executorch.backends.nxp.backend.node_format_inference import NXP_NODE_FORMAT
 from torch.fx import Node
 from torch.nn import Parameter
 
@@ -85,32 +86,29 @@ def _is_supported_on_target(
         if dim == 0:
             return False
 
-        # Neutron requires the channels to be a multiple of numMacs. The channels could either be the second or the
-        #  last dimension, depending on the formats of the node. The format, however, cannot be determined
-        #  during conversion, as it depends on what other nodes are delegated.
+        # Neutron requires the channels to be a multiple of `num_macs`. The channels can be either the second or the
+        #  last dimension, depending on the format of the node.
+        if node.meta[NXP_NODE_FORMAT].is_channels_first():
+            # During conversion to IR, the shape will be permuted to channels last, and the dimension on index
+            #  `1` will end up being the channels (last dim in NHWC).
+            channels_index = 1
+        else:
+            # The shape will not be permuted during conversion, so the channels will remain the last dimension.
+            channels_index = -1
+
         input_channels = [
-            # The second dimension is the channels in PyTorch. If the inputs/output are not channels first, it
-            #  will still be the channels in the IR.
-            _get_shape(input_)[1]
-            for input_ in node.all_input_nodes
-        ] + [
-            # If the inputs/outputs are channels first, the last dimension will be the channels.
-            _get_shape(input_)[-1]
+            _get_shape(input_)[channels_index]
             for input_ in node.all_input_nodes
         ]
-        if any(
-            (input_channel % neutron_target_spec.get_num_macs()) != 0
-            for input_channel in input_channels
-        ):
+        output_channels = _get_shape(node)[channels_index]
+
+        num_macs = neutron_target_spec.get_num_macs()
+        if any((input_channel % num_macs) != 0 for input_channel in input_channels):
             # neutron-library/src/utils/NeutronLibraryInterrogation.cpp#1492
             return False
 
-        output_channels = [_get_shape(node)[1], _get_shape(node)[-1]]
-        # neutron-library/src/utils/NeutronLibraryInterrogation.cpp#1493
-        if any(
-            (out_c % neutron_target_spec.get_num_macs()) != 0
-            for out_c in output_channels
-        ):
+        if (output_channels % num_macs) != 0:
+            # neutron-library/src/utils/NeutronLibraryInterrogation.cpp#1493
             return False
 
         if len(node.all_input_nodes) < 2:  # Not supported on Neutron
diff --git a/backends/nxp/tests/ir/converter/node_converter/test_cat_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_cat_converter.py
@@ -296,3 +296,78 @@ def test_cat__force_delegate():
         graph=quantized_program.graph, ops=[exir_ops.edge.aten.cat.default]
     )
     assert any("lowered_module" in node.name for node in quantized_program.graph.nodes)
+
+
+def test_cat__format_specific_support__formatless(mocker):
+    # The last dim will end up being the channels, as the format is `formatless`.
+    # Only the last dim satisfies the Neutron requirements for the channels.
+    input_shape = (3, 3, 3, 8)
+    num_inputs = 2
+    dim = 2
+
+    input_shapes = [input_shape] * num_inputs
+
+    converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program")
+
+    quantized_program = to_quantized_edge_program(
+        CatModule(dim), input_shapes
+    ).exported_program()
+
+    # Make sure the `Cat` was delegated.
+    assert not graph_contains_any_of_ops(
+        graph=quantized_program.graph, ops=[exir_ops.edge.aten.cat.default]
+    )
+    assert any("lowered_module" in node.name for node in quantized_program.graph.nodes)
+
+    tflite_flatbuffers_model, io_formats = converter_spy.spy_return
+    exported_program: ExportedProgram = converter_spy.call_args.args[1]
+    input_data = {
+        i: (np.random.random(shape) * 50).astype(np.int8)
+        for i, shape in enumerate(input_shapes)
+    }
+    convert_run_compare(
+        exported_program,
+        tfl_model=tflite_flatbuffers_model,
+        input_data=input_data,
+        atol=1,
+    )
+
+
+def test_cat__format_specific_support__channels_first(mocker):
+    # The second dim will end up being the channels, as the format is `channels first`.
+    # Only the second dim satisfies the Neutron requirements for the channels.
+    input_shape = (3, 8, 3, 3)
+    num_inputs = 2
+    dim = 2
+
+    input_shapes = [input_shape] * num_inputs
+
+    converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program")
+
+    channels = (
+        sum(shape[1] for shape in input_shapes) if dim in [1, -3] else input_shape[1]
+    )
+    quantized_program = to_quantized_edge_program(
+        CatConvModule(dim, channels), input_shapes
+    ).exported_program()
+
+    # Make sure the `Cat` was delegated.
+    assert not graph_contains_any_of_ops(
+        graph=quantized_program.graph, ops=[exir_ops.edge.aten.cat.default]
+    )
+    assert any("lowered_module" in node.name for node in quantized_program.graph.nodes)
+
+    tflite_flatbuffers_model, io_formats = converter_spy.spy_return
+    exported_program: ExportedProgram = converter_spy.call_args.args[1]
+    input_data = {
+        i: (np.random.random(shape) * 50).astype(np.int8)
+        for i, shape in enumerate(input_shapes)
+    }
+    convert_run_compare(
+        exported_program,
+        tfl_model=tflite_flatbuffers_model,
+        input_data=input_data,
+        tflite_input_preprocess=ToNHWCPreprocess(),
+        tflite_output_preprocess=ToNCHWPreprocess(),
+        atol=1,
+    )