Add support for conversion and quantization of Mean Dim operator

roman-janik-nxp · roman-janik-nxp · commit e1a0db806228 · 2025-03-10T18:00:48.000+01:00
diff --git a/backends/nxp/backend/edge_program_converter.py b/backends/nxp/backend/edge_program_converter.py
@@ -31,6 +31,7 @@
     exir_ops.edge.aten._softmax.default: SoftmaxConverter,
     exir_ops.edge.aten.view_copy.default: ViewCopyConverter,
     exir_ops.edge.aten.add.Tensor: AddTensorConverter,
+    exir_ops.edge.aten.mean.dim: MeanDimConverter,
 }
 
 
diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/__init__.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/__init__.py
@@ -22,8 +22,10 @@
     AddTensorConverter
 from executorch.backends.nxp.backend.ir.converter.node_converters.ops_converters.relu_converter import \
     ReLUConverter
+from executorch.backends.nxp.backend.ir.converter.node_converters.ops_converters.mean_dim_converter import \
+    MeanDimConverter
 __all__ = [
     "AddMMConverter", "ConvolutionConverter", "MMConverter", "PermuteCopyConverter", "SoftmaxConverter",
     "ViewCopyConverter", "QDQDequantizeConverter", "QDQQuantizeConverter", "ConstantPadNDConverter", "ReLUConverter",
-    "MaxPool2dConverter", "AvgPool2dConverter", "AddTensorConverter"
+    "MaxPool2dConverter", "AvgPool2dConverter", "AddTensorConverter", "MeanDimConverter"
 ]
diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/mean_dim_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/mean_dim_converter.py
@@ -0,0 +1,70 @@
+# Copyright (c) 2025 NXP
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+from torch.fx import Node
+from torch.nn import Parameter
+
+from executorch.backends.nxp.backend.ir.converter.conversion.translator import \
+    create_channels_last_to_channels_first_permutation
+from executorch.backends.nxp.backend.ir.converter.node_converter import NodeConverter, Target
+from executorch.backends.nxp.backend.ir.converter.node_converters.shared.reduce_utils import \
+    convert_axes_from_attribute
+from executorch.backends.nxp.backend.ir.tflite_generator.builtin_options import mean_options
+
+
+class MeanDimConverter(NodeConverter):
+    supported_targets = [Target.RT700]
+
+    @staticmethod
+    def _is_supported_in_IR(node: Node, parameters_mapping: dict[str, Parameter]) -> bool:
+        dim = node.args[1]
+        keepdim = node.args[2] if len(node.args) >= 3 else False
+        rank = len(node.args[0].meta["val"].shape)
+        to_neg_dim = lambda d: d - rank if d > 0 else d
+        dim = [to_neg_dim(d) for d in dim]
+
+        # Only last 2 dimensions (H, W) and keepdim=True with rank=4 are supported on Neutron.
+        if rank != 4 or dim not in [[-1, -2], [-2, -1]] or not keepdim:
+            return False
+
+        if hasattr(node.kwargs, "dtype") and node.kwargs["dtype"] not in [torch.float32, torch.uint32, torch.uint8]:
+            return False
+
+        if not NodeConverter._has_shared_q_params_if_quantized(node):
+            return False
+
+        return True
+
+    @staticmethod
+    def _normalize_and_to_channel_last_dim(dim: list[int], rank: int) -> list[int]:
+        # convert negative index to positive
+        to_pos_dim = lambda d: d + rank if d < 0 else d
+        dim = [to_pos_dim(d) for d in dim]
+
+        perm = create_channels_last_to_channels_first_permutation(rank, True)
+        dim = [perm[d] for d in dim]
+
+        return dim
+
+    # Mean Dim Node format: (Tensor self, int[1]? dim, bool keepdim=False, *, ScalarType? dtype=None)
+    def convert(self, node: Node):
+        """ Convert 'mean.dim' operator to TFLite 'Mean'.
+        """
+        self.assert_convertible(node)
+
+        dim = node.args[1]
+        keepdim = node.args[2] if len(node.args) >= 3 else False
+
+        t_op = self._create_tflite_op_with_io_tensors(node)
+        t_op.builtin_options = mean_options.Mean(keepdim)
+        x = t_op.tmp_inputs[0]
+
+        if x.tensor_format.is_channels_last():
+            dim = self._normalize_and_to_channel_last_dim(dim, x.rank)
+
+        convert_axes_from_attribute(t_op, self.builder, dim)
+        self.builder.append_operators([t_op])
diff --git a/backends/nxp/backend/ir/converter/node_converters/shared/reduce_utils.py b/backends/nxp/backend/ir/converter/node_converters/shared/reduce_utils.py
@@ -1,21 +1,17 @@
 #
-# Copyright 2024 NXP
+# Copyright 2024-2025 NXP
 #
 # License: LA_OPT_NXP_Software_License
 # See the LICENSE_LA_OPT_NXP_Software_License for more details.
 #
 
 import numpy as np
 
-from executorch.backends.nxp.backend.ir.lib.tflite.TensorType import TensorType
-from executorch.backends.nxp.backend.ir import logger
 from executorch.backends.nxp.backend.ir.converter.builder.model_builder import ModelBuilder
 from executorch.backends.nxp.backend.ir.converter.conversion import translator
-from executorch.backends.nxp.backend.ir.converter.conversion.common import OpsList, try_get_input
-from executorch.backends.nxp.backend.ir.converter.tensor_utils import tensor_has_data
+from executorch.backends.nxp.backend.ir.converter.conversion.common import OpsList
 from executorch.backends.nxp.backend.ir.tensor_formatting import TensorFormat
 from executorch.backends.nxp.backend.ir.tflite_generator import tflite_model
-from executorch.backends.nxp.backend.ir.tflite_generator.meta.types import name_for_type
 
 
 def convert_axes_from_attribute(t_op: tflite_model.Operator, builder: ModelBuilder, axes: list[int] | None):
@@ -38,88 +34,6 @@ def convert_axes_from_attribute(t_op: tflite_model.Operator, builder: ModelBuild
     t_op.tmp_inputs.append(axes_tensor)
 
 
-def convert_axes_from_input_tensor(t_op: tflite_model.Operator, builder: ModelBuilder, inspector: ONNXModelInspector,
-                                   ops: OpsList, noop_with_empty_axes: int, op_type: str):
-    """ Verify the `axes` tensor (on input index 1) of the `t_op`, which is expected to represent an ONNX reduction
-         operator.
-    """
-    x = t_op.tmp_inputs[0]
-    rank = x.rank
-
-    if axes_tensor := try_get_input(t_op, 1):
-
-        # ONNX uses int64, while TFLite requires int32 for the `axes` tensor.
-        if axes_tensor.type != TensorType.INT64:
-            logger.e(logger.Code.INVALID_ONNX_OPERATOR,
-                     f'ONNX `{op_type}` has `axes` of type `{name_for_type(axes_tensor.type)}`, instead of INT64.')
-
-        # Try to get the inferred data for the `axes` input.
-        if (axes_data := inspector.try_get_inferred_tensor_data(axes_tensor.name)) is not None:
-            # The `axes` were inferred during shape inference.
-            logger.d(f'Using inferred data for the `axes` input tensor of ONNX `{op_type}`.')
-
-            # Create a new tensor, in case the original `axes` tensor is used by multiple ops.
-            axes_tensor = builder.create_tensor_for_data(axes_data.astype(np.int32), 'axes')
-
-        # Make sure the `axes` are int32.
-        if tensor_has_data(axes_tensor):
-            # Cast the `axes` to int32 statically.
-            axes_tensor.tmp_buffer.data = axes_tensor.tmp_buffer.data.astype(np.int32)
-            axes_tensor.type = TensorType.INT32
-
-        else:
-            # The `axes` are dynamic and there is no inferred data for them. The shape inference is not possible in
-            #  this case, so it must have been skipped. If the `axes` are empty at runtime, ONNX will reduce over
-            #  all dimensions, whereas TFLite will not reduce at all. So the behavior is different, and it depends
-            #  on runtime data. Conversion could be implemented by adding multiple extra operators.
-            # I don't thing that completely prohibiting the conversion here is ideal, since the issue arises only in
-            #  an edge case, which is hopefully not very common. Just print a warning message for now.
-            logger.w(f'Conversion of ONNX `{op_type}` with a dynamic `axes` input will not be correct, if the `axes`'
-                     'are empty at runtime!')
-
-            # Insert a `Cast` op, to make the `axes` int32.
-            cast_op = builder.create_cast_before(t_op, 1, TensorType.INT32)
-            ops.add_pre(cast_op)
-
-            # For future references. Following code only cares about the final axes tensor.
-            axes_tensor = cast_op.tmp_outputs[0]
-
-        # Assign the new `axes_tensor` to the ReduceX operator.
-        t_op.tmp_inputs[1] = axes_tensor
-
-    else:
-        # No axes specified.
-
-        if noop_with_empty_axes == 1:
-            # ONNXRT: According to the documentation, the operator should do nothing in this situation. But that's
-            #  not what happens in ONNX Runtime. ORT seems to simply ignore the `noop_with_empty_axes` attribute.
-            #  https://github.com/microsoft/onnxruntime/issues/19147
-            # For now, exit with error. If later ORT adds support for this attribute, simply uncomment the
-            #  following code.
-
-            # if self.builder.operator_can_be_skipped(t_op, self.inspector):
-            #     # Skip the operator.
-            #     self.builder.redirect_tensor(t_op.tmp_outputs[0], t_op.tmp_inputs[0])
-            #     return []
-            #
-            # else:
-            #     # Return an operator which does nothing.
-            #     self.builder.turn_operator_to_identity(t_op)
-            #     return [t_op]
-
-            logger.e(logger.Code.INVALID_ONNX_OPERATOR,
-                     f'ONNX `{op_type}` has `noop_with_empty_axes` == 1 and the `axes` are not specified, which'
-                     ' indicates that the operator should do nothing. This is however not supported by ONNX'
-                     ' Runtime, and therefore the conversion is also not supported.')
-
-        else:
-            # Default is to reduce all axes.
-            axes_tensor = builder.create_tensor_for_data(np.arange(rank).astype(np.int32), 'axes')
-
-            t_op.tmp_inputs[1:] = []  # If the optional input was passed with name "", remove it.
-            t_op.tmp_inputs.append(axes_tensor)
-
-
 def ensure_reduce_transposition(builder, ops: OpsList):
     """
     Ensure transposition of ReduceX operator is defined correctly based on tensor format.
diff --git a/backends/nxp/neutron_partitioner.py b/backends/nxp/neutron_partitioner.py
@@ -190,6 +190,7 @@ def tag_qdq_clusters(self, nodes: List[torch.fx.Node]):
     exir_ops.edge.aten._softmax.default: SoftmaxConverter,
     exir_ops.edge.aten.view_copy.default: ViewCopyConverter,
     exir_ops.edge.aten.add.Tensor: AddTensorConverter,
+    exir_ops.edge.aten.mean.dim: MeanDimConverter,
 }
 
 
diff --git a/backends/nxp/quantizer/neutron_quantizer.py b/backends/nxp/quantizer/neutron_quantizer.py
@@ -169,6 +169,15 @@ def partition_types(self):
         return [torch.ops.aten.view.default]
 
 
+class FlattenPattern(SharedSpecPattern):
+    """
+    Quantizer for Flatten operator.
+
+    Matches `torch.ops.aten.flatten.using_ints`. All quantization behavior is inherited from
+    `SharedSpecPattern`.
+    """
+
+    def partition_types(self):
+        """Return the list of operator targets matched by this pattern."""
+        return [torch.ops.aten.flatten.using_ints]
+
+
 class PermutePattern(SharedSpecPattern):
     """
     Quantizer for Permute operator.
@@ -178,6 +187,15 @@ def partition_types(self):
         return [torch.ops.aten.permute.default]
 
 
+class MeanDimPattern(SharedSpecPattern):
+    """
+    Quantizer for Mean Dim operator.
+
+    Matches `torch.ops.aten.mean.dim`. All quantization behavior is inherited from
+    `SharedSpecPattern`.
+    """
+
+    def partition_types(self):
+        """Return the list of operator targets matched by this pattern."""
+        return [torch.ops.aten.mean.dim]
+
+
 class SoftMaxPattern(QuantizationPattern):
     """
     Quantizer for Softmax operator.
@@ -275,6 +293,8 @@ def __init__(self):
                 CadenceAtenQuantizer(ReluInPlacePattern(), static_qconfig),
                 CadenceAtenQuantizer(AvgPoolPattern(), static_qconfig),
                 CadenceAtenQuantizer(ViewPattern(), static_qconfig),
+                CadenceAtenQuantizer(MeanDimPattern(), static_qconfig),
+                CadenceAtenQuantizer(FlattenPattern(), static_qconfig),
             ]
         )
         self.op_to_quantizer = {pt: q for q in self.quantizers for pt in q.pattern.partition_types()}

Original file line number	Diff line number	Diff line change
`@@ -31,6 +31,7 @@`
`31`	`31`	`exir_ops.edge.aten._softmax.default: SoftmaxConverter,`
`32`	`32`	`exir_ops.edge.aten.view_copy.default: ViewCopyConverter,`
`33`	`33`	`exir_ops.edge.aten.add.Tensor: AddTensorConverter,`
	`34`	`+ exir_ops.edge.aten.mean.dim: MeanDimConverter,`
`34`	`35`	`}`
`35`	`36`
`36`	`37`
Original file line number	Diff line number	Diff line change
`@@ -190,6 +190,7 @@ def tag_qdq_clusters(self, nodes: List[torch.fx.Node]):`
`190`	`190`	`exir_ops.edge.aten._softmax.default: SoftmaxConverter,`
`191`	`191`	`exir_ops.edge.aten.view_copy.default: ViewCopyConverter,`
`192`	`192`	`exir_ops.edge.aten.add.Tensor: AddTensorConverter,`
	`193`	`+ exir_ops.edge.aten.mean.dim: MeanDimConverter,`
`193`	`194`	`}`
`194`	`195`
`195`	`196`