From 07a116c456542f3e3c33b42bb97ec1153d69909a Mon Sep 17 00:00:00 2001 From: Roman Janik Date: Fri, 12 Sep 2025 11:46:41 +0200 Subject: [PATCH 1/3] Remove Fuse activation functions IR optimization --- .../fuse_activation_functions.py | 235 ------------------ .../backend/ir/tflite_optimizer/optimizer.py | 8 - 2 files changed, 243 deletions(-) delete mode 100755 backends/nxp/backend/ir/tflite_optimizer/optimizations/fuse_activation_functions.py diff --git a/backends/nxp/backend/ir/tflite_optimizer/optimizations/fuse_activation_functions.py b/backends/nxp/backend/ir/tflite_optimizer/optimizations/fuse_activation_functions.py deleted file mode 100755 index 6b657c4d5b1..00000000000 --- a/backends/nxp/backend/ir/tflite_optimizer/optimizations/fuse_activation_functions.py +++ /dev/null @@ -1,235 +0,0 @@ -# Copyright 2024 NXP -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from executorch.backends.nxp.backend.ir import logger -from executorch.backends.nxp.backend.ir.lib.tflite.ActivationFunctionType import ( - ActivationFunctionType, -) -from executorch.backends.nxp.backend.ir.lib.tflite.BuiltinOperator import ( - BuiltinOperator, -) -from executorch.backends.nxp.backend.ir.tflite_generator import tflite_model -from executorch.backends.nxp.backend.ir.tflite_optimizer.graph_utils import ( - operator_is_type, -) -from executorch.backends.nxp.backend.ir.tflite_optimizer.operator_rules import ( - NoFusedActivationFunction, -) -from executorch.backends.nxp.backend.ir.tflite_optimizer.optimizations.base_optimization import ( - BaseOptimization, -) -from executorch.backends.nxp.backend.ir.tflite_optimizer.pattern_matcher import ( - Op, - PatternMatcher, -) -from executorch.backends.nxp.backend.ir.tflite_optimizer.tensor_rules import ( - TensorHasOneConsumer, -) - - -class FuseActivationFunctions(BaseOptimization): - ops_with_fused_activation_function = [ - "Conv2D", - "Conv3D", - "DepthwiseConv2D", - "TransposeConv", - "MaxPool2D", - "AveragePool2D", - "SVDF", - "FullyConnected", - "Add", - "Mul", - "Sub", - "Div", - # 'Concatenation', # currently disabled - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/concatenation.cc#L139 - # 'L2Norm', # currently disabled - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/l2norm.cc#L72 - # LSTM operators will always already have fused activation functions. They are assigned in `convert_lstm.py`. - # 'LSTM', 'UnidirectionalSequenceLSTM', 'BidirectionalSequenceLSTM' - # RNN operators will always already have fused activation functions. They are assigned in `convert_rnn.py`. 
- # 'RNN', 'SequenceRNN', 'BidirectionalSequenceRNN', - ] - - activation_functions = ["Relu", "ReluN1To1", "Relu6", "Tanh", "Sign"] - - supported_activations_for_op: dict[ - BuiltinOperator, list[ActivationFunctionType] - ] = { - BuiltinOperator.CONV_2D: [ - ActivationFunctionType.RELU, - ActivationFunctionType.RELU_N1_TO_1, - ActivationFunctionType.RELU6, - ], - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/conv.cc#L912 - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/kernel_util.h#L285-L300 - BuiltinOperator.CONV_3D: [ - ActivationFunctionType.RELU, - ActivationFunctionType.RELU_N1_TO_1, - ActivationFunctionType.RELU6, - ], - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/conv3d.cc#L213 - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/kernel_util.h#L285-L300 - BuiltinOperator.DEPTHWISE_CONV_2D: [ - ActivationFunctionType.RELU, - ActivationFunctionType.RELU_N1_TO_1, - ActivationFunctionType.RELU6, - ], - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/depthwise_conv.cc#L307 - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/kernel_util.h#L285-L300 - BuiltinOperator.TRANSPOSE_CONV: [ - ActivationFunctionType.RELU, - ActivationFunctionType.RELU_N1_TO_1, - ActivationFunctionType.RELU6, - ], - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/transpose_conv.cc#L516 - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/kernel_util.h#L285-L300 - BuiltinOperator.MAX_POOL_2D: [ - ActivationFunctionType.RELU, - ActivationFunctionType.RELU_N1_TO_1, - ActivationFunctionType.RELU6, - ], - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/pooling.cc#L247 - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/kernel_util.h#L285-L300 - BuiltinOperator.AVERAGE_POOL_2D: [ - ActivationFunctionType.RELU, - ActivationFunctionType.RELU_N1_TO_1, - ActivationFunctionType.RELU6, - ], - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/pooling.cc#L124 - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/kernel_util.h#L285-L300 - BuiltinOperator.FULLY_CONNECTED: [ - ActivationFunctionType.RELU, - ActivationFunctionType.RELU_N1_TO_1, - ActivationFunctionType.RELU6, - ], - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/fully_connected.cc#L627-L630 - BuiltinOperator.ADD: [ - ActivationFunctionType.RELU, - ActivationFunctionType.RELU_N1_TO_1, - ActivationFunctionType.RELU6, - ], - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/add.cc#L246 - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/kernel_util.h#L285-L300 - BuiltinOperator.MUL: [ - ActivationFunctionType.RELU, - ActivationFunctionType.RELU_N1_TO_1, - ActivationFunctionType.RELU6, - ], - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/mul.cc#L159 - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/kernel_util.h#L285-L300 - BuiltinOperator.SUB: [ - ActivationFunctionType.RELU, - ActivationFunctionType.RELU_N1_TO_1, - ActivationFunctionType.RELU6, - ], - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/sub.cc#L306 - # 
https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/kernel_util.h#L285-L300 - BuiltinOperator.DIV: [ - ActivationFunctionType.RELU, - ActivationFunctionType.RELU_N1_TO_1, - ActivationFunctionType.RELU6, - ], - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/div.cc#L180 - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/kernel_util.h#L285-L300 - BuiltinOperator.SVDF: [ActivationFunctionType.RELU], - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/svdf.cc#L394 - BuiltinOperator.RNN: [ - ActivationFunctionType.RELU, - ActivationFunctionType.RELU_N1_TO_1, - ActivationFunctionType.RELU6, - ActivationFunctionType.TANH, - ActivationFunctionType.SIGN_BIT, - ], - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/basic_rnn.cc#L222 - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/internal/kernel_utils.cc#L71 - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/internal/tensor_utils.h#L58-L77 - BuiltinOperator.UNIDIRECTIONAL_SEQUENCE_RNN: [ - ActivationFunctionType.RELU, - ActivationFunctionType.RELU_N1_TO_1, - ActivationFunctionType.RELU6, - ActivationFunctionType.TANH, - ActivationFunctionType.SIGN_BIT, - ], - # https://github.com/tensorflow/tensorflow/blob/6887368d6d46223f460358323c4b76d61d1558a8/tensorflow/lite/kernels/unidirectional_sequence_rnn.cc#L239 - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/internal/kernel_utils.cc#L71 - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/internal/tensor_utils.h#L58-L77 - BuiltinOperator.BIDIRECTIONAL_SEQUENCE_RNN: [ - ActivationFunctionType.RELU, - ActivationFunctionType.RELU_N1_TO_1, - ActivationFunctionType.RELU6, - ActivationFunctionType.TANH, - ActivationFunctionType.SIGN_BIT, - ], - # https://github.com/tensorflow/tensorflow/blob/6887368d6d46223f460358323c4b76d61d1558a8/tensorflow/lite/kernels/bidirectional_sequence_rnn.cc#L433 - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/internal/kernel_utils.cc#L71 - # https://github.com/tensorflow/tensorflow/blob/v2.15.0/tensorflow/lite/kernels/internal/tensor_utils.h#L58-L77 - } - - ops_that_need_equal_io_quantization = [ - # Documented restrictions from https://www.tensorflow.org/lite/performance/quantization_spec - BuiltinOperator.AVERAGE_POOL_2D, - BuiltinOperator.MAX_POOL_2D, - BuiltinOperator.CONCATENATION, - ] - - def _act_fun_type_for_op(self, op: tflite_model.Operator) -> ActivationFunctionType: - if operator_is_type(op, "Relu", self._builder): - return ActivationFunctionType.RELU - elif operator_is_type(op, "ReluN1To1", self._builder): - return ActivationFunctionType.RELU_N1_TO_1 - elif operator_is_type(op, "Relu6", self._builder): - return ActivationFunctionType.RELU6 - elif operator_is_type(op, "Tanh", self._builder): - return ActivationFunctionType.TANH - elif operator_is_type(op, "Sign", self._builder): - return ActivationFunctionType.SIGN_BIT - - def __call__(self) -> bool: - matcher = PatternMatcher( - self._builder, - [ - Op( - self.ops_with_fused_activation_function, - ["x"], - ["x1"], - [NoFusedActivationFunction()], - ), - Op(self.activation_functions, ["x1"], ["y"]), - ], - [TensorHasOneConsumer("x1")], - ) - - to_remove = [] - for [leading_op, act_fun_op], tensor_map, _, _ in matcher.match_patterns(): - builtin_leading_op = leading_op.builtin_options.operator_type - 
logger.internal_assert( - builtin_leading_op in self.supported_activations_for_op.keys(), - f"FuseActivationFunctions: supported activations for operator `{builtin_leading_op}`" - "are not known.", - ) - - act_fun = self._act_fun_type_for_op(act_fun_op) - if act_fun not in self.supported_activations_for_op[builtin_leading_op]: - # The leading op doesn't support this activation function. - continue - - x, y = tensor_map["x"], tensor_map["y"] - if ( - x.quantization != y.quantization - and builtin_leading_op in self.ops_that_need_equal_io_quantization - ): - # The fusion would result in different input and output quantization of `leading_op`, which would cause - # runtime issues for that particular operator. - continue - - leading_op.builtin_options.fused_activation_function = act_fun - leading_op.tmp_outputs[0] = act_fun_op.tmp_outputs[0] - to_remove.append(act_fun_op) - - for op in to_remove: - self._builder.get_operators().remove(op) - - return len(to_remove) != 0 diff --git a/backends/nxp/backend/ir/tflite_optimizer/optimizer.py b/backends/nxp/backend/ir/tflite_optimizer/optimizer.py index 69b75b72cdd..3611c55e995 100755 --- a/backends/nxp/backend/ir/tflite_optimizer/optimizer.py +++ b/backends/nxp/backend/ir/tflite_optimizer/optimizer.py @@ -11,9 +11,6 @@ from executorch.backends.nxp.backend.ir import logger from executorch.backends.nxp.backend.ir.conversion_config import ConversionConfig -from executorch.backends.nxp.backend.ir.tflite_optimizer.optimizations.fuse_activation_functions import ( - FuseActivationFunctions, -) from executorch.backends.nxp.backend.ir.tflite_optimizer.optimizations.move_relu_before_concat import ( MoveActivationBeforeConcatenation, ) @@ -27,8 +24,6 @@ class Optimization(Enum): - FUSE_ACTIVATION_FUNCTIONS = 1 - FUSE_TRANSPOSE_OPERATORS = 5 REMOVE_IDENTITY_TRANSPOSE_OPERATORS = 6 @@ -64,9 +59,6 @@ def __init__( self._builder = builder self.optimization_map = { - Optimization.FUSE_ACTIVATION_FUNCTIONS: FuseActivationFunctions( - builder, conversion_config - ), Optimization.FUSE_TRANSPOSE_OPERATORS: FuseTransposeOperators( builder, conversion_config ), From a3684520a42ea9aa4518f38a7b618f54a575204b Mon Sep 17 00:00:00 2001 From: Roman Janik Date: Tue, 9 Sep 2025 17:57:27 +0200 Subject: [PATCH 2/3] Make Relu quantization non-shared --- backends/nxp/quantizer/patterns.py | 42 +++++++++++++++++------------- 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/backends/nxp/quantizer/patterns.py b/backends/nxp/quantizer/patterns.py index 9588ce24c9e..47e487494c6 100644 --- a/backends/nxp/quantizer/patterns.py +++ b/backends/nxp/quantizer/patterns.py @@ -121,6 +121,24 @@ def get_anchors( ) +class SingleInputBasicPattern(QuantizationPattern): + @abstractmethod + def partition_types(self) -> list[OpOverload]: + pass + + def get_anchors( + self, gm: fx.GraphModule, fused_partition: list[fx.GraphModule] + ) -> PartitionAnchors | None: + node = fused_partition[0].nodes[-1] + + return PartitionAnchors( + inputs=[(node, NodeArgsIdx(0))], + weights=[], + biases=[], + output=[(node,)], + ) + + def get_anchors_for_fixed_quant_specs( fused_partition: list[fx.GraphModule], scale: float, @@ -376,7 +394,7 @@ def partition_types(self): return [torch.ops.aten.flatten.using_ints] -class HardTanhPattern(QuantizationPattern): +class HardTanhPattern(SingleInputBasicPattern): """ Quantizer for HardTanh operator. Shared quantization spec is selected, as activation functions usually follows computation layer. 
@@ -385,23 +403,12 @@ class HardTanhPattern(QuantizationPattern): def partition_types(self): return [torch.ops.aten.hardtanh.default] - def get_anchors( - self, gm: fx.GraphModule, fused_partition: list[fx.GraphModule] - ) -> PartitionAnchors | None: - node = fused_partition[0].nodes[-1] - - return PartitionAnchors( - inputs=[(node, NodeArgsIdx(0))], - weights=[], - biases=[], - output=[(node,)], - ) def replacement_op(self): raise AssertionError() -class HardTanhInPlacePattern(QuantizationPattern): +class HardTanhInPlacePattern(SingleInputBasicPattern): """ Quantizer for HardTanh operator with param inplace=True. Shared quantization spec is selected, as activation functions usually follows computation layer. @@ -513,19 +520,18 @@ def partition_types(self): return [torch.ops.aten.permute.default] -class ReluPattern(SharedSpecPattern): +class ReluPattern(SingleInputBasicPattern): """ - Quantizer for Relu operator. Shared quantization spec is selected, as ReLU usually follows computation layer. + Quantizer for Relu operator. """ def partition_types(self): return [torch.ops.aten.relu.default] -class ReluInPlacePattern(SharedSpecPattern): +class ReluInPlacePattern(SingleInputBasicPattern): """ - Quantizer for Relu operator with param inplace=True. Shared quantization spec is selected, as ReLU usually - follows computation layer. + Quantizer for Relu operator with param inplace=True. """ def partition_types(self): From 6fdef26661f2ba592585124d46a56052b4125ac5 Mon Sep 17 00:00:00 2001 From: Roman Janik Date: Fri, 12 Sep 2025 14:15:05 +0200 Subject: [PATCH 3/3] Quantize Addmm, Conv2d, Linear, Mm together with fusable activations + Move fused activations to separate QDQ cluster --- backends/nxp/backend/edge_helper.py | 25 +- backends/nxp/backend/neutron_target_spec.py | 79 ++++ ...operator_into_separate_qdq_cluster_pass.py | 19 + backends/nxp/neutron_partitioner.py | 4 + backends/nxp/quantizer/neutron_quantizer.py | 14 +- backends/nxp/quantizer/patterns.py | 131 ++++++- backends/nxp/tests/executorch_pipeline.py | 24 +- backends/nxp/tests/models.py | 70 ++++ backends/nxp/tests/test_edge_passes.py | 263 +++++++++++-- .../nxp/tests/test_per_channel_conversion.py | 14 +- backends/nxp/tests/test_quantizer.py | 371 +++++++++++++++--- backends/nxp/tests/test_removing_dead_code.py | 7 +- .../nxp/tests/test_split_group_convolution.py | 3 +- examples/nxp/aot_neutron_compile.py | 6 +- 14 files changed, 899 insertions(+), 131 deletions(-) diff --git a/backends/nxp/backend/edge_helper.py b/backends/nxp/backend/edge_helper.py index 60b367c0f39..9b390790f3c 100644 --- a/backends/nxp/backend/edge_helper.py +++ b/backends/nxp/backend/edge_helper.py @@ -4,10 +4,29 @@ # LICENSE file in the root directory of this source tree. 
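+# A small sketch of the cluster shape the helpers below recognize. Both the
+# short names (e.g. `dequantize_per_tensor.default`) and the fully qualified
+# `quantized_decomposed.*` names are listed, since `node_.target.__name__`
+# differs between aten- and edge-dialect graphs:
+#
+#     dq = dequantize_per_tensor(x, scale, zp, qmin, qmax, dtype)
+#     y  = <float-domain op>(dq, ...)
+#     q  = quantize_per_tensor(y, scale, zp, qmin, qmax, dtype)
+#
+# `_is_dequantize(dq)` and `_is_quantize(q)` then mark the QDQ cluster boundary.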
import torch + from torch.fx import GraphModule, Node from torch.nn import Parameter +def _is_dequantize(node_: Node) -> bool: + return node_.op == "call_function" and node_.target.__name__ in [ + "dequantize_per_tensor.default", + "quantized_decomposed.dequantize_per_tensor.default", + "dequantize_per_channel.default", + "quantized_decomposed.dequantize_per_channel.default", + ] + + +def _is_quantize(node_: Node) -> bool: + return node_.op == "call_function" and node_.target.__name__ in [ + "quantize_per_tensor.default", + "quantized_decomposed.quantize_per_tensor.default", + "quantize_per_channel.default", + "quantized_decomposed.quantize_per_channel.default", + ] + + def input_tensor(node: Node, input_index: int) -> torch.Tensor: if len(node.all_input_nodes) <= input_index: raise IndexError @@ -62,12 +81,6 @@ def node_is_effectively_static_tensor( if node_is_static_tensor(node, parameters_mapping): return True - def _is_dequantize(node_: Node) -> bool: - return node_.target.__name__ in { - "quantized_decomposed.dequantize_per_tensor.default", - "quantized_decomposed.dequantize_per_channel.default", - } - return _is_dequantize(node) and node_is_static_tensor( node.args[0], parameters_mapping ) diff --git a/backends/nxp/backend/neutron_target_spec.py b/backends/nxp/backend/neutron_target_spec.py index 44399982e29..cf718991858 100644 --- a/backends/nxp/backend/neutron_target_spec.py +++ b/backends/nxp/backend/neutron_target_spec.py @@ -7,9 +7,14 @@ from enum import Enum +import torch + from executorch.backends.nxp.backend.neutron_converter_manager import ( NeutronConverterManager, ) +from executorch.exir.dialects._ops import ops as exir_ops + +from torch.fx import Node class NeutronHWVersion(Enum): @@ -17,6 +22,77 @@ class NeutronHWVersion(Enum): N3 = 2 +class NeutronTargetNeutronC: + @staticmethod + def is_supported_fused_activation(node_: Node) -> bool: + """Node operator is supported fused activation on Neutron for Linear and Conv2D.""" + return node_.op == "call_function" and ( + node_.target + in ( + torch.ops.aten.relu.default, # TODO Add torch.ops.aten.leaky_relu.default once it is supported + torch.ops.aten.relu_.default, + torch.ops.aten.sigmoid.default, + torch.ops.aten.sigmoid_.default, + torch.ops.aten.tanh.default, + torch.ops.aten.tanh_.default, + ) + or ( + ( + node_.target == torch.ops.aten.hardtanh.default + or node_.target == torch.ops.aten.hardtanh_.default + ) + and ( + node_.args[1:3] == (0.0, 6.0) # is converted to Relu6 + or node_.args[1:3] == (0.0, float("inf")) # is converted to Relu + ) + ) + ) + + @staticmethod + def is_supported_fused_activation__edge(node_: Node) -> bool: + """Node operator is supported fused activation on Neutron for Linear and Conv2D.""" + return node_.op == "call_function" and ( + node_.target + in ( + exir_ops.edge.aten.relu.default, # TODO Add torch.ops.aten.leaky_relu.default once it is supported + exir_ops.edge.aten.sigmoid.default, + exir_ops.edge.aten.tanh.default, + ) + or ( + (node_.target == exir_ops.edge.aten.hardtanh.default) + and ( + node_.args[1:3] == (0.0, 6.0) # is converted to Relu6 + or node_.args[1:3] == (0.0, float("inf")) # is converted to Relu + ) + ) + ) + + @staticmethod + def is_fusable_conv_or_linear(node_: Node) -> bool: + """Node operator is supported fusable Linear or Conv2D on Neutron.""" + return node_.op == "call_function" and ( + node_.target == torch.ops.aten.conv2d.default + or node_.target == torch.ops.aten.addmm.default + or node_.target == torch.ops.aten.mm.default + or ( + node_.target == 
torch.ops.aten.linear.default + and len(node_.meta["val"].shape) == 2 + ) + ) + + @staticmethod + def is_fusable_conv_or_linear__edge(node_: Node) -> bool: + """Node operator in edge dialect is supported fusable Linear or Conv2D on Neutron.""" + return node_.op == "call_function" and ( + node_.target == exir_ops.edge.aten.addmm.default + or node_.target == exir_ops.edge.aten.mm.default + or ( + node_.target == exir_ops.edge.aten.convolution.default + and len(node_.meta["val"].shape) == 4 + ) + ) + + class NeutronTargetSpec: """ The functionality for probing the properties of Neutron Target. @@ -39,6 +115,9 @@ def __init__(self, target: str, neutron_converter_flavor: str): f"Target `{target}` contains unsupported HW version. Only N3/N3+ targets are supported at the moment." ) + # Now only Neutron-C is supported + self.neutron_target_info = NeutronTargetNeutronC() + # Target name. def get_name(self) -> str: return self.neutron_target.name diff --git a/backends/nxp/edge_passes/move_auxiliary_operator_into_separate_qdq_cluster_pass.py b/backends/nxp/edge_passes/move_auxiliary_operator_into_separate_qdq_cluster_pass.py index d88684b86f0..f32e09e78e0 100644 --- a/backends/nxp/edge_passes/move_auxiliary_operator_into_separate_qdq_cluster_pass.py +++ b/backends/nxp/edge_passes/move_auxiliary_operator_into_separate_qdq_cluster_pass.py @@ -15,6 +15,11 @@ AddMM = exir_ops.edge.aten.addmm.default ViewCopy = exir_ops.edge.aten.view_copy.default MM = exir_ops.edge.aten.mm.default +Conv = exir_ops.edge.aten.convolution.default +HardTanh = exir_ops.edge.aten.hardtanh.default +Relu = exir_ops.edge.aten.relu.default +Sigmoid = exir_ops.edge.aten.sigmoid.default +Tanh = exir_ops.edge.aten.tanh.default def insert_qdq_pair_after_node( @@ -175,9 +180,23 @@ class MoveTrailingAuxiliaryOperatorIntoSeparateQDQClusterPass(NeutronEdgePass): main_cluster_node_to_auxiliary_nodes = { AddMM: [ ViewCopy, + HardTanh, + Relu, + Sigmoid, + Tanh, ], MM: [ ViewCopy, + HardTanh, + Relu, + Sigmoid, + Tanh, + ], + Conv: [ + HardTanh, + Relu, + Sigmoid, + Tanh, ], } diff --git a/backends/nxp/neutron_partitioner.py b/backends/nxp/neutron_partitioner.py index e7ad7ff7a0b..80237c5b37a 100644 --- a/backends/nxp/neutron_partitioner.py +++ b/backends/nxp/neutron_partitioner.py @@ -80,6 +80,10 @@ class QDQCluster: operator.getitem, exir_ops.edge.aten.view_copy.default, exir_ops.edge.aten.permute_copy.default, + exir_ops.edge.aten.hardtanh.default, + exir_ops.edge.aten.relu.default, + exir_ops.edge.aten.sigmoid.default, + exir_ops.edge.aten.tanh.default, ] def __init__(self): diff --git a/backends/nxp/quantizer/neutron_quantizer.py b/backends/nxp/quantizer/neutron_quantizer.py index 2681e221869..6564c19d7b9 100644 --- a/backends/nxp/quantizer/neutron_quantizer.py +++ b/backends/nxp/quantizer/neutron_quantizer.py @@ -5,10 +5,11 @@ # LICENSE file in the root directory of this source tree. 
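+# `NeutronQuantizer` now requires a `NeutronTargetSpec` so that the quantization
+# patterns can query target-specific activation-fusion support. A minimal usage
+# sketch (the target and flavor values mirror `executorch_pipeline.py`):
+#
+#     spec = NeutronTargetSpec("imxrt700", neutron_converter_flavor="SDK_25_09")
+#     quantizer = NeutronQuantizer(spec)
+#     prepared = prepare_pt2e(torch.export.export(model, example_input).module(), quantizer)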
import torch - from executorch.backends.nxp.aten_passes.neutron_aten_pass_manager import ( NeutronAtenPassManager, ) + +from executorch.backends.nxp.backend.neutron_target_spec import NeutronTargetSpec from executorch.backends.nxp.quantizer.patterns import ( AbsPattern, AdaptiveAvgPoolPattern, @@ -181,7 +182,8 @@ def get_supported_operators(cls) -> list[OperatorConfig]: class NeutronQuantizer(ComposableQuantizer): - def __init__(self): + def __init__(self, neutron_target_spec: NeutronTargetSpec): + self.neutron_target_spec = neutron_target_spec static_qconfig = QuantizationConfig(act_qspec, act_qspec, wgt_qspec, None) static_fc_qconfig = QuantizationConfig(act_qspec, act_qspec, wgt_fc_qspec, None) super().__init__( @@ -189,19 +191,19 @@ def __init__(self): NeutronAtenQuantizer(AbsPattern(), static_qconfig), NeutronAtenQuantizer(AdaptiveAvgPoolPattern(), static_qconfig), NeutronAtenQuantizer(AddTensorPattern(), static_qconfig), - NeutronAtenQuantizer(AddmmPattern(), static_fc_qconfig), + NeutronAtenQuantizer(AddmmPattern(self), static_fc_qconfig), NeutronAtenQuantizer(AvgPoolPattern(), static_qconfig), NeutronAtenQuantizer(CatPattern(), static_qconfig), NeutronAtenQuantizer(Conv1dPattern(), static_qconfig), - NeutronAtenQuantizer(Conv2dPattern(), static_qconfig), + NeutronAtenQuantizer(Conv2dPattern(self), static_qconfig), NeutronAtenQuantizer(DropoutPattern(), static_qconfig), NeutronAtenQuantizer(FlattenPattern(), static_qconfig), NeutronAtenQuantizer(HardTanhPattern(), static_qconfig), NeutronAtenQuantizer(HardTanhInPlacePattern(), static_qconfig), - NeutronAtenQuantizer(LinearPattern(), static_fc_qconfig), + NeutronAtenQuantizer(LinearPattern(self), static_fc_qconfig), NeutronAtenQuantizer(MaxPoolPattern(), static_qconfig), NeutronAtenQuantizer(MeanDimPattern(), static_qconfig), - NeutronAtenQuantizer(MmPattern(), static_qconfig), + NeutronAtenQuantizer(MmPattern(self), static_qconfig), NeutronAtenQuantizer(PadPattern(), static_qconfig), NeutronAtenQuantizer(PermutePattern(), static_qconfig), NeutronAtenQuantizer(ReluPattern(), static_qconfig), diff --git a/backends/nxp/quantizer/patterns.py b/backends/nxp/quantizer/patterns.py index 47e487494c6..f18d01f4bce 100644 --- a/backends/nxp/quantizer/patterns.py +++ b/backends/nxp/quantizer/patterns.py @@ -127,7 +127,7 @@ def partition_types(self) -> list[OpOverload]: pass def get_anchors( - self, gm: fx.GraphModule, fused_partition: list[fx.GraphModule] + self, gm: fx.GraphModule, fused_partition: list[fx.GraphModule] ) -> PartitionAnchors | None: node = fused_partition[0].nodes[-1] @@ -187,6 +187,12 @@ def partition_types(self): class AddmmPattern(QuantizationPattern): + def __init__(self, neutron_quantizer): + self.neutron_quantizer = neutron_quantizer + self.neutron_target_info = ( + self.neutron_quantizer.neutron_target_spec.neutron_target_info + ) + def partition_types(self) -> list[OpOverload]: return [torch.ops.aten.addmm.default] @@ -208,11 +214,25 @@ def get_anchors( qscheme=torch.per_tensor_affine, ) + # If the following node is a fusable activation, quantize together with activation + output = [(addmm_node,)] + if len( + addmm_node.users + ) == 1 and self.neutron_target_info.is_supported_fused_activation( + activation := next(iter(addmm_node.users)) + ): + activation_quantizer = self.neutron_quantizer.op_to_quantizer[ + activation.target + ] + activation_quantizer.annotate(gm) + output = [] + activation.meta["quantization_annotation"].input_qspec_map = {} + return PartitionAnchors( inputs=[(addmm_node, NodeArgsIdx(1))], 
weights=[(addmm_node, NodeArgsIdx(2))], biases=[(addmm_node, NodeArgsIdx(0), bias_qspec)], - output=[(addmm_node,)], + output=output, ) @@ -372,9 +392,69 @@ def partition_types(self) -> list[OpOverload]: class Conv2dPattern(ConvPattern): + def __init__(self, neutron_quantizer): + self.neutron_quantizer = neutron_quantizer + self.neutron_target_info = ( + self.neutron_quantizer.neutron_target_spec.neutron_target_info + ) + def partition_types(self) -> list[OpOverload]: return [torch.ops.aten.conv2d.default] + def get_anchors( + self, gm: fx.GraphModule, fused_partition: list[fx.GraphModule] + ) -> PartitionAnchors: + conv_node = fused_partition[0].nodes[-1] + + bias_quantization_qspec = DerivedQuantizationSpec( + derived_from=[ + (conv_node.args[0], conv_node), + (conv_node.args[1], conv_node), + ], + derive_qparams_fn=get_bias_qparams, + dtype=torch.int32, + quant_min=-(2**31) + 1, + quant_max=2**31 - 1, + qscheme=torch.per_channel_symmetric, + ch_axis=0, + ) + + weight_observer_or_fake_quant_ctr = PerChannelMinMaxObserver + weight_quantization_spec = QuantizationSpec( + dtype=torch.int8, + observer_or_fake_quant_ctr=weight_observer_or_fake_quant_ctr, + quant_min=-127, + quant_max=127, + qscheme=torch.per_channel_symmetric, + ch_axis=0, + ) + + # Keep bias empty if not supplied + bias = [] + if len(conv_node.args) > 2 and conv_node.args[2] is not None: + bias = [(conv_node, NodeArgsIdx(2), bias_quantization_qspec)] + + # If the following node is a fusable activation, quantize together with activation + output = [(conv_node,)] + if len( + conv_node.users + ) == 1 and self.neutron_target_info.is_supported_fused_activation( + activation := next(iter(conv_node.users)) + ): + activation_quantizer = self.neutron_quantizer.op_to_quantizer[ + activation.target + ] + activation_quantizer.annotate(gm) + output = [] + activation.meta["quantization_annotation"].input_qspec_map = {} + + return PartitionAnchors( + inputs=[(conv_node, NodeArgsIdx(0))], + weights=[(conv_node, NodeArgsIdx(1), weight_quantization_spec)], + biases=bias, + output=output, + ) + class DropoutPattern(SharedSpecPattern): """ @@ -403,7 +483,6 @@ class HardTanhPattern(SingleInputBasicPattern): def partition_types(self): return [torch.ops.aten.hardtanh.default] - def replacement_op(self): raise AssertionError() @@ -434,6 +513,12 @@ def replacement_op(self): class LinearPattern(QuantizationPattern): + def __init__(self, neutron_quantizer): + self.neutron_quantizer = neutron_quantizer + self.neutron_target_info = ( + self.neutron_quantizer.neutron_target_spec.neutron_target_info + ) + def partition_types(self) -> list[OpOverload]: return [torch.ops.aten.linear.default] @@ -459,11 +544,27 @@ def get_anchors( if len(linear_node.args) > 2: bias = [(linear_node, NodeArgsIdx(2), bias_qspec)] + # If the following node is a fusable activation, quantize together with activation + output = [(linear_node,)] + if ( + len(linear_node.users) == 1 + and len(linear_node.meta["val"].shape) <= 2 + and self.neutron_target_info.is_supported_fused_activation( + activation := next(iter(linear_node.users)) + ) + ): + activation_quantizer = self.neutron_quantizer.op_to_quantizer[ + activation.target + ] + activation_quantizer.annotate(gm) + output = [] + activation.meta["quantization_annotation"].input_qspec_map = {} + return PartitionAnchors( inputs=[(linear_node, NodeArgsIdx(0))], weights=[(linear_node, NodeArgsIdx(1))], biases=bias, - output=[(linear_node,)], + output=output, ) @@ -486,6 +587,12 @@ def partition_types(self): class 
MmPattern(QuantizationPattern): + def __init__(self, neutron_quantizer): + self.neutron_quantizer = neutron_quantizer + self.neutron_target_info = ( + self.neutron_quantizer.neutron_target_spec.neutron_target_info + ) + def partition_types(self) -> list[OpOverload]: return [torch.ops.aten.mm.default] @@ -494,11 +601,25 @@ def get_anchors( ) -> PartitionAnchors: mm_node = fused_partition[0].nodes[-1] + # If the following node is a fusable activation, quantize together with activation + output = [(mm_node,)] + if len( + mm_node.users + ) == 1 and self.neutron_target_info.is_supported_fused_activation( + activation := next(iter(mm_node.users)) + ): + activation_quantizer = self.neutron_quantizer.op_to_quantizer[ + activation.target + ] + activation_quantizer.annotate(gm) + output = [] + activation.meta["quantization_annotation"].input_qspec_map = {} + return PartitionAnchors( inputs=[(mm_node, NodeArgsIdx(0))], weights=[(mm_node, NodeArgsIdx(1))], biases=[], - output=[(mm_node,)], + output=output, ) diff --git a/backends/nxp/tests/executorch_pipeline.py b/backends/nxp/tests/executorch_pipeline.py index 09bceb2b0d3..703be1669a5 100644 --- a/backends/nxp/tests/executorch_pipeline.py +++ b/backends/nxp/tests/executorch_pipeline.py @@ -4,6 +4,7 @@ # LICENSE file in the root directory of this source tree. from dataclasses import dataclass +from functools import partial from typing import Callable import torch @@ -12,6 +13,7 @@ from executorch.backends.nxp.backend.custom_delegation_options import ( CustomDelegationOptions, ) +from executorch.backends.nxp.backend.neutron_target_spec import NeutronTargetSpec from executorch.backends.nxp.edge_passes.neutron_edge_pass_manager import ( NeutronEdgePassManager, ) @@ -27,6 +29,12 @@ from executorch.extension.export_util.utils import export_to_edge from torch import nn from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e +from torchao.quantization.pt2e.quantizer import Quantizer + +default_neutron_converter_flavor = "SDK_25_09" +neutron_target_spec = NeutronTargetSpec( + target="imxrt700", neutron_converter_flavor=default_neutron_converter_flavor +) @dataclass @@ -55,6 +63,10 @@ def get_random_calibration_inputs( ] +def _get_default_quantizer(target_spec: NeutronTargetSpec) -> Quantizer: + return NeutronQuantizer(target_spec) + + def to_model_input_spec( input_spec: tuple[ModelInputSpec, ...] | tuple[int, ...] | list[tuple[int, ...]] ) -> tuple[ModelInputSpec, ...]: @@ -85,13 +97,17 @@ def to_quantized_edge_program( [tuple[ModelInputSpec, ...]], list[tuple[torch.Tensor, ...]] ] = get_random_calibration_inputs, target="imxrt700", - neutron_converter_flavor="SDK_25_09", + neutron_converter_flavor=default_neutron_converter_flavor, remove_quant_io_ops=False, custom_delegation_options=CustomDelegationOptions(), # noqa B008 - get_quantizer_fn=lambda: NeutronQuantizer(), + get_quantizer_fn=None, ) -> EdgeProgramManager: - calibration_inputs = get_calibration_inputs_fn(to_model_input_spec(input_spec)) + _neutron_target_spec = NeutronTargetSpec(target, neutron_converter_flavor) + if get_quantizer_fn is None: + get_quantizer_fn = partial(_get_default_quantizer, _neutron_target_spec) + quantizer = get_quantizer_fn() + calibration_inputs = get_calibration_inputs_fn(to_model_input_spec(input_spec)) example_input = calibration_inputs[0] # Make sure the model is in the evaluation mode. 
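+# Note: the default quantizer is now resolved lazily. With `get_quantizer_fn`
+# left as None, the pipeline builds it from the target passed to this very
+# call, roughly `partial(_get_default_quantizer, _neutron_target_spec)`,
+# instead of the old eager `lambda: NeutronQuantizer()` default, which could
+# not know the requested target/flavor.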
@@ -101,7 +117,7 @@ def to_quantized_edge_program(
 
     exir_program_aten__module_quant = _quantize_model(
         exir_program_aten.module(),
-        get_quantizer_fn(),
+        quantizer,
         calibration_inputs,
     )
 
diff --git a/backends/nxp/tests/models.py b/backends/nxp/tests/models.py
index f613349fed0..c4d9491d4a7 100644
--- a/backends/nxp/tests/models.py
+++ b/backends/nxp/tests/models.py
@@ -9,6 +9,8 @@
 
 import torch
+
+from torch import nn
+
 
 class Conv1dModule(torch.nn.Module):
     def __init__(
@@ -501,3 +503,71 @@ def __init__(self, dim, keepdim):
     def forward(self, x):
         x = self.conv(x)
         return torch.mean(x, dim=self.dim, keepdim=self.keepdim)
+
+
+def get_activation(activation, inplace):
+    match activation:
+        case "relu":
+            return nn.ReLU(inplace=inplace)
+        case "relu_hardtanh":
+            return nn.Hardtanh(inplace=inplace, min_val=0.0, max_val=float("inf"))
+        case "relu6":
+            return nn.ReLU6(inplace=inplace)
+        case "tanh":
+            if inplace:
+                return torch.tanh_
+            else:
+                return torch.tanh
+        case "sigmoid":
+            return nn.Sigmoid()
+        case _:
+            raise ValueError(f"Unsupported activation: {activation}")
+
+
+class LinearActivationModule(torch.nn.Module):
+    def __init__(
+        self, activation: str, inplace: bool, in_channels: int, mode: str = "linear"
+    ):
+        super().__init__()
+        self.mode = mode.lower()
+        assert self.mode in [
+            "linear",
+            "addmm",
+            "mm",
+        ], "Mode must be 'linear', 'addmm', or 'mm'"
+
+        if self.mode == "linear":
+            self.linear = torch.nn.Linear(in_channels, in_channels)
+        else:
+            # Manual weight and bias for addmm/mm
+            self.weight = torch.nn.Parameter(torch.empty(in_channels, in_channels))
+            self.bias = torch.nn.Parameter(torch.empty(in_channels))
+            torch.nn.init.kaiming_uniform_(self.weight, a=math.sqrt(5))
+            fan_in, _ = torch.nn.init._calculate_fan_in_and_fan_out(self.weight)
+            bound = 1 / math.sqrt(fan_in)
+            torch.nn.init.uniform_(self.bias, -bound, bound)
+
+        self.activation = get_activation(activation, inplace)
+        self.eval()
+
+    def forward(self, x):
+        if self.mode == "linear":
+            x = self.linear(x)
+        elif self.mode == "addmm":
+            x = torch.addmm(self.bias, x, self.weight)
+        elif self.mode == "mm":
+            x = torch.mm(x, self.weight)
+        return self.activation(x)
+
+
+class ConvActivationModule(torch.nn.Module):
+    def __init__(self, activation: str, inplace: bool, in_channels: int):
+        super().__init__()
+
+        self.conv = Conv2dModule(in_channels=in_channels)
+        self.activation = get_activation(activation, inplace)
+        self.eval()
+
+    def forward(self, x):
+        x = self.conv(x)
+        return self.activation(x)
diff --git a/backends/nxp/tests/test_edge_passes.py b/backends/nxp/tests/test_edge_passes.py
index a189299be52..ff1c215fc55 100644
--- a/backends/nxp/tests/test_edge_passes.py
+++ b/backends/nxp/tests/test_edge_passes.py
@@ -1,14 +1,37 @@
+# Copyright 2025 NXP
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
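+#
+# These tests check that, after the Neutron edge passes, a fusable op and its
+# trailing activation end up in separate QDQ clusters, i.e. the edge graph has
+# the shape (sketch):
+#
+#     dq -> addmm/mm/convolution -> q -> dq -> relu/tanh/sigmoid/hardtanh -> q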
+ +import unittest + +import kgb import numpy as np +import torch + +from executorch.backends.nxp.backend.edge_helper import _is_dequantize, _is_quantize +from executorch.backends.nxp.backend.edge_program_converter import ( + EdgeProgramToIRConverter, +) from executorch.backends.nxp.backend.ir.converter.node_converters.ops_converters import ( ViewCopyConverter, ) -from executorch.backends.nxp.tests.executorch_pipeline import to_quantized_edge_program +from executorch.backends.nxp.tests.executorch_pipeline import ( + neutron_target_spec, + to_quantized_edge_program, +) from executorch.backends.nxp.tests.executors import ( EdgeProgramExecutor, OverrideTargetSupportCheck, ) -from executorch.backends.nxp.tests.models import ConvFCFCSoftmaxModuleWithoutReshape +from executorch.backends.nxp.tests.models import ( + ConvActivationModule, + ConvFCFCSoftmaxModuleWithoutReshape, + LinearActivationModule, +) from executorch.exir.dialects._ops import ops as exir_ops +from parameterized import parameterized +from torch.export import ExportedProgram from torch.fx import Graph, Node @@ -19,21 +42,6 @@ def _is_view_copy(node_: Node) -> bool: ) -def _is_dequantize(node_: Node) -> bool: - return ( - node_.op == "call_function" - and node_.target.__name__ - == "quantized_decomposed.dequantize_per_tensor.default" - ) - - -def _is_quantize(node_: Node) -> bool: - return ( - node_.op == "call_function" - and node_.target.__name__ == "quantized_decomposed.quantize_per_tensor.default" - ) - - def _find_view_copy_node_indices(graph_nodes: list[Node]) -> list[int]: view_copy_nodes_indices = [] @@ -57,32 +65,211 @@ def _assert_nodes_form_a_view_copy_qdq_cluster(graph: Graph, node_indices: list[ assert quantize.args[0] == view_copy -def test_moving_view_copy_into_separate_qdq_clusters(): - model = ConvFCFCSoftmaxModuleWithoutReshape() - input_shape = (1, 4, 3, 33) +class TestEdgePasses(unittest.TestCase): + @classmethod + def setUpClass(cls): + torch.manual_seed(23) + np.random.seed(42) + + def test_moving_view_copy_into_separate_qdq_clusters(self): + model = ConvFCFCSoftmaxModuleWithoutReshape() + input_shape = (1, 4, 3, 33) + + # Prohibit `view_copy` conversion for the testing purposes. + def unsupported_target(*_): + return False + + with OverrideTargetSupportCheck( + ViewCopyConverter, new_target_support_check=unsupported_target + ): + epm = to_quantized_edge_program(model, input_shape, target="imxrt700") + exported_program = epm.exported_program() + + nodes = list(exported_program.graph_module.graph.nodes) + assert len(nodes) == 28 - # Prohibit `view_copy` conversion for the testing purposes. - def unsupported_target(*_): - return False + view_copy_indices = _find_view_copy_node_indices(nodes) - with OverrideTargetSupportCheck( - ViewCopyConverter, new_target_support_check=unsupported_target + assert len(view_copy_indices) == 4 + for idx in view_copy_indices: + _assert_nodes_form_a_view_copy_qdq_cluster( + exported_program.graph, node_indices=[idx - 1, idx, idx + 1] + ) + + # Make sure the program is runnable. 
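+        # (EdgeProgramExecutor interprets the edge program directly, so a
+        # structurally broken graph would already fail at this inference.)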
+ input_data = np.random.random(input_shape).astype("float32") + program_executor = EdgeProgramExecutor(exported_program) + program_executor.inference(input_data) + + @parameterized.expand( + [ + ["relu"], + ["relu6"], + ["tanh"], + ["sigmoid"], + ] + ) + def test_moving_fusable_activations_into_separate_qdq_clusters__addmm( + self, activation ): - epm = to_quantized_edge_program(model, input_shape, target="imxrt700") - exported_program = epm.exported_program() + with kgb.spy_on( + EdgeProgramToIRConverter.convert_program, + call_original=True, + owner=EdgeProgramToIRConverter, + ) as converter_spy: - nodes = list(exported_program.graph_module.graph.nodes) - assert len(nodes) == 28 + input_shape = (1, 4) + model = LinearActivationModule( + activation=activation, + inplace=True, + in_channels=input_shape[1], + mode="addmm", + ) - view_copy_indices = _find_view_copy_node_indices(nodes) + _ = to_quantized_edge_program(model, input_shape) + exported_program: ExportedProgram = converter_spy.calls[-1].args[0] - assert len(view_copy_indices) == 4 - for idx in view_copy_indices: - _assert_nodes_form_a_view_copy_qdq_cluster( - exported_program.graph, node_indices=[idx - 1, idx, idx + 1] + # Check linear and activation are in separate QDQ clusters + nodes = list(exported_program.graph.nodes) + assert len(nodes) == 12 + assert _is_dequantize(nodes[5]) + assert ( + neutron_target_spec.neutron_target_info.is_fusable_conv_or_linear__edge( + nodes[6] + ) + ) + assert _is_quantize(nodes[7]) + assert _is_dequantize(nodes[8]) + assert neutron_target_spec.neutron_target_info.is_supported_fused_activation__edge( + nodes[9] ) + assert _is_quantize(nodes[10]) - # Make sure the program is runnable. - input_data = np.random.random(input_shape).astype("float32") - program_executor = EdgeProgramExecutor(exported_program) - program_executor.inference(input_data) + @parameterized.expand( + [ + ["relu"], + ["relu6"], + ["tanh"], + ["sigmoid"], + ] + ) + def test_moving_fusable_activations_into_separate_qdq_clusters__mm( + self, activation + ): + with kgb.spy_on( + EdgeProgramToIRConverter.convert_program, + call_original=True, + owner=EdgeProgramToIRConverter, + ) as converter_spy: + + input_shape = (1, 4) + model = LinearActivationModule( + activation=activation, + inplace=True, + in_channels=input_shape[1], + mode="mm", + ) + + _ = to_quantized_edge_program(model, input_shape) + exported_program: ExportedProgram = converter_spy.calls[-1].args[0] + + # Check linear and activation are in separate QDQ clusters + nodes = list(exported_program.graph.nodes) + assert len(nodes) == 10 + assert _is_dequantize(nodes[3]) + assert ( + neutron_target_spec.neutron_target_info.is_fusable_conv_or_linear__edge( + nodes[4] + ) + ) + assert _is_quantize(nodes[5]) + assert _is_dequantize(nodes[6]) + assert neutron_target_spec.neutron_target_info.is_supported_fused_activation__edge( + nodes[7] + ) + assert _is_quantize(nodes[8]) + + @parameterized.expand( + [ + ["relu"], + ["relu6"], + ["tanh"], + ["sigmoid"], + ] + ) + def test_moving_fusable_activations_into_separate_qdq_clusters__linear( + self, activation + ): + with kgb.spy_on( + EdgeProgramToIRConverter.convert_program, + call_original=True, + owner=EdgeProgramToIRConverter, + ) as converter_spy: + + input_shape = (1, 4) + model = LinearActivationModule( + activation=activation, + inplace=True, + in_channels=input_shape[1], + mode="linear", + ) + + _ = to_quantized_edge_program(model, input_shape) + exported_program: ExportedProgram = converter_spy.calls[-1].args[0] + + # Check 
linear and activation are in separate QDQ clusters
+            nodes = list(exported_program.graph.nodes)
+            assert len(nodes) == 13
+            assert _is_dequantize(nodes[5])
+            assert (
+                neutron_target_spec.neutron_target_info.is_fusable_conv_or_linear__edge(
+                    nodes[7]
+                )
+            )
+            assert _is_quantize(nodes[8])
+            assert _is_dequantize(nodes[9])
+            assert neutron_target_spec.neutron_target_info.is_supported_fused_activation__edge(
+                nodes[10]
+            )
+            assert _is_quantize(nodes[11])
+
+    @parameterized.expand(
+        [
+            ["relu"],
+            ["relu6"],
+            ["tanh"],
+            ["sigmoid"],
+        ]
+    )
+    def test_moving_fusable_activations_into_separate_qdq_clusters__conv(
+        self, activation
+    ):
+        with kgb.spy_on(
+            EdgeProgramToIRConverter.convert_program,
+            call_original=True,
+            owner=EdgeProgramToIRConverter,
+        ) as converter_spy:
+
+            input_shape = (1, 4, 8, 8)
+            model = ConvActivationModule(
+                activation=activation, inplace=True, in_channels=input_shape[1]
+            )
+
+            _ = to_quantized_edge_program(model, input_shape)
+            exported_program: ExportedProgram = converter_spy.calls[-1].args[0]
+
+            # Check conv and activation are in separate QDQ clusters
+            nodes = list(exported_program.graph.nodes)
+            assert len(nodes) == 16
+            assert _is_dequantize(nodes[9])
+            assert (
+                neutron_target_spec.neutron_target_info.is_fusable_conv_or_linear__edge(
+                    nodes[10]
+                )
+            )
+            assert _is_quantize(nodes[11])
+            assert _is_dequantize(nodes[12])
+            assert neutron_target_spec.neutron_target_info.is_supported_fused_activation__edge(
+                nodes[13]
+            )
+            assert _is_quantize(nodes[14])
diff --git a/backends/nxp/tests/test_per_channel_conversion.py b/backends/nxp/tests/test_per_channel_conversion.py
index 043ba8fc001..7dcabf46be0 100644
--- a/backends/nxp/tests/test_per_channel_conversion.py
+++ b/backends/nxp/tests/test_per_channel_conversion.py
@@ -30,7 +30,7 @@
     ToChannelLastPreprocess,
 )
 from executorch.backends.nxp.tests.models import Conv2dModule
-from executorch.backends.nxp.tests.test_quantizer import _get_target_name
+from executorch.exir.dialects._ops import ops as exir_ops
 from torch import fx
 from torch._ops import OpOverload
 
@@ -144,10 +144,12 @@ def test_per_channel_convolution(self):
 
         nodes = list(exported_program.graph.nodes)
 
-        assert _get_target_name(nodes[8]).endswith(
-            "quantized_decomposed.dequantize_per_channel.default"
+        assert (
+            nodes[8].target
+            == exir_ops.edge.quantized_decomposed.dequantize_per_channel.default
         )
-        assert _get_target_name(nodes[9]).endswith(
-            "quantized_decomposed.dequantize_per_channel.default"
+        assert (
+            nodes[9].target
+            == exir_ops.edge.quantized_decomposed.dequantize_per_channel.default
         )
-        assert nodes[10].name == "aten_convolution_default"
+        assert nodes[10].target == exir_ops.edge.aten.convolution.default
diff --git a/backends/nxp/tests/test_quantizer.py b/backends/nxp/tests/test_quantizer.py
index 624e350ed21..0cc6fbfbc2f 100644
--- a/backends/nxp/tests/test_quantizer.py
+++ b/backends/nxp/tests/test_quantizer.py
@@ -7,14 +7,41 @@
 
 from copy import deepcopy
 
+import executorch.backends.nxp.tests.executorch_pipeline as executorch_pipeline
 import executorch.backends.nxp.tests.models as models
+import numpy as np
+import pytest
 import torch
+
+from executorch.backends.nxp.backend.edge_program_converter import (
+    EdgeProgramToIRConverter,
+)
+
 from executorch.backends.nxp.quantizer.neutron_quantizer import NeutronQuantizer
+from executorch.backends.nxp.tests.executorch_pipeline import (
+    neutron_target_spec,
+    to_quantized_edge_program,
+)
+from executorch.backends.nxp.tests.executors import (
+    convert_run_compare,
+    
graph_contains_any_of_ops, + ToChannelFirstPreprocess, + ToChannelLastPreprocess, +) +from executorch.exir.dialects._ops import ops as exir_ops +from torch.export import ExportedProgram +from torch.fx import GraphModule from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e - -def _get_target_name(node): - return node._pretty_print_target(node.target) +fuse_activation_ops = [ + exir_ops.edge.aten.addmm.default, + exir_ops.edge.aten.mm.default, + exir_ops.edge.aten.convolution.default, + exir_ops.edge.aten.hardtanh.default, + exir_ops.edge.aten.relu.default, + exir_ops.edge.aten.sigmoid.default, + exir_ops.edge.aten.tanh.default, +] def test_quantizer_conv2d(): @@ -22,7 +49,7 @@ def test_quantizer_conv2d(): model.eval() example_input = (torch.ones(1, 4, 32, 32),) - quantizer = NeutronQuantizer() + quantizer = NeutronQuantizer(neutron_target_spec) graph_module = torch.export.export(model, example_input, strict=True).module() # noinspection PyTypeChecker @@ -38,22 +65,21 @@ def test_quantizer_conv2d(): assert nodes[11].name == "conv2d" # [0]: Input, [1] : weights, [2]: bias assert ( - _get_target_name(nodes[11].args[0]) - == "torch.ops.quantized_decomposed.dequantize_per_tensor.default" + nodes[11].args[0].target + == torch.ops.quantized_decomposed.dequantize_per_tensor.default ) assert ( - _get_target_name(nodes[11].args[1]) - == "torch.ops.quantized_decomposed.dequantize_per_channel.default" + nodes[11].args[1].target + == torch.ops.quantized_decomposed.dequantize_per_channel.default ) assert ( - _get_target_name(nodes[11].args[2]) - == "torch.ops.quantized_decomposed.dequantize_per_channel.default" + nodes[11].args[2].target + == torch.ops.quantized_decomposed.dequantize_per_channel.default ) assert ( - _get_target_name(nodes[12]) - == "torch.ops.quantized_decomposed.quantize_per_tensor.default" + nodes[12].target == torch.ops.quantized_decomposed.quantize_per_tensor.default ) - assert nodes[12].args[0].name == "conv2d" + assert nodes[12].args[0].target == torch.ops.aten.conv2d.default def test_quantizer_linear(): @@ -61,7 +87,7 @@ def test_quantizer_linear(): model.eval() example_input = (torch.ones(10, 32),) - quantizer = NeutronQuantizer() + quantizer = NeutronQuantizer(neutron_target_spec) graph_module = torch.export.export(model, example_input, strict=True).module() # noinspection PyTypeChecker @@ -77,22 +103,19 @@ def test_quantizer_linear(): assert nodes[7].name == "linear" # [0]: Input, [1] : weights, [2]: bias assert ( - _get_target_name(nodes[7].args[0]) - == "torch.ops.quantized_decomposed.dequantize_per_tensor.default" - ) - assert ( - _get_target_name(nodes[7].args[1]) - == "torch.ops.quantized_decomposed.dequantize_per_tensor.default" + nodes[7].args[0].target + == torch.ops.quantized_decomposed.dequantize_per_tensor.default ) assert ( - _get_target_name(nodes[7].args[2]) - == "torch.ops.quantized_decomposed.dequantize_per_tensor.default" + nodes[7].args[1].target + == torch.ops.quantized_decomposed.dequantize_per_tensor.default ) assert ( - _get_target_name(nodes[8]) - == "torch.ops.quantized_decomposed.quantize_per_tensor.default" + nodes[7].args[2].target + == torch.ops.quantized_decomposed.dequantize_per_tensor.default ) - assert nodes[8].args[0].name == "linear" + assert nodes[8].target == torch.ops.quantized_decomposed.quantize_per_tensor.default + assert nodes[8].args[0].target == torch.ops.aten.linear.default def test_quantizer_maxpool2d(): @@ -100,7 +123,7 @@ def test_quantizer_maxpool2d(): model.eval() example_input = (torch.ones(1, 8, 32, 
32),) - quantizer = NeutronQuantizer() + quantizer = NeutronQuantizer(neutron_target_spec) graph_module = torch.export.export(model, example_input, strict=True).module() # noinspection PyTypeChecker @@ -114,16 +137,15 @@ def test_quantizer_maxpool2d(): nodes = list(m.graph.nodes) assert len(nodes) == 18 # Check if QDQ pattern: - assert nodes[14].name == "max_pool2d" + assert nodes[14].target == torch.ops.aten.max_pool2d.default assert ( - _get_target_name(nodes[14].args[0]) - == "torch.ops.quantized_decomposed.dequantize_per_tensor.default" + nodes[14].args[0].target + == torch.ops.quantized_decomposed.dequantize_per_tensor.default ) assert ( - _get_target_name(nodes[15]) - == "torch.ops.quantized_decomposed.quantize_per_tensor.default" + nodes[15].target == torch.ops.quantized_decomposed.quantize_per_tensor.default ) - assert nodes[15].args[0].name == "max_pool2d" + assert nodes[15].args[0].target == torch.ops.aten.max_pool2d.default # Check if input and output quantization is same input_quant = nodes[14].args[0].args[1:] @@ -136,7 +158,7 @@ def test_quantizer_softmax(): model.eval() example_input = (torch.ones(1, 10),) - quantizer = NeutronQuantizer() + quantizer = NeutronQuantizer(neutron_target_spec) graph_module = torch.export.export(model, example_input, strict=True).module() # noinspection PyTypeChecker @@ -150,16 +172,13 @@ def test_quantizer_softmax(): nodes = list(m.graph.nodes) assert len(nodes) == 7 # Check if QDQ pattern: - assert nodes[3].name == "softmax" - assert ( - _get_target_name(nodes[3].args[0]) - == "torch.ops.quantized_decomposed.dequantize_per_tensor.default" - ) + assert nodes[3].target == torch.ops.aten.softmax.int assert ( - _get_target_name(nodes[4]) - == "torch.ops.quantized_decomposed.quantize_per_tensor.default" + nodes[3].args[0].target + == torch.ops.quantized_decomposed.dequantize_per_tensor.default ) - assert nodes[4].args[0].name == "softmax" + assert nodes[4].target == torch.ops.quantized_decomposed.quantize_per_tensor.default + assert nodes[4].args[0].target == torch.ops.aten.softmax.int # Check output quantization scale, zp, _, _, dtype = nodes[4].args[1:] @@ -173,7 +192,7 @@ def test_quantizer_single_maxpool2d(): model.eval() example_input = (torch.ones(1, 4, 32, 32),) - quantizer = NeutronQuantizer() + quantizer = NeutronQuantizer(neutron_target_spec) graph_module = torch.export.export(model, example_input, strict=True).module() # noinspection PyTypeChecker @@ -186,7 +205,7 @@ def test_quantizer_single_maxpool2d(): nodes = list(m.graph.nodes) assert len(nodes) == 7 - assert nodes[3].name == "max_pool2d" + assert nodes[3].target == torch.ops.aten.max_pool2d.default assert "quantization_annotation" not in nodes[1].meta @@ -195,7 +214,7 @@ def test_quantizer_conv2d_relu(): model.eval() example_input = (torch.ones(1, 4, 32, 32),) - quantizer = NeutronQuantizer() + quantizer = NeutronQuantizer(neutron_target_spec) graph_module = torch.export.export(model, example_input, strict=True).module() # noinspection PyTypeChecker @@ -207,10 +226,14 @@ def test_quantizer_conv2d_relu(): m(*example_input) nodes = list(m.graph.nodes) - assert len(nodes) == 14 - assert nodes[9].name == "dequantize_per_tensor_default_1" - assert nodes[10].name == "relu" - assert nodes[11].name == "quantize_per_tensor_default_2" + + assert len(nodes) == 12 + assert ( + nodes[6].target == torch.ops.quantized_decomposed.dequantize_per_tensor.default + ) + assert nodes[7].target == torch.ops.aten.conv2d.default + assert nodes[8].target == torch.ops.aten.relu.default + assert 
nodes[9].target == torch.ops.quantized_decomposed.quantize_per_tensor.default def test_quantizer_conv2d_avg_pool2d(): @@ -218,7 +241,7 @@ def test_quantizer_conv2d_avg_pool2d(): model.eval() example_input = (torch.ones(1, 4, 16, 16),) - quantizer = NeutronQuantizer() + quantizer = NeutronQuantizer(neutron_target_spec) graph_module = torch.export.export(model, example_input, strict=True).module() # noinspection PyTypeChecker @@ -230,10 +253,15 @@ def test_quantizer_conv2d_avg_pool2d(): m(*example_input) nodes = list(m.graph.nodes) + assert len(nodes) == 18 - assert nodes[13].name == "dequantize_per_tensor_default_1" - assert nodes[14].name == "avg_pool2d" - assert nodes[15].name == "quantize_per_tensor_default_2" + assert ( + nodes[13].target == torch.ops.quantized_decomposed.dequantize_per_tensor.default + ) + assert nodes[14].target == torch.ops.aten.avg_pool2d.default + assert ( + nodes[15].target == torch.ops.quantized_decomposed.quantize_per_tensor.default + ) def test_quantizer_conv2d_permute(): @@ -241,7 +269,7 @@ def test_quantizer_conv2d_permute(): model.eval() example_input = (torch.ones(1, 4, 16, 16),) - quantizer = NeutronQuantizer() + quantizer = NeutronQuantizer(neutron_target_spec) graph_module = torch.export.export(model, example_input, strict=True).module() # noinspection PyTypeChecker @@ -255,9 +283,13 @@ def test_quantizer_conv2d_permute(): nodes = list(m.graph.nodes) assert len(nodes) == 14 - assert nodes[9].name == "dequantize_per_tensor_default_1" - assert nodes[10].name == "permute" - assert nodes[11].name == "quantize_per_tensor_default_2" + assert ( + nodes[9].target == torch.ops.quantized_decomposed.dequantize_per_tensor.default + ) + assert nodes[10].target == torch.ops.aten.permute.default + assert ( + nodes[11].target == torch.ops.quantized_decomposed.quantize_per_tensor.default + ) def test_multiple_shared_spec_ops_in_row(): @@ -269,7 +301,7 @@ def test_multiple_shared_spec_ops_in_row(): model.eval() example_input = (torch.ones(1, 3, 64, 64),) - quantizer = NeutronQuantizer() + quantizer = NeutronQuantizer(neutron_target_spec) graph_module = torch.export.export(model, example_input, strict=True).module() # noinspection PyTypeChecker @@ -282,10 +314,14 @@ def test_multiple_shared_spec_ops_in_row(): nodes = list(m.graph.nodes) - assert len(nodes) == 17 - assert nodes[-5].name.startswith("dequantize_per_tensor_default") - assert nodes[-4].name == "max_pool2d" - assert nodes[-3].name.startswith("quantize_per_tensor_default") + assert len(nodes) == 15 + assert ( + nodes[-5].target == torch.ops.quantized_decomposed.dequantize_per_tensor.default + ) + assert nodes[-4].target == torch.ops.aten.max_pool2d.default + assert ( + nodes[-3].target == torch.ops.quantized_decomposed.quantize_per_tensor.default + ) # Assert that post-ReLU quantize and pre-MaxPool dequantize has same specs assert nodes[-6].args[1:] == nodes[-5].args[1:] @@ -302,7 +338,7 @@ def test_quantizers_order_invariance(): model.eval() example_input = (torch.ones(1, 4, 64, 64),) - quantizer = NeutronQuantizer() + quantizer = NeutronQuantizer(neutron_target_spec) graph_module = torch.export.export(model, example_input, strict=True).module() @@ -324,3 +360,214 @@ def test_quantizers_order_invariance(): assert len(nodes) == len(nodes_reversed) assert all(n == n_reversed for n, n_reversed in zip(nodes, nodes_reversed)) + + +@pytest.mark.parametrize( + "activation, inplace", + [ + ("relu", True), + ("relu", False), + ("relu6", True), + ("relu6", False), + ("tanh", True), + ("tanh", False), + ("sigmoid", False), 
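+        # `get_activation` in models.py returns `nn.Sigmoid()` unconditionally
+        # (no in-place variant is wired up), so sigmoid only runs with inplace=False.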
+ ], +) +def test_quantizer__linear_w_activation(mocker, activation, inplace): + converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") + quantizer_spy = mocker.spy(executorch_pipeline, "_quantize_model") + + input_shape = (1, 4) + model = models.LinearActivationModule( + activation=activation, + inplace=inplace, + in_channels=input_shape[1], + mode="linear", + ) + + edge_program = to_quantized_edge_program(model, input_shape).exported_program() + + # Make sure that all nodes were delegated. + assert not graph_contains_any_of_ops( + graph=edge_program.graph, + ops=fuse_activation_ops, + ) + assert any("lowered_module" in node.name for node in edge_program.graph.nodes) + + tflite_flatbuffers_model, io_formats = converter_spy.spy_return + exported_program: ExportedProgram = converter_spy.call_args.args[1] + exir_program_aten_quant: GraphModule = quantizer_spy.spy_return + + # Check linear and activation are in the same QDQ cluster + nodes = list(exir_program_aten_quant.graph.nodes) + assert len(nodes) == 12 + assert neutron_target_spec.neutron_target_info.is_fusable_conv_or_linear(nodes[7]) + assert neutron_target_spec.neutron_target_info.is_supported_fused_activation( + nodes[8] + ) + assert nodes[9].target == torch.ops.quantized_decomposed.quantize_per_tensor.default + input_data = (np.random.random(input_shape).astype(np.float32) * 50).astype(np.int8) + convert_run_compare( + exported_program, + input_data, + tfl_model=tflite_flatbuffers_model, + atol=1.0, + ) + + +@pytest.mark.parametrize( + "activation, inplace", + [ + ("relu", True), + ("relu", False), + ("relu6", True), + ("relu6", False), + ("tanh", True), + ("tanh", False), + ("sigmoid", False), + ], +) +def test_quantizer__addmm_w_activation(mocker, activation, inplace): + converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") + quantizer_spy = mocker.spy(executorch_pipeline, "_quantize_model") + + input_shape = (1, 4) + model = models.LinearActivationModule( + activation=activation, inplace=inplace, in_channels=input_shape[1], mode="addmm" + ) + + edge_program = to_quantized_edge_program(model, input_shape).exported_program() + + # Make sure that all nodes were delegated. 
+    assert not graph_contains_any_of_ops(
+        graph=edge_program.graph,
+        ops=fuse_activation_ops,
+    )
+    assert any("lowered_module" in node.name for node in edge_program.graph.nodes)
+
+    tflite_flatbuffers_model, io_formats = converter_spy.spy_return
+    exported_program: ExportedProgram = converter_spy.call_args.args[1]
+    exir_program_aten_quant: GraphModule = quantizer_spy.spy_return
+
+    # Check linear and activation are in the same QDQ cluster
+    nodes = list(exir_program_aten_quant.graph.nodes)
+    assert len(nodes) == 12
+    assert neutron_target_spec.neutron_target_info.is_fusable_conv_or_linear(nodes[7])
+    assert neutron_target_spec.neutron_target_info.is_supported_fused_activation(
+        nodes[8]
+    )
+    assert nodes[9].target == torch.ops.quantized_decomposed.quantize_per_tensor.default
+    input_data = (np.random.random(input_shape).astype(np.float32) * 50).astype(np.int8)
+    convert_run_compare(
+        exported_program,
+        input_data,
+        tfl_model=tflite_flatbuffers_model,
+        atol=1.0,
+    )
+
+
+@pytest.mark.parametrize(
+    "activation, inplace",
+    [
+        ("relu", True),
+        ("relu", False),
+        ("relu6", True),
+        ("relu6", False),
+        ("tanh", True),
+        ("tanh", False),
+        ("sigmoid", False),
+    ],
+)
+def test_quantizer__mm_w_activation(mocker, activation, inplace):
+    converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program")
+    quantizer_spy = mocker.spy(executorch_pipeline, "_quantize_model")
+
+    input_shape = (1, 4)
+    model = models.LinearActivationModule(
+        activation=activation, inplace=inplace, in_channels=input_shape[1], mode="mm"
+    )
+
+    edge_program = to_quantized_edge_program(model, input_shape).exported_program()
+
+    # Make sure that all nodes were delegated.
+    assert not graph_contains_any_of_ops(
+        graph=edge_program.graph,
+        ops=fuse_activation_ops,
+    )
+    assert any("lowered_module" in node.name for node in edge_program.graph.nodes)
+
+    tflite_flatbuffers_model, io_formats = converter_spy.spy_return
+    exported_program: ExportedProgram = converter_spy.call_args.args[1]
+    exir_program_aten_quant: GraphModule = quantizer_spy.spy_return
+
+    # Check linear and activation are in the same QDQ cluster
+    nodes = list(exir_program_aten_quant.graph.nodes)
+    assert len(nodes) == 10
+    assert neutron_target_spec.neutron_target_info.is_fusable_conv_or_linear(nodes[5])
+    assert neutron_target_spec.neutron_target_info.is_supported_fused_activation(
+        nodes[6]
+    )
+    assert nodes[7].target == torch.ops.quantized_decomposed.quantize_per_tensor.default
+    input_data = (np.random.random(input_shape).astype(np.float32) * 50).astype(np.int8)
+    convert_run_compare(
+        exported_program,
+        input_data,
+        tfl_model=tflite_flatbuffers_model,
+        atol=1.0,
+    )
+
+
+@pytest.mark.parametrize(
+    "activation, inplace",
+    [
+        ("relu", True),
+        ("relu", False),
+        ("relu6", True),
+        ("relu6", False),
+        ("tanh", True),
+        ("tanh", False),
+        ("sigmoid", False),
+    ],
+)
+def test_quantizer__conv_w_activation(mocker, activation, inplace):
+    converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program")
+    quantizer_spy = mocker.spy(executorch_pipeline, "_quantize_model")
+
+    input_shape = (1, 4, 8, 8)
+    model = models.ConvActivationModule(
+        activation=activation, inplace=inplace, in_channels=input_shape[1]
+    )
+
+    edge_program = to_quantized_edge_program(model, input_shape).exported_program()
+
+    # Make sure that all nodes were delegated.
+    assert not graph_contains_any_of_ops(
+        graph=edge_program.graph,
+        ops=fuse_activation_ops,
+    )
+    assert any("lowered_module" in node.name for node in edge_program.graph.nodes)
+
+    tflite_flatbuffers_model, io_formats = converter_spy.spy_return
+    exported_program: ExportedProgram = converter_spy.call_args.args[1]
+    exir_program_aten_quant: GraphModule = quantizer_spy.spy_return
+
+    # Check conv and activation are in the same QDQ cluster
+    nodes = list(exir_program_aten_quant.graph.nodes)
+    assert len(nodes) == 16
+    assert neutron_target_spec.neutron_target_info.is_fusable_conv_or_linear(nodes[11])
+    assert neutron_target_spec.neutron_target_info.is_supported_fused_activation(
+        nodes[12]
+    )
+    assert (
+        nodes[13].target == torch.ops.quantized_decomposed.quantize_per_tensor.default
+    )
+    input_data = (np.random.random(input_shape).astype(np.float32) * 50).astype(np.int8)
+    convert_run_compare(
+        exported_program,
+        input_data,
+        tfl_model=tflite_flatbuffers_model,
+        tflite_input_preprocess=ToChannelLastPreprocess(),
+        tflite_output_preprocess=ToChannelFirstPreprocess(),
+        atol=1.0,
+    )
diff --git a/backends/nxp/tests/test_removing_dead_code.py b/backends/nxp/tests/test_removing_dead_code.py
index cc51746c81c..00cb6775b3c 100644
--- a/backends/nxp/tests/test_removing_dead_code.py
+++ b/backends/nxp/tests/test_removing_dead_code.py
@@ -10,7 +10,10 @@
 import torch

 from executorch.backends.nxp.quantizer.neutron_quantizer import NeutronQuantizer
-from executorch.backends.nxp.tests.executorch_pipeline import _quantize_model
+from executorch.backends.nxp.tests.executorch_pipeline import (
+    _quantize_model,
+    neutron_target_spec,
+)
 from executorch.backends.nxp.tests.executors import graph_contains_any_of_ops


@@ -51,7 +54,7 @@ def test_removing_dead_code(self):
         )

         # The `NeutronQuantizer` should remove the dead code in the `transform_for_annotation()` method.
-        quantizer = NeutronQuantizer()
+        quantizer = NeutronQuantizer(neutron_target_spec)
         exir_program_aten_quant = _quantize_model(
             exir_program_aten.module(), quantizer, [example_inputs]
         )
diff --git a/backends/nxp/tests/test_split_group_convolution.py b/backends/nxp/tests/test_split_group_convolution.py
index 4c9f277e34d..52133b6c7e2 100644
--- a/backends/nxp/tests/test_split_group_convolution.py
+++ b/backends/nxp/tests/test_split_group_convolution.py
@@ -21,6 +21,7 @@
 from executorch.backends.nxp.tests.executorch_pipeline import (
     _quantize_model,
     get_random_calibration_inputs,
+    neutron_target_spec,
     to_model_input_spec,
 )
 from executorch.backends.nxp.tests.executors import graph_contains_any_of_ops
@@ -40,7 +41,7 @@ def _quantize_and_lower_module(
     module: GraphModule, input_shape: tuple[int, ...], target="imxrt700"
 ) -> EdgeProgramManager:
     calibration_inputs = get_random_calibration_inputs(to_model_input_spec(input_shape))

-    quantizer = NeutronQuantizer()
+    quantizer = NeutronQuantizer(neutron_target_spec)

     exir_program_aten__module_quant = _quantize_model(
         module, quantizer, calibration_inputs
diff --git a/examples/nxp/aot_neutron_compile.py b/examples/nxp/aot_neutron_compile.py
index cb23f99a54d..4c90b3aefad 100644
--- a/examples/nxp/aot_neutron_compile.py
+++ b/examples/nxp/aot_neutron_compile.py
@@ -15,6 +15,7 @@
 import executorch.kernels.quantized  # noqa F401
 import torch

+from executorch.backends.nxp.backend.neutron_target_spec import NeutronTargetSpec
 from executorch.backends.nxp.edge_passes.neutron_edge_pass_manager import (
     NeutronEdgePassManager,
 )
@@ -114,7 +115,10 @@ def post_training_quantize(
     # Based on executorch.examples.arm.aot_arm_compiler.quantize
     logging.info("Quantizing model")
     logging.debug(f"---> Original model: {model}")
-    quantizer = NeutronQuantizer()
+    neutron_target_spec = NeutronTargetSpec(
+        target="imxrt700", neutron_converter_flavor="wrapper"
+    )
+    quantizer = NeutronQuantizer(neutron_target_spec)
     m = prepare_pt2e(model, quantizer)

     # Calibration:
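
For context, a minimal sketch of the quantizer construction pattern this patch introduces, assuming the same `NeutronTargetSpec` arguments used in `examples/nxp/aot_neutron_compile.py` above. `MyModel` and `example_inputs` are hypothetical placeholders, and the prepare/calibrate/convert steps follow the standard PT2E quantization flow rather than anything specific to this change.

# Usage sketch (not part of the patch): NeutronQuantizer now requires a
# NeutronTargetSpec instead of being constructed with no arguments.
import torch
from executorch.backends.nxp.backend.neutron_target_spec import NeutronTargetSpec
from executorch.backends.nxp.quantizer.neutron_quantizer import NeutronQuantizer
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e

# Target spec arguments taken from the aot_neutron_compile.py hunk above.
neutron_target_spec = NeutronTargetSpec(
    target="imxrt700", neutron_converter_flavor="wrapper"
)
quantizer = NeutronQuantizer(neutron_target_spec)

# `MyModel` and `example_inputs` are placeholders for illustration only.
model = torch.export.export(MyModel().eval(), example_inputs, strict=True).module()
m = prepare_pt2e(model, quantizer)  # insert observers
m(*example_inputs)                  # calibrate on representative inputs
m = convert_pt2e(m)                 # produce the quantized module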