NXP backend: Use per-channel quantization for Conv in NeutronQuantizer

skywall · StrycekSimon · commit b340ad1eef4f · 2025-09-23T09:34:22.000+02:00
diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/convolution_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/convolution_converter.py
@@ -321,6 +321,10 @@ def _convert_2d_conv(
                 t_op.tmp_inputs[1] = self.builder.create_transposed_tensor(
                     weight_tensor, perm
                 )
+
+                if t_op.tmp_inputs[1].quantization is not None:
+                    # Quantized: per-channel axis is 3 after the weight transpose.
+                    t_op.tmp_inputs[1].quantization.quantized_dimension = 3
             else:
                 raise NotImplementedError("Dynamic Depthwise Conv weights.")
 
diff --git a/backends/nxp/quantizer/patterns.py b/backends/nxp/quantizer/patterns.py
@@ -13,6 +13,7 @@
 from executorch.backends.nxp.quantizer.utils import get_bias_qparams
 from torch import fx
 from torch._ops import OpOverload
+from torchao.quantization.pt2e import PerChannelMinMaxObserver
 from torchao.quantization.pt2e.quantizer import (
     DerivedQuantizationSpec,
     FixedQParamsQuantizationSpec,
@@ -318,30 +319,39 @@ def partition_types(self) -> list[OpOverload]:
     def get_anchors(
         self, gm: fx.GraphModule, fused_partition: list[fx.GraphModule]
     ) -> PartitionAnchors:
-        # pyre-fixme[29]: `Union[BoundMethod[typing.Callable(torch._C.TensorBase.__ge...
         conv2d_node = fused_partition[0].nodes[-1]
 
-        bias_qspec = DerivedQuantizationSpec(
+        bias_quantization_qspec = DerivedQuantizationSpec(
             derived_from=[
                 (conv2d_node.args[0], conv2d_node),
                 (conv2d_node.args[1], conv2d_node),
             ],
             derive_qparams_fn=get_bias_qparams,
             dtype=torch.int32,
-            quant_min=-(2**31),
+            quant_min=-(2**31) + 1,
             quant_max=2**31 - 1,
-            qscheme=torch.per_tensor_affine,
+            qscheme=torch.per_channel_symmetric,
+            ch_axis=0,
+        )
+
+        weight_observer_or_fake_quant_ctr = PerChannelMinMaxObserver
+        weight_quantization_spec = QuantizationSpec(
+            dtype=torch.int8,
+            observer_or_fake_quant_ctr=weight_observer_or_fake_quant_ctr,
+            quant_min=-127,
+            quant_max=127,
+            qscheme=torch.per_channel_symmetric,
+            ch_axis=0,
         )
 
         # Keep bias empty if not supplied
         bias = []
         if len(conv2d_node.args) > 2 and conv2d_node.args[2] is not None:
-            bias = [(conv2d_node, NodeArgsIdx(2), bias_qspec)]
+            bias = [(conv2d_node, NodeArgsIdx(2), bias_quantization_qspec)]
 
         return PartitionAnchors(
             inputs=[(conv2d_node, NodeArgsIdx(0))],
-            weights=[(conv2d_node, NodeArgsIdx(1))],
-            # pyre-fixme[6]: Incompatible parameter type
+            weights=[(conv2d_node, NodeArgsIdx(1), weight_quantization_spec)],
             biases=bias,
             output=[(conv2d_node,)],
         )
diff --git a/backends/nxp/quantizer/utils.py b/backends/nxp/quantizer/utils.py
@@ -49,7 +49,7 @@ def get_bias_qparams(
     act_scale, _ = obs_or_fqs[0].calculate_qparams()
     weight_scale, _ = obs_or_fqs[1].calculate_qparams()
     bias_scale = act_scale * weight_scale
-    bias_zero_point = torch.zeros_like(bias_scale, dtype=torch.int32)
+    bias_zero_point = torch.zeros_like(bias_scale, dtype=torch.int64)
     return bias_scale, bias_zero_point
 
 
diff --git a/backends/nxp/tests/ir/converter/node_converter/test_hardtanh_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_hardtanh_converter.py
@@ -57,7 +57,7 @@ def test_relu6_quant(mocker, input_shape: tuple[int], inplace: bool):
         tflite_input_preprocess=ToNHWCPreprocess(),
         tflite_output_preprocess=ToNCHWPreprocess(),
         input_data=input_data,
-        atol=1.0,
+        atol=2.0,
     )
 
 
diff --git a/backends/nxp/tests/ir/converter/node_converter/test_mean_dim_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_mean_dim_converter.py
@@ -49,6 +49,7 @@ def test_mean_dim_conv_quant_conversion(mocker, input_shape, dim, keeepdim=True)
         input_data=input_data,
         tflite_output_preprocess=ToChannelFirstPreprocess(),
         tfl_model=tflite_flatbuffers_model,
+        atol=1.0,
     )
 
 
diff --git a/backends/nxp/tests/ir/converter/node_converter/test_tanh_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_tanh_converter.py
@@ -76,7 +76,7 @@ def test_conv_tanh(
                 tflite_input_preprocess=ToChannelLastPreprocess(),
                 tflite_output_preprocess=ToChannelFirstPreprocess(),
                 input_data=input_data,
-                atol=1.0,
+                atol=2.0,
             )
 
     @classmethod
diff --git a/backends/nxp/tests/test_batch_norm_fusion.py b/backends/nxp/tests/test_batch_norm_fusion.py
@@ -168,7 +168,7 @@ def test_batch_norm_conv_fusing__full_pipeline__1d(bias: bool):
     nodes = list(edge_program.graph.nodes)
 
     assert (
-        len(nodes) == 13
+        len(nodes) == 17
     )  # 1D Conv currently isn't delegated, because it doesn't get quantized.
     assert not any(
         node.op == "call_function" and "batch_norm" in node.target.__name__
diff --git a/backends/nxp/tests/test_qdq_clustering_conv.py b/backends/nxp/tests/test_qdq_clustering_conv.py
@@ -16,13 +16,13 @@ def test_conv2d_partitioner():
     lowered_module = edge_program.exported_program().graph_module.lowered_module_0
     nodes = list(lowered_module.original_module.graph.nodes)
 
-    assert len(nodes) == 7
+    assert len(nodes) == 9
 
-    q_x_node = nodes[1]
-    dq_w_node = nodes[2]
-    dq_x_node = nodes[3]
-    conv_node = nodes[4]
-    q_y_node = nodes[5]
+    q_x_node = nodes[3]
+    dq_w_node = nodes[4]
+    dq_x_node = nodes[5]
+    conv_node = nodes[6]
+    q_y_node = nodes[7]
 
     assert "cluster" not in q_x_node.meta
     assert dq_w_node.meta["cluster"] == "aten_convolution_default_cluster"
diff --git a/backends/nxp/tests/test_quantizer.py b/backends/nxp/tests/test_quantizer.py
@@ -1,4 +1,4 @@
-# Copyright 2024 NXP
+# Copyright 2024-2025 NXP
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
@@ -34,26 +34,26 @@ def test_quantizer_conv2d():
     m(*example_input)
 
     nodes = list(m.graph.nodes)
-    assert len(nodes) == 11
-    assert nodes[7].name == "conv2d"
+    assert len(nodes) == 15
+    assert nodes[11].name == "conv2d"
     # [0]: Input, [1] : weights, [2]: bias
     assert (
-        _get_target_name(nodes[7].args[0])
+        _get_target_name(nodes[11].args[0])
         == "torch.ops.quantized_decomposed.dequantize_per_tensor.default"
     )
     assert (
-        _get_target_name(nodes[7].args[1])
-        == "torch.ops.quantized_decomposed.dequantize_per_tensor.default"
+        _get_target_name(nodes[11].args[1])
+        == "torch.ops.quantized_decomposed.dequantize_per_channel.default"
     )
     assert (
-        _get_target_name(nodes[7].args[2])
-        == "torch.ops.quantized_decomposed.dequantize_per_tensor.default"
+        _get_target_name(nodes[11].args[2])
+        == "torch.ops.quantized_decomposed.dequantize_per_channel.default"
     )
     assert (
-        _get_target_name(nodes[8])
+        _get_target_name(nodes[12])
         == "torch.ops.quantized_decomposed.quantize_per_tensor.default"
     )
-    assert nodes[8].args[0].name == "conv2d"
+    assert nodes[12].args[0].name == "conv2d"
 
 
 def test_quantizer_linear():
@@ -112,22 +112,22 @@ def test_quantizer_maxpool2d():
     m(*example_input)
 
     nodes = list(m.graph.nodes)
-    assert len(nodes) == 14
+    assert len(nodes) == 18
     # Check if QDQ pattern:
-    assert nodes[10].name == "max_pool2d"
+    assert nodes[14].name == "max_pool2d"
     assert (
-        _get_target_name(nodes[10].args[0])
+        _get_target_name(nodes[14].args[0])
         == "torch.ops.quantized_decomposed.dequantize_per_tensor.default"
     )
     assert (
-        _get_target_name(nodes[11])
+        _get_target_name(nodes[15])
         == "torch.ops.quantized_decomposed.quantize_per_tensor.default"
     )
-    assert nodes[11].args[0].name == "max_pool2d"
+    assert nodes[15].args[0].name == "max_pool2d"
 
     # Check if input and output quantization is same
-    input_quant = nodes[10].args[0].args[1:]
-    output_quant = nodes[11].args[1:]
+    input_quant = nodes[14].args[0].args[1:]
+    output_quant = nodes[15].args[1:]
     assert input_quant == output_quant
 
 
@@ -207,10 +207,10 @@ def test_quantizer_conv2d_relu():
     m(*example_input)
 
     nodes = list(m.graph.nodes)
-    assert len(nodes) == 12
-    assert nodes[7].name == "dequantize_per_tensor_default_2"
-    assert nodes[8].name == "relu"
-    assert nodes[9].name == "quantize_per_tensor_default_3"
+    assert len(nodes) == 14
+    assert nodes[9].name == "dequantize_per_tensor_default_1"
+    assert nodes[10].name == "relu"
+    assert nodes[11].name == "quantize_per_tensor_default_2"
 
 
 def test_quantizer_conv2d_avg_pool2d():
@@ -230,10 +230,10 @@ def test_quantizer_conv2d_avg_pool2d():
     m(*example_input)
 
     nodes = list(m.graph.nodes)
-    assert len(nodes) == 14
-    assert nodes[9].name == "dequantize_per_tensor_default_3"
-    assert nodes[10].name == "avg_pool2d"
-    assert nodes[11].name == "quantize_per_tensor_default_4"
+    assert len(nodes) == 18
+    assert nodes[13].name == "dequantize_per_tensor_default_1"
+    assert nodes[14].name == "avg_pool2d"
+    assert nodes[15].name == "quantize_per_tensor_default_2"
 
 
 def test_quantizer_conv2d_permute():
@@ -253,10 +253,11 @@ def test_quantizer_conv2d_permute():
     m(*example_input)
 
     nodes = list(m.graph.nodes)
-    assert len(nodes) == 12
-    assert nodes[7].name == "dequantize_per_tensor_default_2"
-    assert nodes[8].name == "permute"
-    assert nodes[9].name == "quantize_per_tensor_default_3"
+
+    assert len(nodes) == 14
+    assert nodes[9].name == "dequantize_per_tensor_default_1"
+    assert nodes[10].name == "permute"
+    assert nodes[11].name == "quantize_per_tensor_default_2"
 
 
 def test_multiple_shared_spec_ops_in_row():
@@ -281,15 +282,15 @@ def test_multiple_shared_spec_ops_in_row():
 
     nodes = list(m.graph.nodes)
 
-    assert len(nodes) == 15
-    assert nodes[-5].name == "dequantize_per_tensor_default_3"
+    assert len(nodes) == 17
+    assert nodes[-5].name.startswith("dequantize_per_tensor_default")
     assert nodes[-4].name == "max_pool2d"
-    assert nodes[-3].name == "quantize_per_tensor_default_4"
+    assert nodes[-3].name.startswith("quantize_per_tensor_default")
 
     # Assert that post-ReLU quantize and pre-MaxPool dequantize has same specs
     assert nodes[-6].args[1:] == nodes[-5].args[1:]
     # Assert that post-Conv quantize and pre-ReLU dequantize has same specs
-    assert nodes[6].args[1:] == nodes[7].args[1:]
+    assert nodes[5].args[1:] == nodes[6].args[1:]
 
 
 def test_quantizers_order_invariance():

Original file line number	Diff line number	Diff line change
`@@ -57,7 +57,7 @@ def test_relu6_quant(mocker, input_shape: tuple[int], inplace: bool):`
`57`	`57`	`tflite_input_preprocess=ToNHWCPreprocess(),`
`58`	`58`	`tflite_output_preprocess=ToNCHWPreprocess(),`
`59`	`59`	`input_data=input_data,`
`60`		`- atol=1.0,`
	`60`	`+ atol=2.0,`
`61`	`61`	`)`
`62`	`62`
`63`	`63`
Original file line number	Diff line number	Diff line change
`@@ -49,6 +49,7 @@ def test_mean_dim_conv_quant_conversion(mocker, input_shape, dim, keeepdim=True)`
`49`	`49`	`input_data=input_data,`
`50`	`50`	`tflite_output_preprocess=ToChannelFirstPreprocess(),`
`51`	`51`	`tfl_model=tflite_flatbuffers_model,`
	`52`	`+ atol=1.0,`
`52`	`53`	`)`
`53`	`54`
`54`	`55`
Original file line number	Diff line number	Diff line change
`@@ -76,7 +76,7 @@ def test_conv_tanh(`
`76`	`76`	`tflite_input_preprocess=ToChannelLastPreprocess(),`
`77`	`77`	`tflite_output_preprocess=ToChannelFirstPreprocess(),`
`78`	`78`	`input_data=input_data,`
`79`		`- atol=1.0,`
	`79`	`+ atol=2.0,`
`80`	`80`	`)`
`81`	`81`
`82`	`82`	`@classmethod`