From 5bdcb24f7d457d800981af5d867cc328605552c8 Mon Sep 17 00:00:00 2001
From: Max Ren
Date: Mon, 16 Jun 2025 11:07:30 -0700
Subject: [PATCH] [Quantized DeConv Support] Enable Quantized Transposed Convs
 with groups==1

This change supports quantized transposed convs with groups == 1.

Previously, some support existed for quantized transposed convolutions, but only when the channel axis is 1 and groups == 1. The current quantizer could not produce this pattern because it only allows quantizing along dim 0, which is generally the output-channel dimension. For transposed convs, however, the weights have shape:
```
[in_channels, out_channels/groups, h, w]
```
Since we want to keep quantization along the output channels, we now need to quantize along axis = 1.

The reason we require groups == 1 is that XNNPACK takes filters of shape:
```
[out_channels, H, W, in_channels/groups]
```
Because we quantize along the output channels, PyTorch produces out_channels/groups scales, while XNNPACK expects out_channels scales. Supporting groups > 1 would realistically require affine quantization that provides a scale for every (group, out_channel) pair. For now, we simply enforce the constraint groups == 1. (Sketches illustrating the axis choice and the scale-count mismatch follow the diff.)

Differential Revision: [D76631781](https://our.internmc.facebook.com/intern/diff/D76631781/)

[ghstack-poisoned]
---
 .../quantizer/xnnpack_quantizer_utils.py |  28 +++-
 backends/xnnpack/test/ops/test_conv2d.py | 130 +++++-------------
 2 files changed, 58 insertions(+), 100 deletions(-)

diff --git a/backends/xnnpack/quantizer/xnnpack_quantizer_utils.py b/backends/xnnpack/quantizer/xnnpack_quantizer_utils.py
index 0dcfb4484ed..2ebf69da4f5 100644
--- a/backends/xnnpack/quantizer/xnnpack_quantizer_utils.py
+++ b/backends/xnnpack/quantizer/xnnpack_quantizer_utils.py
@@ -238,7 +238,19 @@ def _do_annotate_conv(
         weight = conv_node.args[1]
         assert isinstance(weight, Node)
 
-        input_qspec_map[weight] = get_weight_qspec(quantization_config)
+        weight_qspec = get_weight_qspec(quantization_config)
+        if is_conv_transpose:
+            # transposed convs quantize weights per output channel (dim 1)
+            weight_qspec = QuantizationSpec(
+                dtype=weight_qspec.dtype,
+                quant_min=weight_qspec.quant_min,
+                quant_max=weight_qspec.quant_max,
+                qscheme=weight_qspec.qscheme,
+                ch_axis=1,
+                is_dynamic=False,
+                observer_or_fake_quant_ctr=weight_qspec.observer_or_fake_quant_ctr,
+            )
+        input_qspec_map[weight] = weight_qspec
 
         # Only annotate dynamically quantized conv if it's 2D and not depthwise
         if (
@@ -311,7 +323,19 @@ def _do_annotate_conv_relu(
         weight = conv_node.args[1]
         assert isinstance(weight, Node)
 
-        input_qspec_map[weight] = get_weight_qspec(quantization_config)
+        weight_qspec = get_weight_qspec(quantization_config)
+        if is_conv_transpose:
+            # transposed convs quantize weights per output channel (dim 1)
+            weight_qspec = QuantizationSpec(
+                dtype=weight_qspec.dtype,
+                quant_min=weight_qspec.quant_min,
+                quant_max=weight_qspec.quant_max,
+                qscheme=weight_qspec.qscheme,
+                ch_axis=1,
+                is_dynamic=False,
+                observer_or_fake_quant_ctr=weight_qspec.observer_or_fake_quant_ctr,
+            )
+        input_qspec_map[weight] = weight_qspec
 
         # adding weight node to the partition as well
         partition = [relu_node, conv_node, conv_node.args[1]]

diff --git a/backends/xnnpack/test/ops/test_conv2d.py b/backends/xnnpack/test/ops/test_conv2d.py
index 92bb03c907a..d838ef0ffe9 100644
--- a/backends/xnnpack/test/ops/test_conv2d.py
+++ b/backends/xnnpack/test/ops/test_conv2d.py
@@ -221,7 +221,6 @@ def _test(
         conv_count=1,
         dtype: torch.dtype = torch.float,
check_quantized=True, - delegated=True, ): # pyre-fixme[29]: `Union[torch._tensor.Tensor, # torch.nn.modules.module.Module]` is not a function. @@ -240,29 +239,20 @@ def _test( (tester.export().check_count({op: conv_count}).to_edge_transform_and_lower()) - if delegated: - ( - tester.check_not( - ["executorch_exir_dialects_edge__ops_aten_convolution_default"] - ) - .check_not( - [ - "executorch_exir_dialects_edge__ops__native_batch_norm_legit_no_training_default" - ] - ) - .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) - .to_executorch() - .serialize() - .run_method_and_compare_outputs(qtol=1) + ( + tester.check_not( + ["executorch_exir_dialects_edge__ops_aten_convolution_default"] ) - else: - # need quantize ops when ops are not delegated to xnnpack - if has_quantized_ops: - ( - tester.to_executorch() - .serialize() - .run_method_and_compare_outputs(qtol=1) - ) + .check_not( + [ + "executorch_exir_dialects_edge__ops__native_batch_norm_legit_no_training_default" + ] + ) + .check_count({"torch.ops.higher_order.executorch_call_delegate": 1}) + .to_executorch() + .serialize() + .run_method_and_compare_outputs(qtol=1) + ) def _test_dq( self, @@ -325,7 +315,6 @@ def test_qs8_conv2d_per_channel(self) -> None: self._test( Conv2d(transpose=transpose), quant_config=get_symmetric_quantization_config(is_per_channel=True), - delegated=not transpose, # XNNPACK does not support per input channel quantization for transpose convolutions with groups > 1 ) def test_fp32_conv2d_seq(self) -> None: @@ -485,7 +474,6 @@ def get_inputs(self): self._test( ConvReLU(transpose=transpose), quant_config=get_symmetric_quantization_config(is_per_channel=True), - delegated=not transpose, # XNNPACK does not support per input channel quantization for transpose convolutions with groups > 1 ) def test_qs8_conv2d_dw_relu(self): @@ -537,8 +525,6 @@ def get_inputs(self): quant_config=get_symmetric_quantization_config( is_per_channel=per_channel_quant ), - # XNNPACK does not support per input channel quantization for transpose convolutions with groups > 1 - delegated=not (transpose and per_channel_quant), ) def test_qs8_conv2d_relu_seq(self): @@ -593,7 +579,7 @@ def get_inputs(self): conv_count=2, ) - def test_qs8_conv_transpose_2d_quantize_per_channel(self): + def test_qs8_conv_transpose_2d_quantize_per_channel_multi_axis(self): class PerChannelConvTranspose2d(torch.nn.Module): def __init__(self, input_channels, output_channels, groups, axis): super().__init__() @@ -662,76 +648,24 @@ def get_inputs(self): ) for groups in (1, 2): - for axis in (0, 1): - self._test( - PerChannelConvTranspose2d(3 * groups, 5 * groups, groups, axis), - quant_config=None, - conv_count=1, - delegated=axis == 1 - and groups - == 1, # xnnpack only support output channel axis quantization with groups == 1 - ) - - def test_qs8_conv_transpose_2d_dqd_f32_weights(self): - class TransposeConv2dDQDf32weights(torch.nn.Module): - def __init__(self, input_channels, output_channels, groups, axis): - super().__init__() - self.input_channels = input_channels - self.output_channels = output_channels - self.axis = axis - self.groups = groups - self.transpose = True - self.weights = torch.nn.Parameter( - torch.randn((input_channels, output_channels // groups, 4, 4)), - requires_grad=False, - ) - - axis_size = self.weights.shape[axis] - self.scale = torch.nn.Parameter(torch.ones(axis_size) * 0.12345) - self.zero_point = torch.nn.Parameter( - torch.zeros((axis_size,), dtype=torch.int64), requires_grad=False - ) - - def forward(self, x): - 
-                dequantize_input = (
-                    exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default(
-                        x, 0.12345, 0, -127, 127, torch.int8
+            for ch_axis in (1, 2):
+                if ch_axis == 1 and groups == 1:
+                    self._test(
+                        PerChannelConvTranspose2d(
+                            3 * groups, 5 * groups, groups, ch_axis
+                        ),
+                        quant_config=None,
+                        conv_count=1,
                     )
-                )
-                x = torch.nn.functional.conv_transpose2d(
-                    dequantize_input, self.weights, groups=self.groups
-                )
-
-                return exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default(
-                    exir_ops.edge.quantized_decomposed.quantize_per_tensor.default(
-                        x,
-                        0.12345,
-                        0,
-                        -127,
-                        127,
-                        torch.int8,
-                    ),
-                    0.12345,
-                    0,
-                    -127,
-                    127,
-                    torch.int8,
-                )
-
-            def get_inputs(self):
-                return (
-                    torch.randint(
-                        low=-127, high=127, size=(3, self.input_channels, 4, 4)
-                    ).type(dtype=torch.int8),
-                )
-
-        for groups in (1, 2):
-            for axis in (0, 1):
-                self._test(
-                    TransposeConv2dDQDf32weights(3 * groups, 5 * groups, groups, axis),
-                    quant_config=None,
-                    conv_count=1,
-                )
+                else:
+                    with self.assertRaises(RuntimeError):
+                        self._test(
+                            PerChannelConvTranspose2d(
+                                3 * groups, 5 * groups, groups, ch_axis
+                            ),
+                            quant_config=None,
+                            conv_count=1,
+                        )
 
     def test_padded_output_tconv(self):
         class TConv2d(torch.nn.Module):
@@ -761,7 +695,7 @@ def forward(self, x):
 
         (tester.export().check_count({op: conv_count}).to_edge_transform_and_lower())
 
-        # tconv should not be offloaded to XNNPack, since output padding is not
+        # tconv should not be offloaded to XNNPack, since output padding is not supported
         (
             tester.check(
                 ["executorch_exir_dialects_edge__ops_aten_convolution_default"]
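
For reference, here is a minimal standalone sketch, using only plain PyTorch (no ExecuTorch or quantizer imports), of why transposed-conv weights must be quantized along dim 1 rather than dim 0 to stay per-output-channel. All variable names are illustrative; only the weight layouts come from the commit message above.
```
import torch

in_channels, out_channels, groups = 3, 5, 1

conv = torch.nn.Conv2d(in_channels, out_channels, kernel_size=4)
deconv = torch.nn.ConvTranspose2d(in_channels, out_channels, kernel_size=4)

# Regular conv weight: [out_channels, in_channels/groups, kH, kW]
#   -> output channels live on dim 0, so ch_axis=0 works.
print(conv.weight.shape)    # torch.Size([5, 3, 4, 4])

# Transposed conv weight: [in_channels, out_channels/groups, kH, kW]
#   -> output channels live on dim 1, so per-output-channel needs ch_axis=1.
print(deconv.weight.shape)  # torch.Size([3, 5, 4, 4])

# One scale per output channel, computed by reducing every dim except 1:
w = deconv.weight.detach()
scales = w.abs().amax(dim=(0, 2, 3)) / 127.0
zero_points = torch.zeros(w.shape[1], dtype=torch.int64)
qw = torch.quantize_per_channel(w, scales, zero_points, axis=1, dtype=torch.qint8)
print(qw.q_per_channel_scales().numel())  # 5 == out_channels / groups
```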
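The groups == 1 constraint can be made concrete with hypothetical numbers (the shapes below are made up for illustration; only the two layouts are taken from the commit message):
```
import torch

in_channels, out_channels, groups = 6, 10, 2

# PyTorch transposed-conv weight: [in_channels, out_channels/groups, h, w].
# Quantizing along dim 1 yields out_channels/groups scales.
w = torch.randn(in_channels, out_channels // groups, 4, 4)
pytorch_scale_count = w.shape[1]  # 5

# XNNPACK filter layout: [out_channels, H, W, in_channels/groups], quantized
# per output channel, so XNNPACK expects out_channels scales.
xnnpack_scale_count = out_channels  # 10

# The counts disagree whenever groups > 1, which is why the annotator
# currently requires groups == 1 (where both counts equal out_channels).
assert pytorch_scale_count != xnnpack_scale_count
```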
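Finally, a hedged end-to-end sketch of how the new annotation path would be exercised. The import paths and the export/prepare/convert calls are assumptions based on this repo's layout and the standard PT2E workflow, not something this diff adds:
```
import torch
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e

# assumed import path, mirroring backends/xnnpack/quantizer in this repo
from executorch.backends.xnnpack.quantizer.xnnpack_quantizer import (
    XNNPACKQuantizer,
    get_symmetric_quantization_config,
)


class Deconv(torch.nn.Module):
    def __init__(self):
        super().__init__()
        # groups defaults to 1, the only grouping supported by this change
        self.deconv = torch.nn.ConvTranspose2d(3, 5, kernel_size=4)

    def forward(self, x):
        return self.deconv(x)


model = Deconv().eval()
example_inputs = (torch.randn(1, 3, 8, 8),)

# With is_per_channel=True, the quantizer should now annotate the transposed
# conv's weight with ch_axis=1 instead of the default ch_axis=0.
quantizer = XNNPACKQuantizer()
quantizer.set_global(get_symmetric_quantization_config(is_per_channel=True))

exported = torch.export.export_for_training(model, example_inputs).module()
prepared = prepare_pt2e(exported, quantizer)
prepared(*example_inputs)  # calibration pass
quantized = convert_pt2e(prepared)
```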