Arm backend: Grouped conv per-channel quant support (#12671)

AdrianLundell · Martin Lindström · web-flow · commit d9e99cb20fdd · 2025-07-21T16:06:11.000+02:00
Adds support for per-channel quantization of grouped convolution.

Signed-off-by: Oscar Andersson <oscar.andersson@arm.com>
Co-authored-by: Martin Lindström <Martin.Lindstroem@arm.com>
diff --git a/backends/arm/_passes/decompose_grouped_conv.py b/backends/arm/_passes/decompose_grouped_conv.py
@@ -6,6 +6,7 @@
 from copy import copy
 
 import torch
+from executorch.backends.arm.tosa_quant_utils import QuantArgs
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass
 
@@ -48,7 +49,40 @@ def _get_decomposition(op):
                     torch.ops.aten.cat.default,
                 )
             case _:
-                raise RuntimeError("Unvalid op for grouped conv decomposition.")
+                raise RuntimeError("Invalid op for grouped conv decomposition")
+
+    @staticmethod
+    def _split_per_channel_qparams(qarg, index, output_slice_size):
+        if qarg is not None and qarg.per_channel:
+            start_index = index * output_slice_size
+            stop_index = (index + 1) * output_slice_size
+            return QuantArgs(
+                scale=qarg.scale[start_index:stop_index],
+                zp=qarg.zp[start_index:stop_index],
+                qmin=qarg.qmin,
+                qmax=qarg.qmax,
+                dtype=qarg.dtype,
+                axis=qarg.axis,
+                per_channel=qarg.per_channel,
+            )
+        return qarg
+
+    @staticmethod
+    def _get_meta_copy(meta, i, output_slice_size):
+        meta_copy = meta.copy()
+        if "input_qparams" in meta.data and len(meta.data["input_qparams"]) > 0:
+            # Handle per-channel quantization by splitting quantization params
+            # similarly to how activations/weights/biases are split.
+            new_qparams = meta.data.get("input_qparams").copy()
+            # Get quantization params of the weights and slice them.
+            qarg = new_qparams[1]
+            new_qparams[1] = DecomposeGroupedConv._split_per_channel_qparams(
+                qarg, index=i, output_slice_size=output_slice_size
+            )
+
+            meta_copy.data["input_qparams"] = new_qparams
+
+        return meta_copy
 
     def call_operator(self, op, args, kwargs, meta):
         if op == exir_ops.edge.aten.convolution.default:
@@ -105,7 +139,6 @@ def call_operator(self, op, args, kwargs, meta):
             if bias_node is None:
                 bias_slices.append(None)
             else:
-
                 start_index = i * output_slice_size
                 stop_index = (i + 1) * output_slice_size
                 slice_args = (bias_node, 0, start_index, stop_index)
@@ -115,20 +148,23 @@ def call_operator(self, op, args, kwargs, meta):
                 )
 
         output_slices = []
-        for input_slice, filter_slice, bias_slice in zip(
-            input_slices, filter_slices, bias_slices
+        for i, (input_slice, filter_slice, bias_slice) in enumerate(
+            zip(input_slices, filter_slices, bias_slices)
         ):
 
+            meta_copy = DecomposeGroupedConv._get_meta_copy(meta, i, output_slice_size)
+
             if op == exir_ops.edge.aten.convolution.default:
                 conv_args = (input_slice, filter_slice, bias_slice, *args[3:8], 1)
             elif op == torch.ops.aten.conv2d.default:
                 conv_args = (input_slice, filter_slice, bias_slice, *args[3:6], 1)
             else:
-                raise RuntimeError("Unvalid op for grouped conv decomposition.")
+                raise RuntimeError("Invalid op for grouped conv decomposition")
 
             output_slices.append(
-                super().call_operator(conv_op, conv_args, kwargs, meta)
+                super().call_operator(conv_op, conv_args, kwargs, meta_copy)
             )
 
         cat_args = (output_slices, 1)
-        return super().call_operator(cat_op, cat_args, kwargs, no_q_dq_meta)
+        # propagate original metadata (including quantization params) to the concatenated output
+        return super().call_operator(cat_op, cat_args, kwargs, meta)
diff --git a/backends/arm/_passes/fold_qdq_with_annotated_qparams_pass.py b/backends/arm/_passes/fold_qdq_with_annotated_qparams_pass.py
@@ -75,7 +75,7 @@ class FoldAndAnnotateQParamsPass(ArmPass):
      node.
      The quantization parameters from the DQ/Q nodes are stored as meta values to be
      accessible for later lowering and serialization passes.
-     The assumption is that the quantization annotatation adds DQ nodes for all tensor
+     The assumption is that the quantization annotation adds DQ nodes for all tensor
      inputs to the target one Q node to the output.
 
      Example ('executorch_exir_dialects_edge__ops_' prefix removed from operators for readability):
@@ -95,7 +95,7 @@ class FoldAndAnnotateQParamsPass(ArmPass):
 
         output_dq: "f32[5]" = quantized_decomposed_dequantize_per_tensor_default(aten_add_tensor_q, 0.05487706884741783, -128, -128, 127, torch.int8)
 
-    The quantization parameters for x_dq and aten_add_tensor_q are store in meta for the aten_add_tensor node.
+    The quantization parameters for x_dq and aten_add_tensor_q are stored in meta for the aten_add_tensor node.
 
     """
 
@@ -132,7 +132,7 @@ def fold_and_annotate_arg(
                 nodes_to_remove.add(arg)
             if input_qparams is not None and input_qparams != arg_quant_params:
                 # Two args are quantized differently
-                raise RuntimeError("Input qparams does not match!")
+                raise RuntimeError("Input qparams do not match")
             input_qparams = arg_quant_params
         if input_qparams is not None:
             node.meta["input_qparams"][i] = input_qparams
diff --git a/backends/arm/test/ops/test_conv2d.py b/backends/arm/test/ops/test_conv2d.py
@@ -385,8 +385,6 @@ def forward(self, x):
     f"{k},per_channel_quant={q}": (lambda v=v, q=q: (v(), q))
     for (k, v) in test_data_MI.items()
     for q in [True, False]
-    # TODO: Invalid TOSA graph (MLETORCH-1144)
-    if (k not in ["groups", "groups_bias"]) and (q is True)
 }
 
 fvp_xfails = {

Original file line number	Diff line number	Diff line change
`@@ -385,8 +385,6 @@ def forward(self, x):`
`385`	`385`	`f"{k},per_channel_quant={q}": (lambda v=v, q=q: (v(), q))`
`386`	`386`	`for (k, v) in test_data_MI.items()`
`387`	`387`	`for q in [True, False]`
`388`		`- # TODO: Invalid TOSA graph (MLETORCH-1144)`
`389`		`- if (k not in ["groups", "groups_bias"]) and (q is True)`
`390`	`388`	`}`
`391`	`389`
`392`	`390`	`fvp_xfails = {`