Commit b53e5b2

Update on "[ExecuTorch] XNNPACK: prefer qc over qb when gs == k for non-int4"
* Prefer channelwise over groupwise when possible, for perf and for int8, which doesn't have groupwise support
* Fix bug / improve behavior for affine q/dq with gs == k for per_channel
* Refactor is_per_channel_group state variable
* Add QuantParams.__str__()

TODO:
- Improve affine quant primitives - T237476295

Differential Revision: [D82060758](https://our.internmc.facebook.com/intern/diff/D82060758/)

**NOTE FOR REVIEWERS**: This PR has internal Meta-specific changes or comments, please review them on [Phabricator](https://our.internmc.facebook.com/intern/diff/D82060758/)!

[ghstack-poisoned]
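Why does gs == k let channelwise replace groupwise? For a 2-D weight of shape [out_channels, in_channels], groupwise quantization produces scales of shape [out_channels, in_channels / group_size], so when group_size equals in_channels there is exactly one group per output channel and the scales collapse to the per-channel shape. A minimal sketch (not the ExecuTorch implementation; `groupwise_scale_shape` and `per_channel_scale_shape` are illustrative helpers, not real APIs):

```python
# Sketch: scale-tensor shapes for groupwise vs. per-channel weight
# quantization of a 2-D weight of shape [out_channels, in_channels].
def groupwise_scale_shape(out_channels, in_channels, group_size):
    assert in_channels % group_size == 0, "group_size must divide in_channels"
    # One scale per (output channel, group) pair.
    return (out_channels, in_channels // group_size)

def per_channel_scale_shape(out_channels):
    # One scale per output channel (kept 2-D for easy comparison).
    return (out_channels, 1)

print(groupwise_scale_shape(8, 64, 32))  # (8, 2) -> genuinely groupwise
print(groupwise_scale_shape(8, 64, 64))  # (8, 1) -> same as per-channel
```

When the shapes coincide, lowering as per-channel (qc) instead of per-channel-group (qb) avoids the groupwise path entirely, which matters for int8 since XNNPACK has no int8 groupwise kernel.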
2 parents 5dbf8c6 + a6d3379 commit b53e5b2

File tree

1 file changed: +2 -7 lines


backends/xnnpack/operators/quant_params.py

Lines changed: 2 additions & 7 deletions
```diff
@@ -96,20 +96,15 @@ def __init__(
             assert (
                 self.per_channel is True
             ), "Only per channel quantization supports groupwise quantization"
-            assert (
-                self.axis == 0,
-                "Only axis 0 is supported for per channel groupwise quant",
-            )
             assert (
                 cast(torch.Tensor, scale).ndim == 2
             ), "Scale must be 2D for per channel groupwise quant"
             # Assumed scale shape - [out_channels, in_channels/group_size]
             input_channels = cast(torch.Tensor, scale).shape[1] * self.group_size
             # 2d weight tensor shape - [out_channels, in_channels]
             assert (
-                tensor.shape[1] == input_channels,
-                "Invalid input channels for groupwise quant",
-            )
+                tensor.shape[1] == input_channels
+            ), "Invalid input channels for groupwise quant"
             # Prefer per_channel over per_channel_group when group_size == input_channels for non int4 cases only
             # int4 case need more fixes to map qb4w to qc4w. Incorrect scales being passed down to xnnpack.
             self.per_channel_group = (
```
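The removed and rewritten assertions fix a classic Python pitfall: `assert (condition, message)` asserts a non-empty tuple, which is always truthy, so the check can never fail (CPython 3.8+ emits a SyntaxWarning for this). A minimal demonstration (`check_axis` is an illustrative helper, not from the diff):

```python
# Pitfall: a parenthesized (condition, message) pair is a 2-tuple,
# and any non-empty tuple is truthy -- this "assertion" never fires
# even though the condition is False.
axis = 1
assert (axis == 0, "Only axis 0 is supported")  # always passes!

# Correct form: the message goes after a comma OUTSIDE the condition's
# parentheses, as in the fixed assert in the diff above.
def check_axis(axis):
    try:
        assert axis == 0, "Only axis 0 is supported"
        return True
    except AssertionError:
        return False

print(check_axis(0))  # True
print(check_axis(1))  # False
```

The multi-line style kept in the diff, `assert (cond), "msg"`, is safe because the parentheses wrap only the condition, not a tuple.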
