Skip to content

Commit eaba819

Browse files
committed
Add depthwise conv checks for dynamic quant
1 parent 7c53454 commit eaba819

File tree

2 files changed

+43
-9
lines changed

2 files changed

+43
-9
lines changed

backends/xnnpack/partition/config/gemm_configs.py

Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from typing import cast, List, Optional, Tuple
1010

1111
import torch
12+
from executorch.backends.transforms import get_shape
1213
from executorch.backends.xnnpack.operators.quant_params import QuantParams
1314
from executorch.backends.xnnpack.partition.config.xnnpack_config import (
1415
ConfigPrecisionType,
@@ -358,18 +359,35 @@ def check_constraints(self, node: torch.fx.Node, ep: ExportedProgram) -> bool:
358359
why(node, "Only support 1D + 2D Conv")
359360
return False # Only support 1D + 2D Conv
360361

361-
precision = self._detect_precision(node)
362-
if precision == ConfigPrecisionType.DYNAMIC_QUANT and len(conv_stride) != 2:
363-
why(node, "Only support 2D Conv for dynamic quantization")
364-
return False
365-
366362
kernel_node = get_input_node(node, 1)
363+
kernel_shape = get_shape(kernel_node)
367364
weight_quant_params = QuantParams.from_weights(kernel_node, ep)
368-
369-
is_transpose = node.args[6]
370365
groups = cast(int, node.args[8])
366+
is_transpose = node.args[6]
367+
368+
if is_transpose:
369+
group_input_channels = int(kernel_shape[0] / groups)
370+
group_output_channels = kernel_shape[1]
371+
else:
372+
group_input_channels = kernel_shape[1]
373+
group_output_channels = int(kernel_shape[0] / groups)
374+
375+
is_depthwise = (
376+
group_input_channels == 1
377+
and group_output_channels % group_input_channels == 0
378+
)
379+
380+
# XNNPACK does not support dynamic quantization convs that are not 2D or are depthwise
381+
if self._detect_precision(node) == ConfigPrecisionType.DYNAMIC_QUANT and (
382+
len(conv_stride) != 2 or is_depthwise
383+
):
384+
why(
385+
node,
386+
"XNNPACK only supports standard 2D convolutions for dynamic quantization",
387+
)
388+
return False
371389

372-
# XNNPack does not support non-zero output padding in transposed
390+
# XNNPACK does not support non-zero output padding in transposed
373391
# convolutions.
374392
if is_transpose and any(
375393
out_pad != 0 for out_pad in cast(List[int], node.args[7])

backends/xnnpack/quantizer/xnnpack_quantizer_utils.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -323,7 +323,7 @@ def _do_annotate_conv(
323323
assert isinstance(weight, Node)
324324
input_qspec_map[weight] = get_weight_qspec(quantization_config)
325325

326-
# Only annotate dynamically quantized conv if it's 2D
326+
# Only annotate dynamically quantized conv if it's 2D and not depthwise
327327
if (
328328
quantization_config
329329
and quantization_config.input_activation
@@ -336,6 +336,22 @@ def _do_annotate_conv(
336336
if weight_shape is not None and len(weight_shape) != 4:
337337
continue
338338

339+
# Default to 1 since groups is not available in the node
340+
groups = 1
341+
if is_conv_transpose:
342+
group_input_channels = int(weight_shape[0] / groups)
343+
group_output_channels = weight_shape[1]
344+
else:
345+
group_input_channels = weight_shape[1]
346+
group_output_channels = int(weight_shape[0] / groups)
347+
348+
# Skip if depthwise
349+
if (
350+
group_input_channels == 1
351+
and group_output_channels % group_input_channels == 0
352+
):
353+
continue
354+
339355
# adding weight node to the partition as well
340356
partition = [conv_node, conv_node.args[1]]
341357

0 commit comments

Comments (0)