Commit 0b5b0e8

WIP: add initial support for dq 2D conv

1 parent b7eee0c
File tree: 4 files changed, +103 -1 lines changed

backends/xnnpack/partition/config/gemm_configs.py (6 additions, 0 deletions)

@@ -358,6 +358,11 @@ def check_constraints(self, node: torch.fx.Node, ep: ExportedProgram) -> bool:
             why(node, "Only support 1D + 2D Conv")
             return False  # Only support 1D + 2D Conv
 
+        precision = self._detect_precision(node)
+        if precision == ConfigPrecisionType.DYNAMIC_QUANT and len(conv_stride) != 2:
+            why(node, "Only support 2D Conv for dynamic quantization")
+            return False
+
         kernel_node = get_input_node(node, 1)
         weight_quant_params = QuantParams.from_weights(kernel_node, ep)
 

@@ -394,6 +399,7 @@ def supported_precision_types(self):
         return [
             ConfigPrecisionType.FP32,
            ConfigPrecisionType.STATIC_QUANT,
+            ConfigPrecisionType.DYNAMIC_QUANT,
         ]
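For context, a minimal standalone sketch of the stride-length heuristic the new constraint relies on. The helper name and the argument index are illustrative assumptions, not code from this commit: in the aten.convolution(input, weight, bias, stride, ...) schema the stride list is the fourth argument, so its length separates 1D convs ([s]) from 2D convs ([sH, sW]).

import torch

def allows_dynamic_quant(node: torch.fx.Node, is_dynamic_quant: bool) -> bool:
    # Hypothetical standalone version of the new check in check_constraints.
    if node.target != torch.ops.aten.convolution.default:
        return False
    conv_stride = node.args[3]  # [s] for 1D conv, [sH, sW] for 2D conv
    # Mirror the commit: dynamic quantization is only allowed for 2D convs.
    return not is_dynamic_quant or len(conv_stride) == 2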

backends/xnnpack/quantizer/xnnpack_quantizer.py (1 addition, 0 deletions)

@@ -265,6 +265,7 @@ class XNNPACKQuantizer(Quantizer):
 
     DYNAMIC_OPS = [
         "linear",
+        "conv",
     ]
 
     def __init__(self) -> None:
backends/xnnpack/quantizer/xnnpack_quantizer_utils.py (11 additions, 0 deletions)

@@ -304,6 +304,17 @@ def _do_annotate_conv(
     for n in gm.graph.nodes:
         if not is_conv_node(n):
             continue
+
+        # TODO: Check for dynamically quantized convs and check if nn.Conv2d is always lowered
+        # Only dynamically quantize 2D convolutions
+        # Handle both nn.Conv2d and aten.conv2d.default
+        if n.op == "call_module":
+            mod = gm.get_submodule(n.target)
+            if not hasattr(mod, "padding") or len(mod.padding) != 2:
+                continue
+        elif n.op == "call_function" and n.target != torch.ops.aten.conv2d.default:
+            continue
+
         conv_node = n
 
         # This is hacky!
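A self-contained illustration of why the filter above handles two node kinds: torch.fx symbolic tracing keeps nn.Conv2d as a call_module node whose submodule carries a 2-tuple padding, while torch.export lowers it to a call_function node (typically targeting torch.ops.aten.conv2d.default before decompositions run; exact targets vary by PyTorch version).

import torch

class M(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.conv = torch.nn.Conv2d(1, 2, 3)

    def forward(self, x):
        return self.conv(x)

# call_module form: the conv survives as a module reference, so the
# annotator can inspect gm.get_submodule(n.target).padding directly.
traced = torch.fx.symbolic_trace(M())
print([(n.op, n.target) for n in traced.graph.nodes])

# call_function form: after torch.export the conv is an aten op node.
ep = torch.export.export(M(), (torch.randn(1, 1, 8, 8),))
print([n.target for n in ep.graph.nodes if n.op == "call_function"])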

backends/xnnpack/test/ops/test_conv2d.py (85 additions, 1 deletion)

@@ -18,6 +18,10 @@
 except:
     has_quantized_ops = False
 
+from executorch.backends.xnnpack.partition.config.xnnpack_config import (
+    ConfigPrecisionType,
+)
 from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner
 from executorch.backends.xnnpack.quantizer.xnnpack_quantizer import (
     get_symmetric_quantization_config,
 )

@@ -26,7 +30,10 @@
 )
 from executorch.backends.xnnpack.test.test_xnnpack_utils import randomize_bn
 from executorch.backends.xnnpack.test.tester import Quantize, Tester
-
+from executorch.backends.xnnpack.test.tester.tester import (
+    Partition,
+    ToEdgeTransformAndLower,
+)
 from executorch.exir.dialects._ops import ops as exir_ops

@@ -223,6 +230,61 @@ def _test(
         .run_method_and_compare_outputs(qtol=1)
     )
 
+    def _test_dq_conv2d(
+        self,
+        m: torch.nn.Module,
+        inputs,
+        dynamic_shapes,
+        atol=5e-02,
+    ):
+        quant_config = get_symmetric_quantization_config(
+            is_per_channel=True,
+            is_dynamic=True,
+            act_qmin=-128,
+            act_qmax=127,
+            weight_qmin=-128,
+            weight_qmax=127,
+        )
+
+        DynamicallyQuantizedPartitioner = XnnpackPartitioner(
+            config_precisions=ConfigPrecisionType.DYNAMIC_QUANT,
+            per_op_mode=False,
+        )
+
+        tester = Tester(m, inputs, dynamic_shapes=dynamic_shapes)
+        tester = tester.quantize(Quantize(quantization_config=quant_config))
+
+        # Print after quantization
+        tester.stages["quantize"] = tester.stages[tester.cur]
+        print("\n----------Annotated Graph:")
+        print(tester.stages["quantize"].graph_module.code)
+
+        exported = tester.export()
+
+        # Print after exporting
+        tester.stages["export"] = exported.stages[exported.cur]
+        print("\n----------Exported Graph:")
+        print(tester.stages["export"].graph_module.code)
+
+        # Check for choose_qparams
+        tester.check(["torch.ops.quantized_decomposed.choose_qparams"])
+
+        tester.to_edge_transform_and_lower(
+            ToEdgeTransformAndLower([DynamicallyQuantizedPartitioner])
+        )
+
+        # Print after lower and partition
+        print("\n----------Lowered Graph:")
+        print(tester.stages[tester.cur].graph_module.code)
+
+        tester.check(["executorch_exir_dialects_edge__ops_aten_convolution_default"])
+        tester.check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
+        tester.check_not(["executorch_exir_dialects_edge__ops_aten_conv2d_default"])
+
+        tester.to_executorch()
+        tester.serialize()
+        tester.run_method_and_compare_outputs(atol=atol)
+
     def test_fp16_conv2d(self) -> None:
         for transpose in (True, False):
             for has_bias in (True, False):

@@ -699,3 +761,25 @@ def forward(self, x):
         .serialize()
         .run_method_and_compare_outputs(qtol=1)
     )
+
+    def test_dq_conv2d(self) -> None:
+        class SimpleConv2d(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.conv = torch.nn.Conv2d(1, 2, 3)
+                self.conv.weight.requires_grad = False
+                self.conv.bias.requires_grad = False
+
+            def forward(self, x):
+                return self.conv(x)
+
+            def get_inputs(self):
+                return (torch.randn(1, 1, 8, 8),)
+
+        model = SimpleConv2d()
+        self._test_dq_conv2d(
+            model,
+            model.get_inputs(),
+            dynamic_shapes=None,
+            atol=5e-2,
+        )
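To make the choose_qparams check and the loose atol concrete, a rough numeric sketch of symmetric int8 dynamic activation quantization with the qmin/qmax used above; the real quantized_decomposed.choose_qparams kernel has its own exact scale and rounding rules.

import torch

x = torch.randn(1, 1, 8, 8)  # same shape as the test input
qmin, qmax = -128, 127       # act_qmin / act_qmax from the config

# Dynamic quantization picks the scale at runtime from the live input
# range; symmetric quantization keeps the zero point at 0.
scale = x.abs().max() / qmax
x_q = torch.clamp(torch.round(x / scale), qmin, qmax)
x_dq = x_q * scale           # what the quantized conv effectively consumes

# Per-element error is bounded by scale / 2, which is why the test
# compares outputs with atol=5e-2 rather than exact equality.
print((x - x_dq).abs().max())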
