Commit 0b5b0e8

WIP: add initial support for dq 2D conv

1 parent b7eee0c
File tree: 4 files changed, +103 -1 lines changed

backends/xnnpack/partition/config/gemm_configs.py (6 additions, 0 deletions)

@@ -358,6 +358,11 @@ def check_constraints(self, node: torch.fx.Node, ep: ExportedProgram) -> bool:
             why(node, "Only support 1D + 2D Conv")
             return False  # Only support 1D + 2D Conv
 
+        precision = self._detect_precision(node)
+        if precision == ConfigPrecisionType.DYNAMIC_QUANT and len(conv_stride) != 2:
+            why(node, "Only support 2D Conv for dynamic quantization")
+            return False
+
         kernel_node = get_input_node(node, 1)
         weight_quant_params = QuantParams.from_weights(kernel_node, ep)
 

@@ -394,6 +399,7 @@ def supported_precision_types(self):
         return [
             ConfigPrecisionType.FP32,
            ConfigPrecisionType.STATIC_QUANT,
+            ConfigPrecisionType.DYNAMIC_QUANT,
         ]
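For context, a minimal standalone sketch of the stride-length heuristic the new constraint relies on. The helper name and the argument index are illustrative assumptions, not code from this commit: in the aten.convolution(input, weight, bias, stride, ...) schema the stride list is the fourth argument, so its length separates 1D convs ([s]) from 2D convs ([sH, sW]).

import torch

def allows_dynamic_quant(node: torch.fx.Node, is_dynamic_quant: bool) -> bool:
    # Hypothetical standalone version of the new check in check_constraints.
    if node.target != torch.ops.aten.convolution.default:
        return False
    conv_stride = node.args[3]  # [s] for 1D conv, [sH, sW] for 2D conv
    # Mirror the commit: dynamic quantization is only allowed for 2D convs.
    return not is_dynamic_quant or len(conv_stride) == 2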

backends/xnnpack/quantizer/xnnpack_quantizer.py (1 addition, 0 deletions)

@@ -265,6 +265,7 @@ class XNNPACKQuantizer(Quantizer):
 
     DYNAMIC_OPS = [
         "linear",
+        "conv",
     ]
 
     def __init__(self) -> None:
backends/xnnpack/quantizer/xnnpack_quantizer_utils.py (11 additions, 0 deletions)

@@ -304,6 +304,17 @@ def _do_annotate_conv(
     for n in gm.graph.nodes:
         if not is_conv_node(n):
             continue
+
+        # TODO: Check for dynamically quantized convs and check if nn.Conv2d is always lowered
+        # Only dynamically quantize 2D convolutions
+        # Handle both nn.Conv2d and aten.conv2d.default
+        if n.op == "call_module":
+            mod = gm.get_submodule(n.target)
+            if not hasattr(mod, "padding") or len(mod.padding) != 2:
+                continue
+        elif n.op == "call_function" and n.target != torch.ops.aten.conv2d.default:
+            continue
+
         conv_node = n
 
         # This is hacky!
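A self-contained illustration of why the filter above handles two node kinds: torch.fx symbolic tracing keeps nn.Conv2d as a call_module node whose submodule carries a 2-tuple padding, while torch.export lowers it to a call_function node (typically targeting torch.ops.aten.conv2d.default before decompositions run; exact targets vary by PyTorch version).

import torch

class M(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.conv = torch.nn.Conv2d(1, 2, 3)

    def forward(self, x):
        return self.conv(x)

# call_module form: the conv survives as a module reference, so the
# annotator can inspect gm.get_submodule(n.target).padding directly.
traced = torch.fx.symbolic_trace(M())
print([(n.op, n.target) for n in traced.graph.nodes])

# call_function form: after torch.export the conv is an aten op node.
ep = torch.export.export(M(), (torch.randn(1, 1, 8, 8),))
print([n.target for n in ep.graph.nodes if n.op == "call_function"])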

backends/xnnpack/test/ops/test_conv2d.py (85 additions, 1 deletion)

@@ -18,6 +18,10 @@
 except:
     has_quantized_ops = False
 
+from executorch.backends.xnnpack.partition.config.xnnpack_config import (
+    ConfigPrecisionType,
+)
 from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner
 from executorch.backends.xnnpack.quantizer.xnnpack_quantizer import (
     get_symmetric_quantization_config,
 )

@@ -26,7 +30,10 @@
 )
 from executorch.backends.xnnpack.test.test_xnnpack_utils import randomize_bn
 from executorch.backends.xnnpack.test.tester import Quantize, Tester
-
+from executorch.backends.xnnpack.test.tester.tester import (
+    Partition,
+    ToEdgeTransformAndLower,
+)
 from executorch.exir.dialects._ops import ops as exir_ops

@@ -223,6 +230,61 @@ def _test(
         .run_method_and_compare_outputs(qtol=1)
     )
 
+    def _test_dq_conv2d(
+        self,
+        m: torch.nn.Module,
+        inputs,
+        dynamic_shapes,
+        atol=5e-02,
+    ):
+        quant_config = get_symmetric_quantization_config(
+            is_per_channel=True,
+            is_dynamic=True,
+            act_qmin=-128,
+            act_qmax=127,
+            weight_qmin=-128,
+            weight_qmax=127,
+        )
+
+        DynamicallyQuantizedPartitioner = XnnpackPartitioner(
+            config_precisions=ConfigPrecisionType.DYNAMIC_QUANT,
+            per_op_mode=False,
+        )
+
+        tester = Tester(m, inputs, dynamic_shapes=dynamic_shapes)
+        tester = tester.quantize(Quantize(quantization_config=quant_config))
+
+        # Print after quantization
+        tester.stages["quantize"] = tester.stages[tester.cur]
+        print("\n----------Annotated Graph:")
+        print(tester.stages["quantize"].graph_module.code)
+
+        exported = tester.export()
+
+        # Print after exporting
+        tester.stages["export"] = exported.stages[exported.cur]
+        print("\n----------Exported Graph:")
+        print(tester.stages["export"].graph_module.code)
+
+        # Check for choose_qparams
+        tester.check(["torch.ops.quantized_decomposed.choose_qparams"])
+
+        tester.to_edge_transform_and_lower(
+            ToEdgeTransformAndLower([DynamicallyQuantizedPartitioner])
+        )
+
+        # Print after lower and partition
+        print("\n----------Lowered Graph:")
+        print(tester.stages[tester.cur].graph_module.code)
+
+        tester.check(["executorch_exir_dialects_edge__ops_aten_convolution_default"])
+        tester.check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
+        tester.check_not(["executorch_exir_dialects_edge__ops_aten_conv2d_default"])
+
+        tester.to_executorch()
+        tester.serialize()
+        tester.run_method_and_compare_outputs(atol=atol)
+
     def test_fp16_conv2d(self) -> None:
         for transpose in (True, False):
             for has_bias in (True, False):

@@ -699,3 +761,25 @@ def forward(self, x):
         .serialize()
         .run_method_and_compare_outputs(qtol=1)
     )
+
+    def test_dq_conv2d(self) -> None:
+        class SimpleConv2d(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.conv = torch.nn.Conv2d(1, 2, 3)
+                self.conv.weight.requires_grad = False
+                self.conv.bias.requires_grad = False
+
+            def forward(self, x):
+                return self.conv(x)
+
+            def get_inputs(self):
+                return (torch.randn(1, 1, 8, 8),)
+
+        model = SimpleConv2d()
+        self._test_dq_conv2d(
+            model,
+            model.get_inputs(),
+            dynamic_shapes=None,
+            atol=5e-2,
+        )
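To make the choose_qparams check and the loose atol concrete, a rough numeric sketch of symmetric int8 dynamic activation quantization with the qmin/qmax used above; the real quantized_decomposed.choose_qparams kernel has its own exact scale and rounding rules.

import torch

x = torch.randn(1, 1, 8, 8)  # same shape as the test input
qmin, qmax = -128, 127       # act_qmin / act_qmax from the config

# Dynamic quantization picks the scale at runtime from the live input
# range; symmetric quantization keeps the zero point at 0.
scale = x.abs().max() / qmax
x_q = torch.clamp(torch.round(x / scale), qmin, qmax)
x_dq = x_q * scale           # what the quantized conv effectively consumes

# Per-element error is bounded by scale / 2, which is why the test
# compares outputs with atol=5e-2 rather than exact equality.
print((x - x_dq).abs().max())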
