Skip to content

Commit 8fcb117

Browse files
committed
Permute before quant
1 parent 0b5b0e8 commit 8fcb117

File tree

5 files changed

+34
-33
lines changed

5 files changed

+34
-33
lines changed

backends/xnnpack/_passes/channels_last_tagged_reshape_pass.py

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -282,16 +282,38 @@ def input_to_nhwc(
282282
ChannelsLastTaggedReshapePass.PARTNER_NODE
283283
]
284284
else:
285-
# Need to create NHWC node
286-
with graph_module.graph.inserting_after(input_node):
285+
# Trace back through single-input ancestors to the original input node
286+
origin = input_node
287+
while hasattr(origin, "args") and isinstance(origin.args, tuple) and len(origin.args) > 0:
288+
origin = origin.args[0]
289+
290+
# Insert the channels-last permute before x, choose_qparams, and quantize
291+
with graph_module.graph.inserting_after(origin):
287292
input_node_nhwc = self.create_call_function_node(
288293
graph_module=graph_module,
289294
target=exir_ops.edge.aten._to_copy.default,
290-
args=(input_node,),
295+
args=(origin,),
291296
memory_format=torch.channels_last,
292297
)
298+
299+
for user in list(origin.users):
300+
if user != input_node_nhwc:
301+
user.replace_input_with(origin, input_node_nhwc)
302+
303+
graph_module.recompile()
293304
self.mark_as_nhwc_node(input_node_nhwc)
294305

306+
# TODO: re-enable this path for the case where no permute is needed
307+
# # Need to create NHWC node ----------------------------- CONVERSION HAPPENING ----->>
308+
# with graph_module.graph.inserting_after(input_node):
309+
# input_node_nhwc = self.create_call_function_node(
310+
# graph_module=graph_module,
311+
# target=exir_ops.edge.aten._to_copy.default,
312+
# args=(input_node,),
313+
# memory_format=torch.channels_last,
314+
# )
315+
# self.mark_as_nhwc_node(input_node_nhwc)
316+
295317
self.insert_copy_and_assign_partner_nodes_quantization_sensitive(
296318
graph_module=graph_module,
297319
original_input=input_node,

backends/xnnpack/quantizer/xnnpack_quantizer.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,8 +71,10 @@ def _supported_symmetric_quantized_operators() -> dict[str, list[OperatorPattern
7171
"conv2d": [
7272
[torch.nn.Conv2d, torch.nn.ReLU],
7373
[torch.nn.Conv2d, F.relu],
74+
[torch.nn.Conv2d],
7475
[F.conv2d, torch.nn.ReLU],
7576
[F.conv2d, F.relu],
77+
[F.conv2d],
7678
],
7779
"linear": [[torch.nn.Linear], [F.linear]],
7880
"add": [[torch.add]],

backends/xnnpack/quantizer/xnnpack_quantizer_utils.py

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -305,16 +305,6 @@ def _do_annotate_conv(
305305
if not is_conv_node(n):
306306
continue
307307

308-
# TODO: Check for dynamically quantized convs and check if nn.Conv2d is always lowered
309-
# Only dynamically quantize 2D convolutions
310-
# Handle both nn.Conv2d and aten.conv2d.default
311-
if n.op == "call_module":
312-
mod = gm.get_submodule(n.target)
313-
if not hasattr(mod, "padding") or len(mod.padding) != 2:
314-
continue
315-
elif n.op == "call_function" and n.target != torch.ops.aten.conv2d.default:
316-
continue
317-
318308
conv_node = n
319309

320310
# This is hacky!

backends/xnnpack/runtime/XNNCompiler.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1172,7 +1172,7 @@ Error defineStaticTransposeNode(
11721172
ET_CHECK_OR_RETURN_ERROR(
11731173
status == xnn_status_success,
11741174
Internal,
1175-
"Failed to create sigmoid node %i with code: %s",
1175+
"Failed to create static transpose node %i with code: %s",
11761176
node->debug_handle(),
11771177
xnn_status_to_string(status));
11781178

backends/xnnpack/test/ops/test_conv2d.py

Lines changed: 6 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -240,10 +240,6 @@ def _test_dq_conv2d(
240240
quant_config = get_symmetric_quantization_config(
241241
is_per_channel=True,
242242
is_dynamic=True,
243-
act_qmin=-128,
244-
act_qmax=127,
245-
weight_qmin=-128,
246-
weight_qmax=127,
247243
)
248244

249245
DynamicallyQuantizedPartitioner = XnnpackPartitioner(
@@ -254,35 +250,26 @@ def _test_dq_conv2d(
254250
tester = Tester(m, inputs, dynamic_shapes=dynamic_shapes)
255251
tester = tester.quantize(Quantize(quantization_config=quant_config))
256252

257-
# Print after quantization
258253
tester.stages["quantize"] = tester.stages[tester.cur]
259-
print("\n----------Annotated Graph:")
260-
print(tester.stages["quantize"].graph_module.code)
261254

262255
exported = tester.export()
263256

264-
# Print after exporting
265257
tester.stages["export"] = exported.stages[exported.cur]
266-
print("\n----------Exported Graph:")
267-
print(tester.stages["export"].graph_module.code)
268258

269-
# Check for choose_qparams
270259
tester.check(["torch.ops.quantized_decomposed.choose_qparams"])
271260

272261
tester.to_edge_transform_and_lower(
273262
ToEdgeTransformAndLower([DynamicallyQuantizedPartitioner])
274263
)
275264

276-
# Print after lower and partition
277-
print("\n----------Lowered Graph:")
278-
print(tester.stages[tester.cur].graph_module.code)
279-
280-
tester.check(["executorch_exir_dialects_edge__ops_aten_convolution_default"])
281265
tester.check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
282266
tester.check_not(["executorch_exir_dialects_edge__ops_aten_conv2d_default"])
283267

284268
tester.to_executorch()
285-
tester.serialize()
269+
270+
#tester.serialize()
271+
tester.serialize().dump_artifact("conv2d.pte")
272+
286273
tester.run_method_and_compare_outputs(atol=atol)
287274

288275
def test_fp16_conv2d(self) -> None:
@@ -766,15 +753,15 @@ def test_dq_conv2d(self) -> None:
766753
class SimpleConv2d(torch.nn.Module):
767754
def __init__(self):
768755
super().__init__()
769-
self.conv = torch.nn.Conv2d(1, 2, 3)
756+
self.conv = torch.nn.Conv2d(3, 10, 3, )
770757
self.conv.weight.requires_grad = False
771758
self.conv.bias.requires_grad = False
772759

773760
def forward(self, x):
774761
return self.conv(x)
775762

776763
def get_inputs(self):
777-
return (torch.randn(1, 1, 8, 8),)
764+
return (torch.randn(1, 3, 8, 8),)
778765

779766
model = SimpleConv2d()
780767
self._test_dq_conv2d(

0 commit comments

Comments
 (0)