
Commit 26cd168: Apply suggestions

1 parent db8a015

10 files changed: +395, -577 lines


backends/xnnpack/_passes/fuse_activation_pass.py

Lines changed: 2 additions & 0 deletions
@@ -68,6 +68,8 @@ def call(self, graph_module: torch.fx.GraphModule):
             preceding_op.op == "call_function"
             and preceding_op.target in self.FUSEABLE_OPS
         ):
+            # Check that the current activation is the only user of the preceding op
+            # so that we can fuse the activation into the preceding op
             if len(preceding_op.users) > 1:
                 continue
             # Delete activation, and embed metadata into preceding op
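
The sole-user guard exists because fusing the activation into its producer removes the producer's standalone output. A minimal torch.fx sketch (hypothetical module, not part of this commit) showing how a second consumer trips the guard:

    import torch
    from torch import fx

    class M(torch.nn.Module):
        def forward(self, x):
            y = torch.add(x, x)
            # y has two users (relu and mul); fusing relu into add would
            # destroy the un-activated value that mul still needs.
            return torch.relu(y) * y

    gm = fx.symbolic_trace(M())
    add_node = next(n for n in gm.graph.nodes if n.target is torch.add)
    print(len(add_node.users))  # 2 -> the pass hits `continue` and skips fusion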

backends/xnnpack/_passes/fuse_batch_norm_with_conv.py

Lines changed: 2 additions & 0 deletions
@@ -140,6 +140,8 @@ def can_fuse(
         Determine whether a batch norm node can be fused with a preceding conv node.
         """
 
+        # Only fuse transposed convolutions if the kernel size matches the stride;
+        # otherwise the weights are not distributed equally across the spatial dimensions
         is_transpose = conv.args[6]
         kernel_node = get_input_node(conv, 1)
         kernel_shape = get_shape(kernel_node)
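
A minimal sketch of the shape test the new comment implies, with hypothetical helper names (the real pass reads the kernel shape and stride from the conv node's args): when the kernel size equals the stride, transposed-conv output windows do not overlap, so every output position is touched by each weight exactly once.

    from typing import List

    def transposed_conv_bn_fusable(kernel_shape: List[int], stride: List[int]) -> bool:
        # kernel_shape is (inc, oc/groups, *spatial); only the spatial dims matter
        return list(kernel_shape[2:]) == list(stride)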

backends/xnnpack/_passes/tag_implicit_q_dq_pass.py

Lines changed: 10 additions & 5 deletions
@@ -83,11 +83,16 @@ def is_dynamically_quantized(self, node: torch.fx.Node) -> bool:
         return is_dynamic_qdq(node)
 
     def is_supported_quant_op(self, node: torch.fx.Node) -> bool:
-        return (
-            node.op == "call_function"
-            and cast(torch._ops.OpOverload, node.target).name()
-            in SUPPORTED_IMPLICIT_Q_DQ_OP_NAMES_SET
-        )
+        if node.op != "call_function":
+            return False
+
+        op_name = cast(torch._ops.OpOverload, node.target).name()
+
+        # Weight and input should both be quantized
+        if op_name == exir_ops.edge.aten.convolution.default.name():
+            return is_dequant(node.args[1])
+
+        return op_name in SUPPORTED_IMPLICIT_Q_DQ_OP_NAMES_SET
 
     def is_supported_quant_module(self, node: torch.fx.Node) -> bool:
         is_supported = (
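
The new convolution branch inspects the weight argument instead of trusting the op-name set alone. A self-contained sketch of the intent, with an illustrative stand-in for the file's is_dequant helper:

    import torch

    def is_dequant_sketch(node: torch.fx.Node) -> bool:
        # Illustrative stand-in: treat any call_function whose target name
        # mentions "dequantize" as a dequantize op.
        return node.op == "call_function" and "dequantize" in str(node.target)

    def conv_weight_is_quantized(conv_node: torch.fx.Node) -> bool:
        # For aten.convolution, args[1] is the weight; the branch above returns
        # True only when that weight is produced by a dequantize node, i.e.
        # both the input activation and the weight are quantized.
        weight = conv_node.args[1]
        return isinstance(weight, torch.fx.Node) and is_dequant_sketch(weight)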

backends/xnnpack/operators/node_visitor.py

Lines changed: 30 additions & 38 deletions
@@ -343,10 +343,9 @@ def define_tensor(  # noqa: C901
         xnn_graph: XNNGraph,
         vals_to_ids: Dict[torch.fx.Node, int],
         convert_to_nhwc: bool = False,
-        swap_nc_for_depthwise_weights: bool = False,
+        swap_in_out_for_weights: bool = False,
         quant_params: Optional[QuantParams] = None,
         fp32_static_weights: bool = False,
-        swap_in_out_for_transpose_weights: bool = False,
         groups: int = 1,
     ) -> None:
         """
@@ -359,19 +358,21 @@ def define_tensor(  # noqa: C901
                 their corresponding ids in XNNGraph
             convert_to_nhwc: bool to indicate whether tensor shape should be permuted to
                 reflect the nhwc memory format.
-            swap_nc_for_depthwise_weights: bool to indicate whether tensor shape
-                should be permuted such that the N and C dimensions are
-                swapped, which should be used for depthwise convolution
+            swap_in_out_for_weights: bool to indicate whether tensor shape should be
+                permuted and reshaped from (inc, oc/groups, height, width) to
+                (oc, inc/groups, height, width), which should be used for depthwise/transpose convolution
                 weights. This is only valid for tensors which hold
                 constant data. If used along with convert_to_nhwc, this
                 swap will happen before converting to nhwc.
             quant_params: Quantization meta data for this tensor, None if it is not quantized
             fp32_static_weights: XNN_FLAG_FP32_STATIC_WEIGHTS for fp16 conv
-            swap_in_out_for_transpose_weights: bool to indicate whether tensor shape should be
-                permuted and reshape from (inc, oc/groups, height, width) to (oc, inc/groups, height, width)
-            groups: number of groups for swap_in_out_for_transpose_weights
+            groups: number of groups for swap_in_out_for_weights
         """
 
+        assert (
+            swap_in_out_for_weights or groups == 1
+        ), "groups is only valid with swap_in_out_for_weights"
+
         if tensor in vals_to_ids:
             return
 
@@ -399,18 +400,15 @@ def define_tensor(  # noqa: C901
                 xnn_graph,
                 vals_to_ids,
                 convert_to_nhwc,
-                swap_nc_for_depthwise_weights,
+                swap_in_out_for_weights,
                 quant_params,
                 fp32_static_weights,
-                swap_in_out_for_transpose_weights,
                 groups,
             )
 
         # convert tensor shape must reflect memory format, default is contiguous, so
         # only permute shape if we are converting the tensor to nhwc format
-        if swap_nc_for_depthwise_weights:
-            dims = [dims[1], dims[0]] + dims[2:]
-        if swap_in_out_for_transpose_weights:
+        if swap_in_out_for_weights:
             dims = [dims[1] * groups, dims[0] // groups] + dims[2:]
         if convert_to_nhwc:
             check_or_raise(len(dims) == 4, "Converting to nhwc requires 4d tensor")
@@ -431,24 +429,16 @@ def define_tensor(  # noqa: C901
         )
 
         # Override the quant params axis since we have
-        # updated the weights for depthwise, with that the out_channels dim
+        # updated the weights for depthwise / transposed conv2d, with that the out_channels dim
         # will be dims[3] instead of dims[0]. Let's update the per_channel
         # quant axis to match the new weight tensor before serializing
-        if swap_nc_for_depthwise_weights and (
-            quant_params and quant_params.per_channel
-        ):
-            if quant_params.axis == 0:
-                quant_params.axis = len(dims) - 1
-            else:
-                assert f"Unsupported weight per channel quantization axis for depthwise conv2d: {quant_params.axis}, expecting 0."
-
-        if swap_in_out_for_transpose_weights and (
-            quant_params and quant_params.per_channel
-        ):
+        if swap_in_out_for_weights and (quant_params and quant_params.per_channel):
             if quant_params.axis == 0:
                 quant_params.axis = len(dims) - 1
+            elif quant_params.axis == 1:
+                quant_params.axis = 0
             else:
-                assert f"Unsupported weight per channel quantization axis for conv_transpose2d: {quant_params.axis}, expecting 0."
+                raise AssertionError(f"Unsupported weight per channel quantization axis for depthwise conv2d / conv_transpose2d: {quant_params.axis}, expecting 0 / 1.")
 
         # Serialize tensor value
         ser_val = (
@@ -509,10 +499,9 @@ def get_serialized_buffer_index(
         xnn_graph: XNNGraph,
         vals_to_ids: Dict[torch.fx.Node, int],
         convert_to_nhwc: bool,
-        swap_nc_for_depthwise_weights: bool,
+        swap_in_out_for_weights: bool,
         quant_params: Optional[QuantParams],
         fp32_static_weights: bool = False,
-        swap_in_out_for_transpose_weights: bool = False,
         groups: int = 1,
     ) -> int:
         """
@@ -526,24 +515,30 @@ def get_serialized_buffer_index(
                 their corresponding ids in XNNGraph
             convert_to_nhwc: bool to indicate whether tensor shape should be permuted to
                 reflect the nhwc memory format.
-            swap_nc_for_depthwise_weights: bool to indicate whether tensor shape
-                should be permuted such that the N and C dimensions are
-                swapped, which should be used for depthwise convolution
+            swap_in_out_for_weights: bool to indicate whether tensor shape should be
+                permuted and reshaped from (inc, oc/groups, height, width) to
+                (oc, inc/groups, height, width), which should be used for depthwise/transpose convolution
                 weights. This is only valid for tensors which hold
                 constant data. If used along with convert_to_nhwc, this
                 swap will happen before converting to nhwc.
             quant_params: Quantization meta data for this tensor, None if it is not quantized
             fp32_static_weights: bool to indicate whether tensor is fp32 static weights
+            groups: number of groups for swap_in_out_for_weights
 
         Returns:
             buffer_idx: idx of the serialized data. 0 If not associated constant
                 data
         """
+
+        assert (
+            swap_in_out_for_weights or groups == 1
+        ), "groups is only valid with swap_in_out_for_weights"
+
         # The get_attr node is the input to quant_params.
         get_attr_node = tensor if quant_params is None else quant_params.q_input
         if not is_param_node(self.exported_program, get_attr_node):
             check_or_raise(
-                not swap_nc_for_depthwise_weights,
+                not swap_in_out_for_weights,
                 "Swapping N and C dimensions is only valid for constant data tensors",
             )
             return 0
@@ -560,12 +555,9 @@ def get_serialized_buffer_index(
             # ensure that the const is fp32
             const_val = const_val.to(dtype=torch.float32).contiguous()
 
-        if swap_nc_for_depthwise_weights:
-            const_val = const_val.permute(
-                dims=((1, 0) + tuple(range(2, const_val.dim())))
-            ).contiguous()
-
-        if swap_in_out_for_transpose_weights:
+        if swap_in_out_for_weights:
+            # Permute and reshape the tensor from (inc, oc/groups, height, width) to
+            # (oc, inc/groups, height, width), which is the layout XNNPACK expects for
+            # depthwise/transpose convolution weights
             shape = const_val.shape
             const_val = const_val.reshape(
                 (groups, const_val.shape[0] // groups) + const_val.shape[1:]
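
The hunk continues with the transpose/reshape that realizes this layout change; the full sequence is easy to verify in isolation. A sketch with made-up sizes (groups = 2, weight of shape (inc=8, oc/groups=2, 3, 3)):

    import torch

    groups = 2
    w = torch.randn(8, 2, 3, 3)  # (inc, oc/groups, H, W); oc = 2 * groups = 4

    shape = w.shape
    # (inc, oc/g, H, W) -> (g, inc/g, oc/g, H, W): split the group dim out
    w = w.reshape((groups, shape[0] // groups) + shape[1:])
    # -> (g, oc/g, inc/g, H, W): swap the per-group input/output channel dims
    w = w.permute(0, 2, 1, *range(3, w.dim()))
    # -> (oc, inc/g, H, W): fold the group dim into the output channels
    w = w.reshape((shape[1] * groups, shape[0] // groups) + shape[2:]).contiguous()

    print(w.shape)  # torch.Size([4, 4, 3, 3]) == (oc, inc/groups, H, W)

This matches the dims bookkeeping above ([dims[1] * groups, dims[0] // groups] + dims[2:]), and with groups == 1 it reduces to the old depthwise N/C swap.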

backends/xnnpack/operators/op_conv2d.py

Lines changed: 13 additions & 3 deletions
@@ -84,16 +84,26 @@ def define_node(
         )
         fp32_static_weights = kernel_node.meta["val"].dtype == torch.float16
 
+        if weight_quant_params is not None and weight_quant_params.per_channel:
+            if is_transpose:
+                check_or_raise(
+                    weight_quant_params.axis == 1 and groups == 1,
+                    "XNNPACK currently only supports per output channel quantization with groups == 1 for transpose convolutions",
+                )
+            elif is_depthwise_conv:
+                check_or_raise(
+                    weight_quant_params.axis == 0,
+                    "XNNPACK currently only supports per input channel quantization for depthwise convolutions",
+                )
         self.define_tensor(
             kernel_node,
             xnn_graph,
             vals_to_ids,
             convert_to_nhwc=True,
-            swap_nc_for_depthwise_weights=is_depthwise_conv,
+            swap_in_out_for_weights=is_depthwise_conv or is_transpose,
             quant_params=weight_quant_params,
             fp32_static_weights=fp32_static_weights,
-            swap_in_out_for_transpose_weights=is_transpose,
-            groups=groups,
+            groups=groups if is_transpose else 1,
         )
         kwargs["filter_id"] = vals_to_ids[get_input_node(node, 1)]
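
The axis checks encode PyTorch's weight layouts: a transposed-conv weight is (in_channels, out_channels/groups, kH, kW), so per-output-channel scales live on axis 1. A quick check with made-up sizes:

    import torch

    m = torch.nn.ConvTranspose2d(in_channels=8, out_channels=4, kernel_size=3)
    print(m.weight.shape)  # torch.Size([8, 4, 3, 3]) == (inc, oc/groups, kH, kW)

    # One scale/zero-point per output channel -> quantize along axis 1
    scales = torch.full((4,), 0.02)
    zero_points = torch.zeros(4, dtype=torch.int64)
    qw = torch.quantize_per_channel(m.weight.detach(), scales, zero_points, axis=1, dtype=torch.qint8)
    print(qw.q_per_channel_axis())  # 1

After the in/out swap in define_tensor moves the output channels to dim 0, the per-channel axis is remapped from 1 to 0 accordingly.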

backends/xnnpack/partition/config/gemm_configs.py

Lines changed: 3 additions & 2 deletions
@@ -318,7 +318,7 @@ def __init__(self, **kwargs):
 
     def check_constraints(self, node: torch.fx.Node, ep: ExportedProgram) -> bool:
         """
-        Currently we have no support for convolution 3d and transposed convolution
+        Currently we have no support for convolution 3d
         """
         if not super().check_constraints(node, ep):
             return False
@@ -333,11 +333,12 @@ def check_constraints(self, node: torch.fx.Node, ep: ExportedProgram) -> bool:
 
         is_transpose = node.args[6]
         groups = cast(int, node.args[8])
+
         if (
             is_transpose
             and weight_quant_params is not None
             and weight_quant_params.per_channel
-            and groups > 1
+            and (groups > 1 or weight_quant_params.axis != 1)
         ):
             why(
                 node,
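
As a standalone predicate (names hypothetical), the tightened partition constraint mirrors the runtime check added in op_conv2d.py:

    def delegate_per_channel_transposed(groups: int, quant_axis: int) -> bool:
        # Per-channel-quantized transposed convs are delegated only when
        # groups == 1 and the scales are per output channel (axis 1).
        return groups == 1 and quant_axis == 1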

backends/xnnpack/partition/configs.py

Lines changed: 0 additions & 3 deletions
@@ -131,11 +131,8 @@
     torch.nn.functional.conv1d,
     torch.ao.nn.quantized.reference.modules.conv.Conv1d,
     torch.nn.Conv2d,
-    torch.nn.ConvTranspose2d,
     torch.nn.functional.conv2d,
-    torch.nn.functional.conv_transpose2d,
     torch.ao.nn.quantized.reference.modules.conv.Conv2d,
-    torch.ao.nn.quantized.reference.modules.conv.ConvTranspose2d,
     torch.nn.BatchNorm1d,
     torch.nn.BatchNorm2d,
 ]
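
With the source-based list trimmed, ConvTranspose2d lowering flows through the config-based partitioner above. A hedged end-to-end sketch (entry points as of roughly this commit's vintage; treat the exact APIs as assumptions):

    import torch
    from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner
    from executorch.exir import to_edge

    model = torch.nn.ConvTranspose2d(4, 4, kernel_size=2, stride=2).eval()
    ep = torch.export.export(model, (torch.randn(1, 4, 8, 8),))
    edge = to_edge(ep).to_backend(XnnpackPartitioner())
    print(edge.exported_program().graph_module)  # inspect for the delegate call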
