
Commit 4884bc5

mcr229 authored and facebook-github-bot committed
[Quantizer][XNNPACK] Fix ReLU fusion when conv/linear has > 1 user (pytorch#140846)
Summary:
X-link: pytorch/executorch#6894

Fixes a bug in the quantizer where Conv + ReLU was fused even when the preceding conv has more than one user. Conv and ReLU cannot be fused in this case because the result of the conv must also be used elsewhere in the graph. The XNNPACK delegate handles the unfused ReLU naturally by inserting a clamp node.

Test Plan: CI

Reviewed By: digantdesai

Differential Revision: D65989599
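For context, a minimal sketch of the graph shape that triggered the bug (not from the commit; the module and names are hypothetical): the conv output has a second user besides relu, so fusing Conv + ReLU would destroy a value the graph still needs.

import torch

class ConvReluWithExtraUser(torch.nn.Module):
    # The conv output y has two users (relu and the add below), so the
    # quantizer must annotate conv and relu separately, not as a fused unit.
    def __init__(self) -> None:
        super().__init__()
        self.conv = torch.nn.Conv2d(3, 8, kernel_size=3)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        y = self.conv(x)           # user 1: relu; user 2: the add below
        return torch.relu(y) + y   # fusing conv+relu would lose the raw y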
1 parent: 081c168

File tree

1 file changed: +21 −0

torch/ao/quantization/quantizer/xnnpack_quantizer_utils.py

Lines changed: 21 additions & 0 deletions
@@ -247,6 +247,10 @@ def _annotate_linear_relu(
             continue
 
         linear_node = maybe_linear_node
+        if len(linear_node.users) > 1:
+            # if linear node has multiple users, then it can't be fused with relu
+            continue
+
         input_qspec_map = {}
         input_act = linear_node.args[0]
         assert isinstance(input_act, Node)
@@ -351,6 +355,11 @@ def _do_annotate_conv_relu(
             continue
         conv_node = maybe_conv_node
 
+        if len(conv_node.users) > 1:
+            # relu shouldn't be fused into conv if there are other users
+            # of the convolution
+            continue
+
         input_qspec_map = {}
         input_act = conv_node.args[0]
         assert isinstance(input_act, Node)
@@ -738,6 +747,12 @@ def _annotate_add_relu(
             continue
 
         add_node = maybe_add
+
+        if len(add_node.users) > 1:
+            # add can't be fused with ReLU if the result of add is being used
+            # elsewhere in the graph
+            continue
+
         partition = [relu_node, add_node]
 
         if _is_annotated(partition):
@@ -860,6 +875,11 @@ def _annotate_mul_relu(
             continue
 
         mul_node = maybe_mul
+        if len(mul_node.users) > 1:
+            # mul can't be fused with ReLU if the result of mul is being used
+            # elsewhere in the graph
+            continue
+
         partition = [relu_node, mul_node]
 
         if _is_annotated(partition):
@@ -1003,6 +1023,7 @@ def _annotate_cat(
 
 def _is_share_obs_or_fq_op(op: Callable) -> bool:
     return op in [
+        torch.ops.aten.relu.default,
         torch.ops.aten.hardtanh.default,
         torch.ops.aten.hardtanh_.default,
         torch.ops.aten.max_pool2d.default,
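Adding torch.ops.aten.relu.default here lets a standalone (now unfused) relu share its observer or fake-quantize with its input. As a rough illustration (not part of the commit; the capture API varies across PyTorch versions), a PT2E flow that exercises this quantizer on the hypothetical module above might look like:

import torch
from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
from torch.ao.quantization.quantizer.xnnpack_quantizer import (
    XNNPACKQuantizer,
    get_symmetric_quantization_config,
)

model = ConvReluWithExtraUser().eval()
example_inputs = (torch.randn(1, 3, 16, 16),)

# Capture the model for PT2E quantization (export_for_training in recent releases).
exported = torch.export.export_for_training(model, example_inputs).module()

quantizer = XNNPACKQuantizer().set_global(get_symmetric_quantization_config())
prepared = prepare_pt2e(exported, quantizer)  # annotate graph, insert observers
prepared(*example_inputs)                     # calibrate
quantized = convert_pt2e(prepared)            # replace observers with q/dq ops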
