Update remove clone to drop no-op q/dq (#10920)

JakeStevens · facebook-github-bot · commit ba8a4741182e · 2025-07-03T06:59:28.000-07:00
Summary: Pull Request resolved: #10920. After removing a clone op, we may be left with no-op dequantize -> quantize (dq -> q) pairs. This diff updates the pass in backends/transforms to remove these, if they exist. Differential Revision: D74832417
diff --git a/backends/transforms/remove_clone_ops.py b/backends/transforms/remove_clone_ops.py
@@ -9,23 +9,42 @@
 import torch
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass, PassResult
+from executorch.exir.passes import dead_code_elimination_pass
+from executorch.exir.passes.remove_noop_pass import _DEQUANT_OPS, eliminate_dq_q
 
 
-def remove_clone_ops(graph: torch.fx.Graph) -> torch.fx.Graph:
+class RemoveCloneOpsTransform(ExportPass):
     """
-    Remove clone op nodes and replace uses with parent node.
+    Trim the 'identity' operators to reduce the unnecessary copy overhead.
     """
-    clone_op = exir_ops.edge.aten.clone.default
-    for node in graph.nodes:
-        if node.op == "call_function" and node.target == clone_op:
-            with graph.inserting_after(node):
-                node.replace_all_uses_with(node.args[0])
 
-    graph.eliminate_dead_code()
-    return graph
+    clone_ops = {
+        torch.clone,
+        torch.ops.aten.clone.default,
+        exir_ops.edge.aten.clone.default,
+    }
 
+    def __init__(self):
+        super().__init__()
 
-class RemoveCloneOpsTransform(ExportPass):
-    def call(self, graph_module: torch.fx.GraphModule) -> PassResult:
-        graph_module.graph = remove_clone_ops(graph_module.graph)
+    def _remove(self, graph_module: torch.fx.GraphModule) -> torch.fx.GraphModule:
+        dequant_nodes = []
+
+        for n in graph_module.graph.nodes:
+            if n.target not in self.clone_ops:
+                continue
+
+            to_be_remove = n
+            for user_n in list(n.users.keys()):
+                user_n.replace_input_with(n, n.args[0])
+            if n.args[0].target in _DEQUANT_OPS:
+                dequant_nodes += [n.args[0]]
+            graph_module.graph.erase_node(to_be_remove)
+
+        eliminate_dq_q(graph_module, dequant_nodes)
+
+    def call(self, graph_module: torch.fx.GraphModule):
+        self._remove(graph_module)
+        graph_module.recompile()
+        dead_code_elimination_pass(graph_module)
         return PassResult(graph_module, True)
diff --git a/backends/transforms/targets.bzl b/backends/transforms/targets.bzl
@@ -109,6 +109,7 @@ def define_common_targets():
         srcs = ["remove_clone_ops.py"],
         visibility = [
             "//executorch/backends/...",
+            "@EXECUTORCH_CLIENTS",
         ],
         deps = [
             "//caffe2:torch",
@@ -242,3 +243,15 @@ def define_common_targets():
             ":rank_0_to_rank_1",
         ],
     )
+
+    runtime.python_test(
+        name = "test_remove_clone_ops",
+        srcs = [
+            "test/test_remove_clone_ops.py",
+        ],
+        deps = [
+            "//caffe2:torch",
+            "//executorch/exir:lib",
+            ":remove_clone_ops",
+        ],
+    )
diff --git a/backends/transforms/test/test_remove_clone_ops.py b/backends/transforms/test/test_remove_clone_ops.py
@@ -0,0 +1,151 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+
+import torch
+from executorch.backends.transforms.remove_clone_ops import RemoveCloneOpsTransform
+from torch.fx import GraphModule
+from torch.testing._internal.common_utils import TestCase
+
+
+class TestRemoveCloneOpsTransform(TestCase):
+    def test_dq_clone_q_linear(self):
+        """
+        Test RemoveCloneOpsTransform on a graph with a dq -> clone -> q -> linear pattern
+
+        Before: Should contain all nodes
+        After: Should only have the linear operation
+        """
+
+        # Create a graph module directly with the pattern: dequant -> clone -> quant -> fp linear
+        class TestModule(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.linear = torch.nn.Linear(10, 5)
+
+            def forward(self, x):
+                # This will be replaced with our custom graph
+                return self.linear(x)
+
+        # Create a module instance
+        module = TestModule()
+
+        # Create a new graph with our desired pattern
+        graph = torch.fx.Graph()
+
+        # Add placeholders
+        input_node = graph.placeholder("x")
+
+        # Create nodes for our pattern: dequant -> clone -> quant -> fp linear
+        # Constants for quantization parameters
+        scale = graph.create_node(
+            "call_function", torch.tensor, args=([0.1],), kwargs={}
+        )
+        zero_point = graph.create_node(
+            "call_function", torch.tensor, args=([0],), kwargs={}
+        )
+
+        # Dequantize node
+        dequant_node = graph.create_node(
+            "call_function",
+            torch.ops.quantized_decomposed.dequantize_per_tensor.default,
+            args=(input_node, scale, zero_point, torch.int8),
+            kwargs={},
+        )
+
+        # Clone node
+        clone_node = graph.create_node(
+            "call_function",
+            torch.ops.aten.clone.default,
+            args=(dequant_node,),
+            kwargs={},
+        )
+
+        # Quantize node
+        quant_node = graph.create_node(
+            "call_function",
+            torch.ops.quantized_decomposed.quantize_per_tensor.default,
+            args=(clone_node, scale, zero_point, torch.int8),
+            kwargs={},
+        )
+
+        # Linear node (using the module's linear layer)
+        # Technically, should use quantized weight and bias
+        # but we are just inspecting graph patterns in this test
+        weight = graph.create_node("get_attr", "linear.weight")
+        bias = graph.create_node("get_attr", "linear.bias")
+        linear_node = graph.create_node(
+            "call_function",
+            torch.nn.functional.linear,
+            args=(quant_node, weight, bias),
+            kwargs={},
+        )
+
+        # Output
+        graph.output(linear_node)
+
+        # Create a GraphModule with our custom graph
+        gm = GraphModule(module, graph)
+
+        # Print the graph before transformation
+        print("Before transformation:")
+        print(gm.graph)
+
+        # Check node counts before transformation
+        node_counts_before = {}
+        for node in gm.graph.nodes:
+            if node.op == "call_function":
+                target_name = str(node.target)
+                if target_name not in node_counts_before:
+                    node_counts_before[target_name] = 0
+                node_counts_before[target_name] += 1
+
+        # Verify we have the expected nodes before transformation
+        self.assertIn(str(torch.ops.aten.clone.default), node_counts_before)
+        self.assertIn(
+            str(torch.ops.quantized_decomposed.quantize_per_tensor.default),
+            node_counts_before,
+        )
+        self.assertIn(
+            str(torch.ops.quantized_decomposed.dequantize_per_tensor.default),
+            node_counts_before,
+        )
+        self.assertIn(str(torch.nn.functional.linear), node_counts_before)
+
+        # Apply the transform
+        transformed_gm = RemoveCloneOpsTransform()(gm).graph_module
+
+        # Print the graph after transformation
+        print("After transformation:")
+        print(transformed_gm.graph)
+
+        # Check node counts after transformation
+        node_counts_after = {}
+        for node in transformed_gm.graph.nodes:
+            if node.op == "call_function":
+                target_name = str(node.target)
+                if target_name not in node_counts_after:
+                    node_counts_after[target_name] = 0
+                node_counts_after[target_name] += 1
+
+        # Verify the dq -> clone -> q pattern is removed
+        self.assertNotIn(str(torch.ops.aten.clone.default), node_counts_after)
+        self.assertNotIn(
+            str(torch.ops.quantized_decomposed.dequantize_per_tensor.default),
+            node_counts_after,
+        )
+        self.assertNotIn(
+            str(torch.ops.quantized_decomposed.quantize_per_tensor.default),
+            node_counts_after,
+        )
+
+        # Verify the linear op is still present
+        self.assertIn(str(torch.nn.functional.linear), node_counts_after)
+
+
+if __name__ == "__main__":
+    unittest.main()