Update remove clone to drop no-op q/dq (#10920)

JakeStevens · facebook-github-bot · commit 50b366d0cb94 · 2025-07-25T11:48:48.000-07:00
Summary:

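After a clone op is removed, the graph may be left with no-op quantize/dequantize operations (e.g. a dequantize that now feeds directly into a quantize).

This diff updates the RemoveCloneOpsTransform pass in backends/transforms to remove these no-op dq/q pairs, if they exist.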

Differential Revision: D74832417
diff --git a/backends/transforms/remove_clone_ops.py b/backends/transforms/remove_clone_ops.py
@@ -9,23 +9,40 @@
 import torch
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass, PassResult
+from executorch.exir.passes import dead_code_elimination_pass
+from executorch.exir.passes.remove_noop_pass import _DEQUANT_OPS, eliminate_dq_q
 
 
-def remove_clone_ops(graph: torch.fx.Graph) -> torch.fx.Graph:
+class RemoveCloneOpsTransform(ExportPass):
     """
-    Remove clone op nodes and replace uses with parent node.
+    Remove clone ('identity') ops, plus any no-op dq/q pairs left behind, to avoid unnecessary copy overhead.
     """
-    clone_op = exir_ops.edge.aten.clone.default
-    for node in graph.nodes:
-        if node.op == "call_function" and node.target == clone_op:
-            with graph.inserting_after(node):
-                node.replace_all_uses_with(node.args[0])
 
-    graph.eliminate_dead_code()
-    return graph
+    clone_ops = {
+        exir_ops.edge.aten.clone.default,
+    }
 
+    def __init__(self):
+        super().__init__()
 
-class RemoveCloneOpsTransform(ExportPass):
-    def call(self, graph_module: torch.fx.GraphModule) -> PassResult:
-        graph_module.graph = remove_clone_ops(graph_module.graph)
+    def _remove(self, graph_module: torch.fx.GraphModule) -> torch.fx.GraphModule:
+        dequant_nodes = []
+
+        for n in graph_module.graph.nodes:
+            if n.target not in self.clone_ops:
+                continue
+
+            to_be_remove = n
+            for user_n in list(n.users.keys()):
+                user_n.replace_input_with(n, n.args[0])
+            if n.args[0].target in _DEQUANT_OPS:
+                dequant_nodes += [n.args[0]]
+            graph_module.graph.erase_node(to_be_remove)
+
+        eliminate_dq_q(graph_module, dequant_nodes)
+
+    def call(self, graph_module: torch.fx.GraphModule):
+        self._remove(graph_module)
+        graph_module.recompile()
+        dead_code_elimination_pass(graph_module)
         return PassResult(graph_module, True)
diff --git a/backends/transforms/targets.bzl b/backends/transforms/targets.bzl
@@ -109,6 +109,7 @@ def define_common_targets():
         srcs = ["remove_clone_ops.py"],
         visibility = [
             "//executorch/backends/...",
+            "@EXECUTORCH_CLIENTS",
         ],
         deps = [
             "//caffe2:torch",
@@ -242,3 +243,15 @@ def define_common_targets():
             ":rank_0_to_rank_1",
         ],
     )
+
+    runtime.python_test(
+        name = "test_remove_clone_ops",
+        srcs = [
+            "test/test_remove_clone_ops.py",
+        ],
+        deps = [
+            "//caffe2:torch",
+            "//executorch/exir:lib",
+            ":remove_clone_ops",
+        ],
+    )
diff --git a/backends/transforms/test/test_remove_clone_ops.py b/backends/transforms/test/test_remove_clone_ops.py
@@ -0,0 +1,130 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import unittest
+
+import torch
+from executorch.backends.transforms.remove_clone_ops import RemoveCloneOpsTransform
+from executorch.exir import EdgeCompileConfig, to_edge
+from executorch.exir.dialects._ops import ops as exir_ops
+from torch.export import export
+from torch.fx import GraphModule
+from torch.testing import FileCheck
+from torch.testing._internal.common_utils import TestCase
+
+
+class TestRemoveCloneOpsTransform(TestCase):
+    def test_dq_clone_q_linear(self):
+        """
+        Test RemoveCloneOpsTransform on a graph with dq -> clone -> q -> linear pattern
+
+        Before: Should contain all nodes
+        After: Should only have the linear operation
+        """
+
+        # Create a graph module directly with the pattern: dequant -> clone -> quant -> fp linear
+        class TestModule(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.linear = torch.nn.Linear(10, 5)
+
+            def forward(self, x):
+                # This will be replaced with our custom graph
+                return self.linear(x)
+
+        # Create a module instance
+        module = TestModule()
+
+        # Create a new graph with our desired pattern
+        graph = torch.fx.Graph()
+
+        # Add placeholders
+        input_node = graph.placeholder("x")
+
+        # Create nodes for our pattern: dequant -> clone -> quant -> fp linear
+        # Constants for quantization parameters
+        scale = graph.create_node(
+            "call_function", torch.tensor, args=([0.1],), kwargs={}
+        )
+        zero_point = graph.create_node(
+            "call_function", torch.tensor, args=([0],), kwargs={}
+        )
+
+        # Dequantize node
+        dequant_node = graph.create_node(
+            "call_function",
+            torch.ops.quantized_decomposed.dequantize_per_tensor.default,
+            args=(input_node, scale, zero_point, torch.int8),
+            kwargs={},
+        )
+
+        # Clone node.
+        # Use Edge op as this is an executorch pass
+        clone_node = graph.create_node(
+            "call_function",
+            exir_ops.edge.aten.clone.default,
+            args=(dequant_node,),
+            kwargs={},
+        )
+
+        # Quantize node
+        quant_node = graph.create_node(
+            "call_function",
+            torch.ops.quantized_decomposed.quantize_per_tensor.default,
+            args=(clone_node, scale, zero_point, torch.int8),
+            kwargs={},
+        )
+
+        # Linear node (using the module's linear layer)
+        # Technically, should use quantized weight and bias
+        # but we are just inspecting graph patterns in this test
+        weight = graph.create_node("get_attr", "linear.weight")
+        bias = graph.create_node("get_attr", "linear.bias")
+        linear_node = graph.create_node(
+            "call_function",
+            torch.nn.functional.linear,
+            args=(quant_node, weight, bias),
+            kwargs={},
+        )
+
+        # Output
+        graph.output(linear_node)
+
+        # Create a GraphModule with our custom graph
+        gm = GraphModule(module, graph)
+
+        # Verify we have the expected nodes before transformation using FileCheck
+        FileCheck().check(
+            "torch.ops.quantized_decomposed.dequantize_per_tensor.default",
+        ).check(
+            "executorch_exir_dialects_edge__ops_aten_clone_default",
+        ).check(
+            "torch.ops.quantized_decomposed.quantize_per_tensor.default",
+        ).check(
+            "torch._C._nn.linear",
+        ).run(
+            gm.code
+        )
+
+        # Apply the transform
+        transformed_gm = RemoveCloneOpsTransform()(gm).graph_module
+
+        # Verify the dq -> clone -> q pattern is removed and linear op is still present using FileCheck
+        FileCheck().check_not(
+            "executorch_exir_dialects_edge__ops_aten_clone_default"
+        ).check_not("quantized_decomposed.dequantize_per_tensor.default").check_not(
+            "quantized_decomposed.quantize_per_tensor.default"
+        ).check_count(
+            "torch._C._nn.linear",
+            1,
+            exactly=True,
+        ).run(
+            transformed_gm.code
+        )
+
+
+if __name__ == "__main__":
+    unittest.main()