delegated copy op

shewu-quic · shewu-quic · commit 3f188ffd555f · 2024-10-25T15:23:51.000+08:00
diff --git a/backends/qualcomm/builders/__init__.py b/backends/qualcomm/builders/__init__.py
@@ -14,6 +14,7 @@
     op_ceil,
     op_clamp,
     op_conv2d,
+    op_copy,
     op_depth_to_space,
     op_dequantize,
     op_div,
@@ -70,6 +71,7 @@
     op_ceil,
     op_clamp,
     op_conv2d,
+    op_copy,
     op_depth_to_space,
     op_dequantize,
     op_div,
diff --git a/backends/qualcomm/builders/op_copy.py b/backends/qualcomm/builders/op_copy.py
@@ -0,0 +1,91 @@
+# Copyright (c) Qualcomm Innovation Center, Inc.
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+from typing import Dict
+
+import executorch.backends.qualcomm.python.PyQnnWrapperAdaptor as PyQnnWrapper
+
+import torch
+from executorch.backends.qualcomm.utils.constants import (
+    QCOM_QUANT_ATTRS,
+    QCOM_SCALE,
+    QCOM_ZERO_POINT,
+)
+from executorch.exir.dialects._ops import ops as exir_ops
+
+from .node_visitor import NodeVisitor, register_node_visitor
+from .qnn_constants import OpElementWiseAdd, QNN_OP_PACKAGE_NAME_QTI_AISW
+
+
+@register_node_visitor
+class Copy(NodeVisitor):
+    """Lower ``aten.copy.default`` to a QNN ElementWiseAdd with a zero operand."""
+
+    target = ["aten.copy.default"]
+
+    def __init__(self, *args) -> None:
+        super().__init__(*args)
+
+    def define_node(
+        self,
+        node: torch.fx.Node,
+        nodes_to_wrappers: Dict[torch.fx.Node, PyQnnWrapper.TensorWrapper],
+    ) -> PyQnnWrapper.PyQnnOpWrapper:
+        """Build the QNN op that produces the copy result as ``src + 0``.
+
+        ``nodes_to_wrappers`` is forwarded to every ``define_tensor`` call.
+        Returns the op wrapper with its two inputs and one output attached.
+        """
+        # copy(self, src): values come from src (args[1]); args[0] is the destination.
+        input_node = node.args[1]
+        input_tensor = self.get_tensor(input_node, node)
+        copy_inp_tensor_wrapper = self.define_tensor(
+            input_node,
+            input_tensor,
+            PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
+            nodes_to_wrappers,
+            is_input_tensor=True,
+        )
+        input_quant_attrs = input_node.meta.get(QCOM_QUANT_ATTRS)
+        # Synthetic node that carries the static zero operand of the add.
+        zero_input_node = torch.fx.Node(
+            node.graph,
+            node.name + "_runtime_scalar",
+            "call_function",
+            exir_ops.edge.aten.scalar_tensor.default,
+            (),  # args
+            {},  # kwargs
+        )
+        zero_input_tensor = torch.tensor(0, dtype=input_tensor.dtype)
+        if input_quant_attrs:
+            # Override the encoding (scale 1, zero point 0) for the zero operand.
+            zero_quant_attrs = input_quant_attrs.copy()
+            zero_quant_attrs[QCOM_ZERO_POINT] = 0
+            zero_quant_attrs[QCOM_SCALE] = 1
+            zero_input_node.meta[QCOM_QUANT_ATTRS] = zero_quant_attrs
+        zero_tensor_wrapper = self.define_tensor(
+            zero_input_node,
+            zero_input_tensor,
+            PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_STATIC,
+            nodes_to_wrappers,
+            is_input_tensor=True,
+        )
+        if input_quant_attrs:
+            # The copy node has no quant attrs after convert_pt2e; reuse the input's.
+            node.meta[QCOM_QUANT_ATTRS] = input_quant_attrs.copy()
+        output_tensor = self.get_tensor(node, node)
+        output_tensor_wrapper = self.define_tensor(
+            node,
+            output_tensor,
+            PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
+            nodes_to_wrappers,
+            is_input_tensor=False,
+        )
+        copy_op = PyQnnWrapper.PyQnnOpWrapper(
+            node.name, QNN_OP_PACKAGE_NAME_QTI_AISW, OpElementWiseAdd.op_name
+        )
+        copy_op.AddInputTensors([copy_inp_tensor_wrapper, zero_tensor_wrapper])
+        copy_op.AddOutputTensors([output_tensor_wrapper])
+        return copy_op
diff --git a/backends/qualcomm/partition/common_defs.py b/backends/qualcomm/partition/common_defs.py
@@ -13,7 +13,6 @@
     exir_ops.edge.aten.clone.default,
     exir_ops.edge.aten.full.default,
     exir_ops.edge.aten.slice_scatter.default,
-    exir_ops.edge.aten.copy.default,
     exir_ops.edge.quantized_decomposed.embedding_4bit.dtype,
 ]
 
diff --git a/backends/qualcomm/quantizer/custom_annotation.py b/backends/qualcomm/quantizer/custom_annotation.py
@@ -161,7 +161,7 @@ def get_custom_quant_ios_dtype(
 
     # Tag index put node before copy node, because copy is a skipped node in qnn
     if (
-        exir_ops.edge.aten.index_put.default == node.target
+        exir_ops.edge.aten.copy.default == node.target
         and node.meta["val"].shape == cache_shape
     ):
         return kv_dtype

Original file line number	Diff line number	Diff line change
`@@ -13,7 +13,6 @@`
`13`	`13`	`exir_ops.edge.aten.clone.default,`
`14`	`14`	`exir_ops.edge.aten.full.default,`
`15`	`15`	`exir_ops.edge.aten.slice_scatter.default,`
`16`		`- exir_ops.edge.aten.copy.default,`
`17`	`16`	`exir_ops.edge.quantized_decomposed.embedding_4bit.dtype,`
`18`	`17`	`]`
`19`	`18`