
Commit 7f236c3

Qualcomm AI Engine Direct - Delegated mutable buffer
Summary:
- Support copy op with QNN Reshape
- Consume mutable buffer in QNN Delegate
- Set the same memory address for I/O of mutable buffer at runtime
1 parent 39e5b91 commit 7f236c3
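
For context, the kind of model this change targets is one whose forward() mutates a registered buffer (for example a KV cache). Below is a minimal sketch, not taken from this commit; the module, names, and shapes are hypothetical.

import torch


class SimpleCache(torch.nn.Module):  # hypothetical example module
    def __init__(self):
        super().__init__()
        self.register_buffer("k_cache", torch.zeros(1, 4, 8))

    def forward(self, new_k):
        # In-place buffer mutation: after torch.export this is recorded as a
        # mutable buffer in graph_signature.buffers_to_mutate, and the update
        # may surface as an aten.copy node in the lowered graph.
        self.k_cache.copy_(new_k)
        return self.k_cache


ep = torch.export.export(SimpleCache(), (torch.ones(1, 4, 8),))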

File tree

12 files changed: +202 -24 lines


backends/qualcomm/_passes/layout_transform.py

Lines changed: 1 addition & 0 deletions
@@ -64,6 +64,7 @@ class LayoutTransform(ExportPass):
         exir_ops.edge.aten.prelu.default,
         exir_ops.edge.aten.relu.default,
         exir_ops.edge.aten._softmax.default,  # TODO: Need to find a new solution to do "axis_order" to transform axis.
+        exir_ops.edge.aten.sigmoid.default,
         exir_ops.edge.aten.sqrt.default,
         exir_ops.edge.aten.sub.Tensor,
         exir_ops.edge.aten.sum.dim_IntList,

backends/qualcomm/builders/__init__.py

File mode changed: 100644 → 100755
Lines changed: 2 additions & 0 deletions
@@ -14,6 +14,7 @@
     op_ceil,
     op_clamp,
     op_conv2d,
+    op_copy,
     op_depth_to_space,
     op_dequantize,
     op_div,
@@ -71,6 +72,7 @@
     op_ceil,
     op_clamp,
     op_conv2d,
+    op_copy,
     op_depth_to_space,
     op_dequantize,
     op_div,

backends/qualcomm/builders/node_visitor.py

Lines changed: 36 additions & 7 deletions
@@ -36,6 +36,8 @@
     get_parameter,
     is_graph_input,
     is_graph_output,
+    is_mutable_buffer_input,
+    is_mutable_buffer_output,
     is_parameter,
 )
 
@@ -214,7 +216,9 @@ def get_tensor_type(
         node: torch.fx.Node,
         tensor_type: PyQnnWrapper.Qnn_TensorType_t,
     ) -> PyQnnWrapper.Qnn_TensorType_t:
-        is_input = is_graph_input(node, self.edge_program)
+        is_input = is_graph_input(node, self.edge_program) or is_mutable_buffer_input(
+            node, self.edge_program
+        )
         is_output = is_graph_output(node)
         # handle logic for input/output tensors
         if is_input or is_output:
@@ -247,6 +251,33 @@ def get_data_type(
 
         return QNN_TENSOR_TYPE_MAP[tensor.dtype]
 
+    def get_tensor_name(
+        self,
+        node: torch.fx.Node,
+        wrapper_idx: int = 0,
+    ):
+        tensor_name = f"{node.name}_{wrapper_idx}"
+        # The `input_{id}` is utilized for sorting at runtime. Due to multiple passes in qnn_preprocess,
+        # the input order between QNN and the original graph's forward function may differ.
+        # The `mutbuf_{id}` is utilized for mapping I/O of mutable buffer at runtime.
+        # The `output_` is identified as the graph's output at runtime to prevent confusion with per_tensor_dump.
+        if is_mutable_buffer_input(node, self.edge_program):
+            fqn = self.edge_program.graph_signature.inputs_to_buffers[node.target]
+            position_index = list(
+                self.edge_program.graph_signature.buffers_to_mutate.values()
+            ).index(fqn)
+            tensor_name = f"input_{str(self.external_ids[node])}_mutbuf_{str(position_index)}_{tensor_name}"
+        elif is_graph_input(node, self.edge_program):
+            tensor_name = f"input_{str(self.external_ids[node])}_{tensor_name}"
+        elif is_mutable_buffer_output(node, self.edge_program):
+            position_index = list(
+                self.edge_program.graph_signature.buffers_to_mutate.keys()
+            ).index(node.name)
+            tensor_name = f"output_mutbuf_{position_index}_{tensor_name}"
+        elif is_graph_output(node):
+            tensor_name = f"output_{tensor_name}"
+        return tensor_name
+
     def define_custom_tensor_wrapper(
         self,
         node_name: str,
@@ -307,11 +338,7 @@ def define_tensor(
         if cached := nodes_to_wrappers[node_name].get(wrapper_idx, None):
             return cached
 
-        tensor_name = f"{node.name}_{wrapper_idx}"
-        if is_graph_input(node, self.edge_program):
-            tensor_name = "input_" + str(self.external_ids[node]) + "_" + tensor_name
-        if is_graph_output(node):
-            tensor_name = "output_" + tensor_name
+        tensor_name = self.get_tensor_name(node, wrapper_idx)
         dims = [1] if len(tensor.size()) == 0 else tensor.size()
         tensor_type = self.get_tensor_type(node, tensor_type)
         quant_encoding, quant_configs = self.get_quant_encoding_conf(
@@ -383,7 +410,9 @@ def generate_node_to_external_map(
         # The order in which we visit the placeholder node is same as the *args
        # order for the forward(*args) signature for this gm. Using the order of
        # the nodes as external_id to extract the right arg from *args at runtime
-        if is_graph_input(node, edge_program):
+        if is_graph_input(node, edge_program) or is_mutable_buffer_input(
+            node, edge_program
+        ):
             node_to_external_map[node] = len(node_to_external_map)
     for node in edge_program.graph_module.graph.nodes:
         if is_graph_output(node):
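
The comments in get_tensor_name describe a naming convention that the runtime later parses: `input_{id}` carries the argument position, `mutbuf_{id}` pairs a mutable buffer's input tensor with its output tensor, and `output_` marks graph outputs. The sketch below illustrates the convention with hypothetical names and mirrors the runtime-side parsing (the C++ helper is ExtractMutableBufferNumber in QnnManager.cpp); it is not generated by this commit.

# Hypothetical tensor names following the convention above:
#   regular graph input    -> "input_0_x_0"
#   mutable-buffer input   -> "input_1_mutbuf_0_b_k_cache_0"
#   regular graph output   -> "output_aten_add_tensor_0"
#   mutable-buffer output  -> "output_mutbuf_0_aten_copy_default_0"


def extract_mutable_buffer_number(name: str) -> int:
    """Python mirror of ExtractMutableBufferNumber in QnnManager.cpp."""
    prefix = "mutbuf_"
    start = name.find(prefix)
    if start == -1:
        return -1  # not a mutable-buffer tensor
    start += len(prefix)
    end = start
    while end < len(name) and name[end].isdigit():
        end += 1
    return int(name[start:end])


assert extract_mutable_buffer_number("input_1_mutbuf_0_b_k_cache_0") == 0
assert extract_mutable_buffer_number("output_mutbuf_0_aten_copy_default_0") == 0
assert extract_mutable_buffer_number("output_aten_add_tensor_0") == -1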

backends/qualcomm/builders/op_copy.py

Lines changed: 63 additions & 0 deletions
@@ -0,0 +1,63 @@
+# Copyright (c) Qualcomm Innovation Center, Inc.
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+from typing import Dict
+
+import executorch.backends.qualcomm.python.PyQnnWrapperAdaptor as PyQnnWrapper
+
+import torch
+from executorch.backends.qualcomm.utils.constants import QCOM_QUANT_ATTRS
+
+from .node_visitor import NodeVisitor, register_node_visitor
+from .qnn_constants import OpReshape, QNN_OP_PACKAGE_NAME_QTI_AISW
+
+
+@register_node_visitor
+class Copy(NodeVisitor):
+    target = ["aten.copy.default"]
+
+    def __init__(self, *args) -> None:
+        super().__init__(*args)
+
+    def define_node(
+        self,
+        node: torch.fx.Node,
+        nodes_to_wrappers: Dict[torch.fx.Node, PyQnnWrapper.TensorWrapper],
+    ) -> PyQnnWrapper.PyQnnOpWrapper:
+        input_node = node.args[1]
+        input_tensor = self.get_tensor(input_node, node)
+        copy_inp_tensor_wrapper = self.define_tensor(
+            input_node,
+            input_tensor,
+            PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
+            nodes_to_wrappers,
+            is_input_tensor=True,
+        )
+
+        copy_input_tensors = [copy_inp_tensor_wrapper]
+
+        if quant_attrs := input_node.meta.get(QCOM_QUANT_ATTRS):
+            quant_attrs = quant_attrs.copy()
+            # Because there is no output after convert_pt2e, the QCOM_QUANT_ATTRS of node is none
+            node.meta[QCOM_QUANT_ATTRS] = quant_attrs
+        output_tensor = self.get_tensor(node, node)
+        output_tensor_wrapper = self.define_tensor(
+            node,
+            output_tensor,
+            PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
+            nodes_to_wrappers,
+            is_input_tensor=False,
+        )
+        copy_output_tensors = [output_tensor_wrapper]
+
+        copy_op = PyQnnWrapper.PyQnnOpWrapper(
+            node.name,
+            QNN_OP_PACKAGE_NAME_QTI_AISW,
+            OpReshape.op_name,
+        )
+        copy_op.AddInputTensors(copy_input_tensors)
+        copy_op.AddOutputTensors(copy_output_tensors)
+
+        return copy_op
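
Mapping aten.copy onto QNN Reshape relies on a shape-preserving Reshape being a pure element-for-element data movement, i.e. a copy. A quick PyTorch-level sanity check of that equivalence follows; it is illustrative only, with hypothetical shapes and dtypes.

import torch

dst = torch.zeros(2, 3)
src = torch.arange(6, dtype=torch.float32).reshape(2, 3)

# Functional copy: returns a tensor carrying src's values into dst's slot.
out_copy = torch.ops.aten.copy.default(dst, src)
# A Reshape to the identical shape moves the same elements, i.e. acts as a copy.
out_reshape = src.reshape(dst.shape)

assert torch.equal(out_copy, out_reshape)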

backends/qualcomm/builders/utils.py

Lines changed: 37 additions & 0 deletions
@@ -75,6 +75,23 @@ def is_graph_input(
     return tensor.op == "placeholder" and not is_parameter(tensor, edge_program)
 
 
+def is_mutable_buffer_input(
+    tensor: torch.fx.Node, edge_program: torch.export.ExportedProgram
+) -> bool:
+    """
+    Check if the given tensor is a mutable buffer input
+
+    Args:
+        tensor: EdgeIR Tensor that is being checked for mutable buffer input
+    """
+    if tensor.op == "placeholder" and is_buffer(edge_program, tensor):
+        fqn = edge_program.graph_signature.inputs_to_buffers[tensor.target]
+        # if the buffer is mutated then record that
+        if fqn in edge_program.graph_signature.buffers_to_mutate.values():
+            return True
+    return False
+
+
 def is_graph_output(tensor: torch.fx.Node) -> bool:
     """
     Check if the given tensor is used as a graph output
@@ -91,6 +108,26 @@ def is_graph_output(tensor: torch.fx.Node) -> bool:
     return False
 
 
+def is_mutable_buffer_output(
+    tensor: torch.fx.Node, edge_program: torch.export.ExportedProgram
+) -> bool:
+    """
+    Check if the given tensor is a mutable buffer output
+
+    Args:
+        tensor: EdgeIR Tensor that is being checked for mutable buffer output
+    """
+    for user in tensor.users.keys():
+        # getitem node is skiped, check the op_skip_ops.py
+        if user.op == "output" or (
+            user.target.__name__ == "getitem" and is_graph_output(user)
+        ):
+            # if the buffer is mutated then record that
+            if tensor.name in edge_program.graph_signature.buffers_to_mutate.keys():
+                return True
+    return False
+
+
 def is_constant(
     tensor: torch.fx.Node, edge_program: torch.export.ExportedProgram
 ) -> bool:
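
Both helpers read only the exported program's graph signature. For a toy module like the SimpleCache sketch near the top of this page, the relevant fields can be inspected directly; the exact placeholder and node names depend on the export and are shown here only as a guess.

import torch


class SimpleCache(torch.nn.Module):  # same hypothetical module as the earlier sketch
    def __init__(self):
        super().__init__()
        self.register_buffer("k_cache", torch.zeros(1, 4, 8))

    def forward(self, new_k):
        self.k_cache.copy_(new_k)
        return self.k_cache


ep = torch.export.export(SimpleCache(), (torch.ones(1, 4, 8),))
sig = ep.graph_signature

# Placeholder name -> buffer fully-qualified name, e.g. {'b_k_cache': 'k_cache'}.
# is_mutable_buffer_input() checks that this FQN also appears in
# buffers_to_mutate.values().
print(sig.inputs_to_buffers)

# Output node name -> mutated buffer FQN, e.g. {'copy': 'k_cache'}.
# is_mutable_buffer_output() checks the node feeding the graph output by name.
print(sig.buffers_to_mutate)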

backends/qualcomm/partition/common_defs.py

File mode changed: 100644 → 100755
Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@
     exir_ops.edge.aten.clone.default,
     exir_ops.edge.aten.full.default,
     exir_ops.edge.aten.slice_scatter.default,
-    exir_ops.edge.aten.copy.default,
+    exir_ops.edge.quantized_decomposed.embedding_4bit.dtype,
 ]
 
 to_be_implemented_operator = [

backends/qualcomm/partition/qnn_partitioner.py

Lines changed: 5 additions & 1 deletion
@@ -23,7 +23,7 @@
     Partitioner,
     PartitionResult,
 )
-from executorch.exir.backend.utils import tag_constant_data
+from executorch.exir.backend.utils import tag_constant_data, tag_mutated_buffer
 from torch.fx.passes.infra.partitioner import Partition
 from torch.fx.passes.operator_support import OperatorSupportBase
 
@@ -103,6 +103,7 @@ def __init__(
         compiler_specs: List[CompileSpec],
         skip_node_id_set: set = None,
         skip_node_op_set: set = None,
+        skip_mutable_buffer: bool = True,
     ):
         self.compiler_specs_snapshot = copy.deepcopy(compiler_specs)
 
@@ -112,6 +113,7 @@
         self.partition_tags: Dict[str, DelegationSpec] = {}
         self.skip_node_id_set = set() if skip_node_id_set is None else skip_node_id_set
         self.skip_node_op_set = set() if skip_node_op_set is None else skip_node_op_set
+        self.skip_mutable_buffer = skip_mutable_buffer
 
     def generate_partitions(
         self, edge_program: torch.export.ExportedProgram
@@ -157,6 +159,8 @@ def partition(self, edge_program: torch.export.ExportedProgram) -> PartitionResult:
         if len(partitions) != 0:
             self.tag_nodes(partitions, edge_program)
             tag_constant_data(edge_program)
+            if not self.skip_mutable_buffer:
+                tag_mutated_buffer(edge_program)
         for node in edge_program.graph_module.graph.nodes:
             if hasattr(node, "meta"):
                 # pop certain keys in meta for not affecting the passes in compilation
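
Delegating mutable buffers is opt-in through the new skip_mutable_buffer flag, which defaults to True so existing flows keep the buffers in the runtime. Below is a minimal sketch of how the flag might be used; the compile-spec construction and the to_backend() lowering step are assumed from the existing Qualcomm backend examples and are not part of this diff.

# Sketch only: compiler_specs construction is unchanged by this commit and is
# elided; it would normally come from the backend's compile-spec helper
# (e.g. generate_qnn_executorch_compiler_spec).
from executorch.backends.qualcomm.partition.qnn_partitioner import QnnPartitioner

compiler_specs = ...  # placeholder for the usual List[CompileSpec]

# Default behavior (skip_mutable_buffer=True): mutable buffers are left to the
# ExecuTorch runtime, as before this commit.
partitioner = QnnPartitioner(compiler_specs)

# Opt in to delegated mutable buffers: partition() then calls
# tag_mutated_buffer() so the buffers are consumed inside the QNN partition.
partitioner = QnnPartitioner(compiler_specs, skip_mutable_buffer=False)

# The partitioner is passed to the lowering flow as usual, e.g.
# edge_program_manager.to_backend(partitioner).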

backends/qualcomm/quantizer/custom_annotation.py

File mode changed: 100644 → 100755
Lines changed: 1 addition & 1 deletion
@@ -161,7 +161,7 @@ def get_custom_quant_ios_dtype(
 
     # Tag index put node before copy node, because copy is a skipped node in qnn
     if (
-        exir_ops.edge.aten.index_put.default == node.target
+        exir_ops.edge.aten.copy.default == node.target
         and node.meta["val"].shape == cache_shape
     ):
         return kv_dtype

backends/qualcomm/runtime/QnnExecuTorchBackend.cpp

Lines changed: 18 additions & 13 deletions
@@ -194,29 +194,34 @@ Error QnnExecuTorchBackend::execute(
   std::vector<Qnn_Tensor_t> input_tensor_structs;
   std::vector<Qnn_Tensor_t> output_tensor_structs;
 
+  int args_index = 0;
   input_tensor_structs.reserve(input_tensors.size());
-  for (int i = 0; i < input_tensors.size(); ++i) {
-    if (qnn_manager->RegisterMem(
-            args[i]->toTensor().mutable_data_ptr(), input_tensors[i]) !=
-        Error::Ok) {
-      // update data ptr only should be fine
-      input_tensors[i]->FillDataBuffer(
-          args[i]->toTensor().const_data_ptr(), false /* copy_data */);
+  for (const auto& input_tensor : input_tensors) {
+    if (input_tensor->GetName().find("mutbuf_") == std::string::npos) {
+      if (qnn_manager->RegisterMem(
+              args[args_index]->toTensor().mutable_data_ptr(), input_tensor) !=
+          Error::Ok) {
+        // update data ptr only should be fine
+        input_tensor->FillDataBuffer(
+            args[args_index]->toTensor().const_data_ptr(),
+            false /* copy_data */);
+      }
+      args_index++;
    }
-    input_tensor_structs.push_back(input_tensors[i]->CloneTensorStruct());
+
+    input_tensor_structs.push_back(input_tensor->CloneTensorStruct());
   }
 
-  int output_index = input_tensors.size();
   for (const auto& output_tensor : output_tensors) {
     // pos=0 limits the search to the prefix
-    if (output_tensor->GetName().rfind("output_", 0) == 0) {
-      void* mutable_data_ptr =
-          args[output_index]->toTensor().mutable_data_ptr();
+    if (output_tensor->GetName().rfind("output_", 0) == 0 &&
+        output_tensor->GetName().find("mutbuf_") == std::string::npos) {
+      void* mutable_data_ptr = args[args_index]->toTensor().mutable_data_ptr();
       if (qnn_manager->RegisterMem(mutable_data_ptr, output_tensor) !=
           Error::Ok) {
         output_tensor->FillDataBuffer(mutable_data_ptr, false /* copy_data */);
       }
-      output_index++;
+      args_index++;
     }
     output_tensor_structs.push_back(output_tensor->CloneTensorStruct());
   }

backends/qualcomm/runtime/QnnManager.cpp

Lines changed: 36 additions & 0 deletions
@@ -16,6 +16,7 @@
 #include <cstring>
 #include <fstream>
 #include <string>
+#include <unordered_map>
 
 namespace executorch {
 namespace backends {
@@ -33,6 +34,16 @@ bool CompareExportedInput(
   return numA < numB;
 }
 
+int ExtractMutableBufferNumber(const std::string& name) {
+  std::string prefix = "mutbuf_";
+  size_t startPos = name.find(prefix);
+  if (startPos != std::string::npos) {
+    startPos += prefix.length();
+    return std::stoi(name.substr(startPos));
+  }
+  return -1;
+}
+
 QnnManager::~QnnManager() {
   backend_params_ptr_.reset(new BackendConfigParameters());
   logger_.reset();
@@ -324,9 +335,22 @@ Error QnnManager::AllocateTensor() {
   std::vector<Qnn_Tensor_t> output_tensors =
       backend_params_ptr_->qnn_context_ptr_->GetGraphOutputs();
 
+  // Mapping memory address for the input and output of mutable buffer
+  std::unordered_map<int, const void*> mutable_buffer_id_to_memory_map;
+
   for (auto& tensor : input_tensors) {
     std::shared_ptr<TensorWrapper> tensor_wrapper = CreateTensorWrapper(tensor);
     tensor_wrapper->UpdateQnnTensorMeta(tensor);
+
+    int mutable_buffer_id =
+        ExtractMutableBufferNumber(tensor_wrapper->GetName());
+    if (mutable_buffer_id != -1) {
+      // Delegate maintain the memory for mutable buffer
+      tensor_wrapper->AllocateDataBuffer();
+      mutable_buffer_id_to_memory_map[mutable_buffer_id] =
+          tensor_wrapper->GetStaticTensorData();
+    }
+
     input_tensors_.emplace_back(std::move(tensor_wrapper));
   }
   if (!options_->is_from_context_binary()) {
@@ -347,6 +371,18 @@ Error QnnManager::AllocateTensor() {
     if (IsTensorDump()) {
      tensor_wrapper->AllocateDataBuffer();
     }
+
+    int mutable_buffer_id =
+        ExtractMutableBufferNumber(tensor_wrapper->GetName());
+    if (mutable_buffer_id != -1 &&
+        mutable_buffer_id_to_memory_map.find(mutable_buffer_id) !=
+            mutable_buffer_id_to_memory_map.end()) {
+      // Fill the same memory for I/O of mutable buffer
+      tensor_wrapper->FillDataBuffer(
+          mutable_buffer_id_to_memory_map[mutable_buffer_id],
+          false /* copy_data */);
+    }
+
     output_tensors_.emplace_back(std::move(tensor_wrapper));
   }
   return Error::Ok;
