
Commit 2043e22

Qualcomm AI Engine Direct - Delegated mutable buffer
Summary:
- Support copy op with QNN Reshape
- Consume mutable buffer in QNN Delegate
- Set the same memory address for I/O of mutable buffer at runtime
1 parent 86cb5d7 commit 2043e22
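
For context, the graphs this change targets are modules that mutate a registered buffer inside forward (KV-cache-style state). A minimal sketch, assuming nothing beyond stock PyTorch; the module and buffer names here are illustrative, not taken from this commit:

import torch

class CacheModule(torch.nn.Module):  # hypothetical toy model
    def __init__(self):
        super().__init__()
        # Persistent state; mutating it in forward() marks it as a mutable
        # buffer in the exported program's graph signature.
        self.register_buffer("cache", torch.zeros(1, 8))

    def forward(self, x):
        self.cache.copy_(x)  # in-place update, recorded in buffers_to_mutate
        return self.cache * 2

ep = torch.export.export(CacheModule(), (torch.randn(1, 8),))
print(ep.graph_signature.buffers_to_mutate)  # non-empty: the buffer is mutated

After lowering to the edge dialect, the buffer update surfaces as an aten.copy node plus a mutable-buffer input/output pair, which the changes below let the QNN delegate consume directly.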

File tree

11 files changed: +201 -23 lines changed

backends/qualcomm/_passes/layout_transform.py

Lines changed: 1 addition & 0 deletions
@@ -64,6 +64,7 @@ class LayoutTransform(ExportPass):
         exir_ops.edge.aten.prelu.default,
         exir_ops.edge.aten.relu.default,
         exir_ops.edge.aten._softmax.default,  # TODO: Need to find a new solution to do "axis_order" to transform axis.
+        exir_ops.edge.aten.sigmoid.default,
         exir_ops.edge.aten.sqrt.default,
         exir_ops.edge.aten.sub.Tensor,
         exir_ops.edge.aten.sum.dim_IntList,

backends/qualcomm/builders/__init__.py

File mode changed: 100644 → 100755
Lines changed: 2 additions & 0 deletions
@@ -14,6 +14,7 @@
     op_ceil,
     op_clamp,
     op_conv2d,
+    op_copy,
     op_depth_to_space,
     op_dequantize,
     op_div,
@@ -71,6 +72,7 @@
     op_ceil,
     op_clamp,
     op_conv2d,
+    op_copy,
     op_depth_to_space,
     op_dequantize,
     op_div,

backends/qualcomm/builders/node_visitor.py

Lines changed: 36 additions & 7 deletions
@@ -36,6 +36,8 @@
     get_parameter,
     is_graph_input,
     is_graph_output,
+    is_mutable_buffer_input,
+    is_mutable_buffer_output,
     is_parameter,
 )

@@ -214,7 +216,9 @@ def get_tensor_type(
         node: torch.fx.Node,
         tensor_type: PyQnnWrapper.Qnn_TensorType_t,
     ) -> PyQnnWrapper.Qnn_TensorType_t:
-        is_input = is_graph_input(node, self.edge_program)
+        is_input = is_graph_input(node, self.edge_program) or is_mutable_buffer_input(
+            node, self.edge_program
+        )
         is_output = is_graph_output(node)
         # handle logic for input/output tensors
         if is_input or is_output:
@@ -247,6 +251,33 @@ def get_data_type(

         return QNN_TENSOR_TYPE_MAP[tensor.dtype]

+    def get_tensor_name(
+        self,
+        node: torch.fx.Node,
+        wrapper_idx: int = 0,
+    ):
+        tensor_name = f"{node.name}_{wrapper_idx}"
+        # The `input_{id}` is utilized for sorting at runtime. Due to multiple passes in qnn_preprocess,
+        # the input order between QNN and the original graph’s forward function may differ.
+        # The `mutbuf_{id}` is utilized for mapping I/O of mutable buffer at runtime.
+        # The `output_` is identified as the graph’s output at runtime to prevent confusion with per_tensor_dump.
+        if is_mutable_buffer_input(node, self.edge_program):
+            fqn = self.edge_program.graph_signature.inputs_to_buffers[node.target]
+            position_index = list(
+                self.edge_program.graph_signature.buffers_to_mutate.values()
+            ).index(fqn)
+            tensor_name = f"input_{str(self.external_ids[node])}_mutbuf_{str(position_index)}_{tensor_name}"
+        elif is_graph_input(node, self.edge_program):
+            tensor_name = f"input_{str(self.external_ids[node])}_{tensor_name}"
+        elif is_mutable_buffer_output(node, self.edge_program):
+            position_index = list(
+                self.edge_program.graph_signature.buffers_to_mutate.keys()
+            ).index(node.name)
+            tensor_name = f"output_mutbuf_{position_index}_{tensor_name}"
+        elif is_graph_output(node):
+            tensor_name = f"output_{tensor_name}"
+        return tensor_name
+
     def define_custom_tensor_wrapper(
         self,
         node_name: str,
@@ -307,11 +338,7 @@ def define_tensor(
         if cached := nodes_to_wrappers[node_name].get(wrapper_idx, None):
             return cached

-        tensor_name = f"{node.name}_{wrapper_idx}"
-        if is_graph_input(node, self.edge_program):
-            tensor_name = "input_" + str(self.external_ids[node]) + "_" + tensor_name
-        if is_graph_output(node):
-            tensor_name = "output_" + tensor_name
+        tensor_name = self.get_tensor_name(node, wrapper_idx)
         dims = [1] if len(tensor.size()) == 0 else tensor.size()
         tensor_type = self.get_tensor_type(node, tensor_type)
         quant_encoding, quant_configs = self.get_quant_encoding_conf(
@@ -383,7 +410,9 @@ def generate_node_to_external_map(
         # The order in which we visit the placeholder node is same as the *args
         # order for the forward(*args) signature for this gm. Using the order of
         # the nodes as external_id to extract the right arg from *args at runtime
-        if is_graph_input(node, edge_program):
+        if is_graph_input(node, edge_program) or is_mutable_buffer_input(
+            node, edge_program
+        ):
             node_to_external_map[node] = len(node_to_external_map)
     for node in edge_program.graph_module.graph.nodes:
         if is_graph_output(node):
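
The names emitted by the new get_tensor_name carry three pieces of runtime metadata: input_{id} preserves the argument order, mutbuf_{id} pairs a mutable buffer's input tensor with its output tensor, and output_ marks graph outputs. A small standalone illustration of the convention and how it can be parsed (the node-name suffixes are made up; the parsing mirrors, but is not taken from, ExtractMutableBufferNumber in QnnManager.cpp below):

import re

# Hypothetical tensor names following the convention above.
names = [
    "input_0_x_0",                  # ordinary graph input, arg index 0
    "input_1_mutbuf_0_b_cache_0",   # mutable-buffer input, arg index 1, buffer id 0
    "output_mutbuf_0_aten_copy_0",  # mutable-buffer output paired with buffer id 0
    "output_mul_0",                 # ordinary graph output
]

def mutable_buffer_id(name: str) -> int:
    """Return the mutbuf id embedded in a tensor name, or -1 if absent."""
    match = re.search(r"mutbuf_(\d+)", name)
    return int(match.group(1)) if match else -1

for name in names:
    print(name, "->", mutable_buffer_id(name))
# A non-negative id appearing on both an input and an output means the two
# tensors refer to the same mutable buffer and will share memory at runtime.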
backends/qualcomm/builders/op_copy.py (new file)

Lines changed: 63 additions & 0 deletions
@@ -0,0 +1,63 @@
+# Copyright (c) Qualcomm Innovation Center, Inc.
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+from typing import Dict
+
+import executorch.backends.qualcomm.python.PyQnnWrapperAdaptor as PyQnnWrapper
+
+import torch
+from executorch.backends.qualcomm.utils.constants import QCOM_QUANT_ATTRS
+
+from .node_visitor import NodeVisitor, register_node_visitor
+from .qnn_constants import OpReshape, QNN_OP_PACKAGE_NAME_QTI_AISW
+
+
+@register_node_visitor
+class Copy(NodeVisitor):
+    target = ["aten.copy.default"]
+
+    def __init__(self, *args) -> None:
+        super().__init__(*args)
+
+    def define_node(
+        self,
+        node: torch.fx.Node,
+        nodes_to_wrappers: Dict[torch.fx.Node, PyQnnWrapper.TensorWrapper],
+    ) -> PyQnnWrapper.PyQnnOpWrapper:
+        input_node = node.args[1]
+        input_tensor = self.get_tensor(input_node, node)
+        copy_inp_tensor_wrapper = self.define_tensor(
+            input_node,
+            input_tensor,
+            PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
+            nodes_to_wrappers,
+            is_input_tensor=True,
+        )
+
+        copy_input_tensors = [copy_inp_tensor_wrapper]
+
+        if quant_attrs := input_node.meta.get(QCOM_QUANT_ATTRS):
+            quant_attrs = quant_attrs.copy()
+            # Because there is no output after convert_pt2e, the QCOM_QUANT_ATTRS of node is none
+            node.meta[QCOM_QUANT_ATTRS] = quant_attrs
+        output_tensor = self.get_tensor(node, node)
+        output_tensor_wrapper = self.define_tensor(
+            node,
+            output_tensor,
+            PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE,
+            nodes_to_wrappers,
+            is_input_tensor=False,
+        )
+        copy_output_tensors = [output_tensor_wrapper]
+
+        copy_op = PyQnnWrapper.PyQnnOpWrapper(
+            node.name,
+            QNN_OP_PACKAGE_NAME_QTI_AISW,
+            OpReshape.op_name,
+        )
+        copy_op.AddInputTensors(copy_input_tensors)
+        copy_op.AddOutputTensors(copy_output_tensors)
+
+        return copy_op
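
Lowering aten.copy through QNN's Reshape op relies on a reshape to the tensor's own shape being an element-wise identity: the destination simply receives the source values. A trivial sanity check of that rationale in plain PyTorch (not the delegate path):

import torch

src = torch.arange(6.0).reshape(2, 3)
# Reshaping to the same shape preserves every element and its layout,
# which is all the copy needs to deliver on the backend.
copied = src.reshape(src.shape)
assert torch.equal(copied, src)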

backends/qualcomm/builders/utils.py

Lines changed: 37 additions & 0 deletions
@@ -75,6 +75,23 @@ def is_graph_input(
     return tensor.op == "placeholder" and not is_parameter(tensor, edge_program)


+def is_mutable_buffer_input(
+    tensor: torch.fx.Node, edge_program: torch.export.ExportedProgram
+) -> bool:
+    """
+    Check if the given tensor is a mutable buffer input
+
+    Args:
+        tensor: EdgeIR Tensor that is being checked for mutable buffer input
+    """
+    if tensor.op == "placeholder" and is_buffer(edge_program, tensor):
+        fqn = edge_program.graph_signature.inputs_to_buffers[tensor.target]
+        # if the buffer is mutated then record that
+        if fqn in edge_program.graph_signature.buffers_to_mutate.values():
+            return True
+    return False
+
+
 def is_graph_output(tensor: torch.fx.Node) -> bool:
     """
     Check if the given tensor is used as a graph output
@@ -91,6 +108,26 @@ def is_graph_output(tensor: torch.fx.Node) -> bool:
     return False


+def is_mutable_buffer_output(
+    tensor: torch.fx.Node, edge_program: torch.export.ExportedProgram
+) -> bool:
+    """
+    Check if the given tensor is a mutable buffer output
+
+    Args:
+        tensor: EdgeIR Tensor that is being checked for mutable buffer output
+    """
+    for user in tensor.users.keys():
+        # getitem node is skiped, check the op_skip_ops.py
+        if user.op == "output" or (
+            user.target.__name__ == "getitem" and is_graph_output(user)
+        ):
+            # if the buffer is mutated then record that
+            if tensor.name in edge_program.graph_signature.buffers_to_mutate.keys():
+                return True
+    return False
+
+
 def is_constant(
     tensor: torch.fx.Node, edge_program: torch.export.ExportedProgram
 ) -> bool:
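
Both helpers read the exported program's graph signature rather than the graph itself. For a toy module like the one sketched under the commit message, the two mappings they consult look roughly as follows (key and value names are illustrative and depend on the export):

# Illustrative shapes of the graph-signature mappings (made-up names).
inputs_to_buffers = {"b_cache": "cache"}        # placeholder target -> buffer FQN
buffers_to_mutate = {"copy__default": "cache"}  # output node name  -> buffer FQN

def looks_like_mutable_buffer_input(placeholder_target: str) -> bool:
    # Mirrors is_mutable_buffer_input: the placeholder maps to a buffer whose
    # FQN also appears among the mutated buffers.
    fqn = inputs_to_buffers.get(placeholder_target)
    return fqn is not None and fqn in buffers_to_mutate.values()

print(looks_like_mutable_buffer_input("b_cache"))  # True for this toy mapping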

backends/qualcomm/partition/common_defs.py

File mode changed: 100644 → 100755
Lines changed: 1 addition & 1 deletion
@@ -13,7 +13,7 @@
     exir_ops.edge.aten.clone.default,
     exir_ops.edge.aten.full.default,
     exir_ops.edge.aten.slice_scatter.default,
-    exir_ops.edge.aten.copy.default,
+    exir_ops.edge.quantized_decomposed.embedding_4bit.dtype,
 ]

 to_be_implemented_operator = [

backends/qualcomm/partition/qnn_partitioner.py

Lines changed: 5 additions & 1 deletion
@@ -23,7 +23,7 @@
     Partitioner,
     PartitionResult,
 )
-from executorch.exir.backend.utils import tag_constant_data
+from executorch.exir.backend.utils import tag_constant_data, tag_mutated_buffer
 from torch.fx.passes.infra.partitioner import Partition
 from torch.fx.passes.operator_support import OperatorSupportBase

@@ -108,6 +108,7 @@ def __init__(
         compiler_specs: List[CompileSpec],
         skip_node_id_set: set = None,
         skip_node_op_set: set = None,
+        skip_mutable_buffer: bool = True,
     ):
         self.compiler_specs_snapshot = copy.deepcopy(compiler_specs)

@@ -117,6 +118,7 @@
         self.partition_tags: Dict[str, DelegationSpec] = {}
         self.skip_node_id_set = set() if skip_node_id_set is None else skip_node_id_set
         self.skip_node_op_set = set() if skip_node_op_set is None else skip_node_op_set
+        self.skip_mutable_buffer = skip_mutable_buffer

     def generate_partitions(
         self, edge_program: torch.export.ExportedProgram
@@ -162,6 +164,8 @@ def partition(self, edge_program: torch.export.ExportedProgram) -> PartitionResult:
         if len(partitions) != 0:
             self.tag_nodes(partitions, edge_program)
             tag_constant_data(edge_program)
+            if not self.skip_mutable_buffer:
+                tag_mutated_buffer(edge_program)
         for node in edge_program.graph_module.graph.nodes:
             if hasattr(node, "meta"):
                 # pop certain keys in meta for not affecting the passes in compilation
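
Delegating mutable buffers is therefore opt-in at partition time via skip_mutable_buffer=False. A hedged usage sketch; the model, example inputs, and compiler specs are placeholders (compile-spec construction uses the backend's usual helpers and is not part of this diff), and to_edge_transform_and_lower is assumed as the standard ExecuTorch lowering entry point:

import torch
from executorch.backends.qualcomm.partition.qnn_partitioner import QnnPartitioner
from executorch.exir import to_edge_transform_and_lower

# Placeholders: a module that mutates a registered buffer, its sample inputs,
# and QNN compile specs built elsewhere.
model, example_inputs, compiler_specs = ..., ..., ...

partitioner = QnnPartitioner(
    compiler_specs,
    skip_mutable_buffer=False,  # opt in: tag mutated buffers so the delegate owns them
)
delegated = to_edge_transform_and_lower(
    torch.export.export(model, example_inputs),
    partitioner=[partitioner],
)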

backends/qualcomm/quantizer/custom_annotation.py

File mode changed: 100644 → 100755
Lines changed: 1 addition & 1 deletion
@@ -192,7 +192,7 @@ def get_custom_quant_ios_dtype(

     # Tag index put node before copy node, because copy is a skipped node in qnn
     if (
-        exir_ops.edge.aten.index_put.default == node.target
+        exir_ops.edge.aten.copy.default == node.target
         and node.meta["val"].shape == cache_shape
     ):
         return kv_dtype

backends/qualcomm/runtime/QnnExecuTorchBackend.cpp

Lines changed: 18 additions & 13 deletions
@@ -111,29 +111,34 @@ Error QnnExecuTorchBackend::execute(
   std::vector<Qnn_Tensor_t> input_tensor_structs;
   std::vector<Qnn_Tensor_t> output_tensor_structs;

+  int args_index = 0;
   input_tensor_structs.reserve(input_tensors.size());
-  for (int i = 0; i < input_tensors.size(); ++i) {
-    if (qnn_manager->RegisterMem(
-            args[i]->toTensor().mutable_data_ptr(), input_tensors[i]) !=
-        Error::Ok) {
-      // update data ptr only should be fine
-      input_tensors[i]->FillDataBuffer(
-          args[i]->toTensor().const_data_ptr(), false /* copy_data */);
+  for (const auto& input_tensor : input_tensors) {
+    if (input_tensor->GetName().find("mutbuf_") == std::string::npos) {
+      if (qnn_manager->RegisterMem(
+              args[args_index]->toTensor().mutable_data_ptr(), input_tensor) !=
+          Error::Ok) {
+        // update data ptr only should be fine
+        input_tensor->FillDataBuffer(
+            args[args_index]->toTensor().const_data_ptr(),
+            false /* copy_data */);
+      }
+      args_index++;
     }
-    input_tensor_structs.push_back(input_tensors[i]->CloneTensorStruct());
+
+    input_tensor_structs.push_back(input_tensor->CloneTensorStruct());
   }

-  int output_index = input_tensors.size();
   for (const auto& output_tensor : output_tensors) {
     // pos=0 limits the search to the prefix
-    if (output_tensor->GetName().rfind("output_", 0) == 0) {
-      void* mutable_data_ptr =
-          args[output_index]->toTensor().mutable_data_ptr();
+    if (output_tensor->GetName().rfind("output_", 0) == 0 &&
+        output_tensor->GetName().find("mutbuf_") == std::string::npos) {
+      void* mutable_data_ptr = args[args_index]->toTensor().mutable_data_ptr();
       if (qnn_manager->RegisterMem(mutable_data_ptr, output_tensor) !=
           Error::Ok) {
         output_tensor->FillDataBuffer(mutable_data_ptr, false /* copy_data */);
       }
-      output_index++;
+      args_index++;
     }
     output_tensor_structs.push_back(output_tensor->CloneTensorStruct());
   }

backends/qualcomm/runtime/QnnManager.cpp

Lines changed: 36 additions & 0 deletions
@@ -19,6 +19,7 @@
 #include <cstring>
 #include <fstream>
 #include <string>
+#include <unordered_map>

 namespace executorch {
 namespace backends {
@@ -36,6 +37,16 @@ bool CompareExportedInput(
   return numA < numB;
 }

+int ExtractMutableBufferNumber(const std::string& name) {
+  std::string prefix = "mutbuf_";
+  size_t startPos = name.find(prefix);
+  if (startPos != std::string::npos) {
+    startPos += prefix.length();
+    return std::stoi(name.substr(startPos));
+  }
+  return -1;
+}
+
 QnnManager::~QnnManager() {
   backend_params_ptr_.reset(new BackendConfigParameters());
   logger_.reset();
@@ -331,9 +342,22 @@ Error QnnManager::AllocateTensor(const std::string& graph_name) {
   std::vector<Qnn_Tensor_t> output_tensors =
       backend_params_ptr_->qnn_context_ptr_->GetGraphOutputs(graph_name);

+  // Mapping memory address for the input and output of mutable buffer
+  std::unordered_map<int, const void*> mutable_buffer_id_to_memory_map;
+
   for (auto& tensor : input_tensors) {
     std::shared_ptr<TensorWrapper> tensor_wrapper = CreateTensorWrapper(tensor);
     tensor_wrapper->UpdateQnnTensorMeta(tensor);
+
+    int mutable_buffer_id =
+        ExtractMutableBufferNumber(tensor_wrapper->GetName());
+    if (mutable_buffer_id != -1) {
+      // Delegate maintain the memory for mutable buffer
+      tensor_wrapper->AllocateDataBuffer();
+      mutable_buffer_id_to_memory_map[mutable_buffer_id] =
+          tensor_wrapper->GetStaticTensorData();
+    }
+
     input_tensors_[graph_name].emplace_back(std::move(tensor_wrapper));
   }
   if (!options_->is_from_context_binary()) {
@@ -356,6 +380,18 @@ Error QnnManager::AllocateTensor(const std::string& graph_name) {
     if (IsTensorDump()) {
       tensor_wrapper->AllocateDataBuffer();
     }
+
+    int mutable_buffer_id =
+        ExtractMutableBufferNumber(tensor_wrapper->GetName());
+    if (mutable_buffer_id != -1 &&
+        mutable_buffer_id_to_memory_map.find(mutable_buffer_id) !=
+            mutable_buffer_id_to_memory_map.end()) {
+      // Fill the same memory for I/O of mutable buffer
+      tensor_wrapper->FillDataBuffer(
+          mutable_buffer_id_to_memory_map[mutable_buffer_id],
+          false /* copy_data */);
+    }
+
     output_tensors_[graph_name].emplace_back(std::move(tensor_wrapper));
   }
   return Error::Ok;
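
The net effect of the two new blocks in AllocateTensor is that the input wrapper and the output wrapper of one mutable buffer alias a single delegate-owned allocation, so a value the graph writes out is already in place as the next invocation's input. A Python mock of that bookkeeping, mirroring the C++ above purely for illustration (tensor names are made up):

import re

def mutable_buffer_id(name: str) -> int:
    # Same contract as ExtractMutableBufferNumber: -1 means "not a mutable buffer".
    match = re.search(r"mutbuf_(\d+)", name)
    return int(match.group(1)) if match else -1

# Toy wrappers: `storage` stands in for the delegate-owned allocation.
inputs = [{"name": "input_1_mutbuf_0_cache_0", "storage": bytearray(8)}]
outputs = [{"name": "output_mutbuf_0_copy_0", "storage": None}]

id_to_memory = {}
for t in inputs:
    buf_id = mutable_buffer_id(t["name"])
    if buf_id != -1:
        id_to_memory[buf_id] = t["storage"]  # delegate keeps the input's storage

for t in outputs:
    buf_id = mutable_buffer_id(t["name"])
    if buf_id != -1 and buf_id in id_to_memory:
        t["storage"] = id_to_memory[buf_id]  # output reuses the same storage

assert outputs[0]["storage"] is inputs[0]["storage"]  # I/O of the buffer alias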
