  * LICENSE file in the root directory of this source tree.
  */
 #pragma once
-#include <executorch/backends/qualcomm/aot/ir/qcir_utils.h>
 #include <executorch/backends/qualcomm/aot/python/PyQnnWrapperAdaptor.h>
 #include <executorch/backends/qualcomm/qc_compiler_spec_generated.h>
 #include <executorch/backends/qualcomm/runtime/Logging.h>
@@ -50,119 +49,6 @@ class PyQnnManager {
         qnn_executorch_options, qnn_executorch_context_binary_);
   }
 
-  // used during stage 2 of multi-graph mode
-  explicit PyQnnManager(const py::bytes& buffer, const py::list& qcirs)
-      : qnn_executorch_option_ptr_(buffer) {
-    auto qnn_executorch_options = GetQnnExecuTorchOptions(
-        qnn_executorch_option_ptr_.cast<std::string_view>().data());
-
-    // merge multiple qcirs into one context with multiple graphs
-
-    // We start retrieving tensors from offset 0.
-    std::vector<uint32_t> offsets(1, 0);
-    std::vector<uint8_t> tensor_data;
-    std::vector<uint8_t*> tensor_ptr;
-    std::vector<uint64_t> tensor_size;
-    uint64_t total_tensor_size = 0;
-    for (size_t i = 0; i < qcirs.size(); ++i) {
-      py::buffer_info info(py::buffer(qcirs[i].cast<py::bytes>()).request());
-
-      uint8_t* qcir_custom_buffer_ptr = static_cast<uint8_t*>(info.ptr);
-      QnnQcirCustomProtocol qnn_qcir_custom_protocol;
-      auto [status, _, qcir_tensor_size, __, qcir_tensor_ptr] =
-          qnn_qcir_custom_protocol.DeserializeQcirCustomBuffer(
-              qcir_custom_buffer_ptr);
-
-      if (status != Error::Ok) {
-        QNN_EXECUTORCH_LOG_ERROR("Fail to verify QnnQcirCustomProtocol");
-        return;
-      }
-
-      tensor_ptr.push_back(static_cast<uint8_t*>(qcir_tensor_ptr));
-      tensor_size.push_back(qcir_tensor_size);
-      total_tensor_size += qcir_tensor_size;
-      offsets.push_back(offsets.back() + qcir_tensor_size);
-    }
-
-    tensor_data.resize(total_tensor_size);
-
-    // store tensors of multiple graphs in a contiguous memory space
-    for (size_t i = 0; i < tensor_ptr.size(); ++i) {
-      std::memcpy(
-          tensor_data.data() + offsets[i], tensor_ptr[i], tensor_size[i]);
-    }
-
-    std::vector<flatbuffers::Offset<qcir::Graph>> graphs;
-    for (size_t i = 0; i < qcirs.size(); ++i) {
-      py::buffer_info info(py::buffer(qcirs[i].cast<py::bytes>()).request());
-
-      uint8_t* qcir_custom_buffer_ptr = static_cast<uint8_t*>(info.ptr);
-      QnnQcirCustomProtocol qnn_qcir_custom_protocol;
-      auto [status, qcir_fbs_size, _, qcir_fbs_ptr, __] =
-          qnn_qcir_custom_protocol.DeserializeQcirCustomBuffer(
-              qcir_custom_buffer_ptr);
-
-      if (status != Error::Ok) {
-        QNN_EXECUTORCH_LOG_ERROR("Fail to verify QnnQcirCustomProtocol");
-        return;
-      }
-
-      auto context = qcir::GetContext(qcir_fbs_ptr);
-      for (const auto& graph : *context->graphs()) {
-        std::vector<flatbuffers::Offset<qcir::Tensor>> tensors;
-        for (const auto tensor : *graph->tensors()) {
-          // here we need to take a detour to merge multiple qcir flatbuffers
-          // outer ToTensor
-          //   return: flatbuffers::Offset<Tensor>
-          //   consume: QnnTensor, data_offset, flatbuffers::FlatBufferBuilder*
-          // inner ToTensor
-          //   return: QnnTensor
-          //   consume:
-          //     flatbuffers::Vector<::flatbuffers::Offset<qcir::Tensor>>,
-          //     data_ptr
-          tensors.emplace_back(ToTensor(
-              ToTensor(tensor, nullptr),
-              offsets[i] + tensor->offset(),
-              &builder_));
-        }
-        std::vector<flatbuffers::Offset<qcir::Operator>> nodes;
-        for (const auto& node : *graph->nodes()) {
-          uint32_t* inputs_ptr = const_cast<uint32_t*>(node->inputs()->data());
-          uint32_t* outputs_ptr =
-              const_cast<uint32_t*>(node->outputs()->data());
-          uint32_t* params_ptr = const_cast<uint32_t*>(node->params()->data());
-          std::vector<uint32_t> inputs(
-              inputs_ptr, inputs_ptr + node->inputs()->size());
-          std::vector<uint32_t> outputs(
-              outputs_ptr, outputs_ptr + node->outputs()->size());
-          std::vector<uint32_t> params(
-              params_ptr, params_ptr + node->params()->size());
-          nodes.emplace_back(qcir::CreateOperatorDirect(
-              builder_,
-              node->name()->str().c_str(),
-              node->package_name()->str().c_str(),
-              node->type_name()->str().c_str(),
-              &inputs,
-              &outputs,
-              &params));
-        }
-        graphs.emplace_back(qcir::CreateGraphDirect(
-            builder_, graph->name()->str().c_str(), &nodes, &tensors));
-      }
-    }
-
-    auto context = qcir::CreateContextDirect(builder_, &graphs);
-    builder_.Finish(context);
-    QnnExecuTorchContextBinary qcir_bin(
-        {builder_.GetBufferPointer(), builder_.GetSize()});
-
-    // Init QnnQcirCustomProtocol binary
-    qnn_executorch_context_binary_ =
-        MakeQcirCustomBinaryInfo(qcir_bin, tensor_data);
-    qnn_manager_ = std::make_shared<QnnManager>(
-        qnn_executorch_options, qnn_executorch_context_binary_);
-  }
-
   executorch::runtime::Error Init() {
     return qnn_manager_->Init();
   }
@@ -172,146 +58,24 @@ class PyQnnManager {
     return qnn_manager_->IsNodeSupportedByBackend(op_wrappers);
   }
 
-  // this method is specific for stage 2 of compiling multi-graphs
-  py::array_t<char> Compile() {
-    if (qnn_manager_->CompileQcir() != Error::Ok) {
-      QNN_EXECUTORCH_LOG_ERROR("Fail to compile qcir");
-      return py::array_t<char>(0);
-    }
-
-    // generate context binary if compilation succeeded
-    QnnExecuTorchContextBinary binary_info;
-    qnn_manager_->GetContextBinary(binary_info);
-    // allocate py::array (to pass the result of the C++ function to Python)
-    auto result = py::array_t<char>(binary_info.nbytes);
-    auto result_buffer = result.request();
-    char* result_ptr = (char*)result_buffer.ptr;
-    std::memcpy(result_ptr, binary_info.buffer, binary_info.nbytes);
-    return result;
-  }
-
   py::array_t<char> Compile(
-      const std::string& graph_name,
-      std::vector<std::shared_ptr<OpWrapper>>& op_wrappers) {
+      const std::vector<std::string>& graph_names,
+      std::vector<std::vector<std::shared_ptr<OpWrapper>>>& op_wrappers) {
     QnnExecuTorchContextBinary binary_info;
 
-    if (qnn_manager_->IsMultipleGraphs()) {
-      builder_.Reset();
-      std::vector<uint8_t> tensor_data;
-      std::vector<uint64_t> offsets;
-      std::unordered_map<void*, int> tensor_map;
-      std::vector<flatbuffers::Offset<qcir::Tensor>> fb_tensors;
-      std::vector<flatbuffers::Offset<qcir::Operator>> fb_ops;
-
-      auto set_tensor = [&](const std::shared_ptr<TensorWrapper>& wrapper,
-                            std::vector<uint32_t>& index) {
-        auto it = tensor_map.find(wrapper.get());
-        if (it != tensor_map.end()) {
-          index.push_back(it->second);
-        } else {
-          tensor_map[wrapper.get()] = fb_tensors.size();
-          index.push_back(fb_tensors.size());
-          offsets.push_back(tensor_data.size());
-          Qnn_Tensor_t qnn_tensor = wrapper->CloneTensorStruct();
-          fb_tensors.emplace_back(
-              ToTensor(qnn_tensor, offsets.back(), &builder_));
-          uint8_t* data_ptr = static_cast<uint8_t*>(
-              QNN_TENSOR_VER_PTR(qnn_tensor)->clientBuf.data);
-          if (data_ptr != nullptr) {
-            tensor_data.insert(
-                tensor_data.end(),
-                data_ptr,
-                data_ptr + QNN_TENSOR_VER_PTR(qnn_tensor)->clientBuf.dataSize);
-          }
-        }
-      };
-
-      for (std::shared_ptr<OpWrapper>& op_wrapper : op_wrappers) {
-        std::vector<uint32_t> inputs, outputs, params;
-
-        for (const auto& tensor_wrapper : op_wrapper->GetInputTensors()) {
-          set_tensor(tensor_wrapper, inputs);
-        }
-
-        for (const auto& tensor_wrapper : op_wrapper->GetOutputTensors()) {
-          set_tensor(tensor_wrapper, outputs);
-        }
-
-        for (const auto& param : op_wrapper->GetParams()) {
-          auto* p_tensor_param = dynamic_cast<TensorParamWrapper*>(param.get());
-          if (p_tensor_param != nullptr) {
-            auto wrapper = p_tensor_param->GetTensorWrapper();
-            wrapper->SetName(param->GetName());
-            set_tensor(wrapper, params);
-          } else {
-            executorch::runtime::Error err = param->PopulateQnnParam();
-            if (err != executorch::runtime::Error::Ok) {
-              QNN_EXECUTORCH_LOG_ERROR(
-                  "Fail to get scalar parameter in online prepare stage");
-              return py::array_t<char>(0);
-            }
-            Qnn_Param_t p = param->GetQnnParam();
-            Qnn_Tensor_t t(
-                {.version = QNN_TENSOR_VERSION_2, .v2 = QNN_TENSOR_V2_INIT});
-            QNN_TENSOR_VER_PTR(t)->name = p.name;
-            QNN_TENSOR_VER_PTR(t)->dataType = p.scalarParam.dataType;
-            QNN_TENSOR_VER_PTR(t)->clientBuf.data =
-                static_cast<void*>(&p.scalarParam.uint8Value);
-            QNN_TENSOR_VER_PTR(t)->clientBuf.dataSize =
-                GetDataTypeSize(QNN_TENSOR_VER_PTR(t)->dataType);
-
-            // collect tensor data
-            offsets.push_back(tensor_data.size());
-            const uint8_t* data_ptr =
-                static_cast<uint8_t*>(QNN_TENSOR_VER_PTR(t)->clientBuf.data);
-            tensor_data.insert(
-                tensor_data.end(),
-                data_ptr,
-                data_ptr + QNN_TENSOR_VER_PTR(t)->clientBuf.dataSize);
-            params.push_back(fb_tensors.size());
-            fb_tensors.emplace_back(ToTensor(t, offsets.back(), &builder_));
-          }
-        }
-
-        Qnn_OpConfig_t op_config = op_wrapper->GetOpConfig();
-        fb_ops.emplace_back(qcir::CreateOperatorDirect(
-            builder_,
-            QNN_OP_VER_PTR(op_config)->name,
-            QNN_OP_VER_PTR(op_config)->packageName,
-            QNN_OP_VER_PTR(op_config)->typeName,
-            &inputs,
-            &outputs,
-            &params));
-      }
-
-      std::vector<flatbuffers::Offset<qcir::Graph>> fb_graphs(
-          {qcir::CreateGraphDirect(
-              builder_, graph_name.c_str(), &fb_ops, &fb_tensors)});
-      auto context = qcir::CreateContextDirect(builder_, &fb_graphs);
-      builder_.Finish(context);
-
-      QnnExecuTorchContextBinary qcir_binary(
-          {builder_.GetBufferPointer(), builder_.GetSize()});
-
-      custom_qcir_protocol_buffer_ =
-          QnnQcirCustomProtocol(qcir_binary.nbytes, tensor_data.size());
-      custom_qcir_protocol_buffer_.BuildQcirCustomBuffer(
-          qcir_binary, tensor_data);
-      std::tie(binary_info.buffer, binary_info.nbytes) =
-          custom_qcir_protocol_buffer_.GetCustomProtocolBuffer();
-    } else {
-      if (qnn_manager_->Compile(graph_name, op_wrappers) !=
+    for (size_t i = 0; i < graph_names.size(); ++i) {
+      if (qnn_manager_->Compile(graph_names[i], op_wrappers[i]) !=
           executorch::runtime::Error::Ok) {
         QNN_EXECUTORCH_LOG_ERROR("Fail to compile QNN graph");
         return py::array_t<char>(0);
       }
-      auto qnn_executorch_options = GetQnnExecuTorchOptions(
-          qnn_executorch_option_ptr_.cast<std::string_view>().data());
-      if (qnn_executorch_options->saver() ||
-          qnn_manager_->GetContextBinary(binary_info) !=
-              executorch::runtime::Error::Ok) {
-        return py::array_t<char>(0);
-      }
+    }
+    auto qnn_executorch_options = GetQnnExecuTorchOptions(
+        qnn_executorch_option_ptr_.cast<std::string_view>().data());
+    if (qnn_executorch_options->saver() ||
+        qnn_manager_->GetContextBinary(binary_info) !=
+            executorch::runtime::Error::Ok) {
+      return py::array_t<char>(0);
     }
 
     // allocate py::array (to pass the result of the C++ function to Python)
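
Note on the new control flow: this change drops the two-stage qcir merge and has the single Compile entry point iterate the parallel graph_names / op_wrappers vectors, failing fast on the first graph that does not compile and only then emitting one context binary covering all graphs. Below is a minimal, self-contained sketch of that zip-and-fail-fast pattern; CompileGraph and CompileAll are hypothetical stand-ins for illustration, not ExecuTorch or QNN APIs.

```cpp
#include <cstdio>
#include <string>
#include <vector>

enum class Error { Ok, Internal };

// Stand-in for QnnManager::Compile(graph_name, op_wrappers); here an empty
// op list simulates a compilation failure to exercise the error path.
Error CompileGraph(const std::string& name, const std::vector<int>& ops) {
  (void)name;
  return ops.empty() ? Error::Internal : Error::Ok;
}

// Mirrors the revised PyQnnManager::Compile control flow: walk the parallel
// graph_names / op_wrappers vectors, bail out on the first failure, and only
// afterwards would a single context binary covering every graph be emitted.
bool CompileAll(
    const std::vector<std::string>& graph_names,
    const std::vector<std::vector<int>>& op_wrappers) {
  for (size_t i = 0; i < graph_names.size(); ++i) {
    if (CompileGraph(graph_names[i], op_wrappers[i]) != Error::Ok) {
      std::printf("Fail to compile QNN graph: %s\n", graph_names[i].c_str());
      return false;
    }
  }
  return true;
}

int main() {
  // Two graphs sharing one context, as in multi-graph mode.
  return CompileAll({"prefill", "decode"}, {{1, 2}, {3}}) ? 0 : 1;
}
```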