#pragma once
#include <executorch/backends/qualcomm/aot/ir/qcir_utils.h>
#include <executorch/backends/qualcomm/aot/python/PyQnnWrapperAdaptor.h>
#include <executorch/backends/qualcomm/qc_binary_info_generated.h>
#include <executorch/backends/qualcomm/qc_compiler_spec_generated.h>
#include <executorch/backends/qualcomm/runtime/Logging.h>
#include <executorch/backends/qualcomm/runtime/QnnExecuTorch.h>
#include <executorch/backends/qualcomm/runtime/QnnManager.h>
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>

#include <chrono>
#include <cstdint>
#include <cstring>
#include <memory>
#include <string>
#include <string_view>
#include <vector>
@@ -35,32 +36,127 @@ class PyQnnManager {
3536 qnn_manager_ = std::make_shared<QnnManager>(
3637 qnn_executorch_options, qnn_executorch_context_binary_);
3738 }
39+
3840 // used for loading context binary directly
3941 explicit PyQnnManager (const py::bytes& buffer, const py::bytes& ctx_bin)
4042 : qnn_executorch_option_ptr_(buffer) {
4143 auto qnn_executorch_options = GetQnnExecuTorchOptions (
4244 qnn_executorch_option_ptr_.cast <std::string_view>().data ());
4345
4446 py::buffer_info info (py::buffer (ctx_bin).request ());
45- qnn_executorch_context_binary_.buffer = static_cast < void *>( info.ptr ) ;
47+ qnn_executorch_context_binary_.buffer = info.ptr ;
4648 qnn_executorch_context_binary_.nbytes = info.size * info.itemsize ;
4749 qnn_manager_ = std::make_shared<QnnManager>(
4850 qnn_executorch_options, qnn_executorch_context_binary_);
4951 }
5052
53+ // used for loading multiple graphs in qcir
54+ explicit PyQnnManager (const py::bytes& buffer, const py::list& qcirs)
55+ : qnn_executorch_option_ptr_(buffer) {
56+ auto qnn_executorch_options = GetQnnExecuTorchOptions (
57+ qnn_executorch_option_ptr_.cast <std::string_view>().data ());
58+
59+ // merge multiple qcirs into one context with multiple graphs
60+ std::vector<flatbuffers::Offset<qcir::Graph>> graphs;
61+ for (size_t i = 0 ; i < qcirs.size (); ++i) {
62+ py::buffer_info info (py::buffer (qcirs[i].cast <py::bytes>()).request ());
63+ flatbuffers::Verifier verifier_binary_info (
64+ static_cast <const uint8_t * const >(info.ptr ),
65+ info.size * info.itemsize );
66+ if (!qnn_delegate::VerifyBinaryInfoBuffer (verifier_binary_info)) {
67+ QNN_EXECUTORCH_LOG_ERROR (" Fail to verify binary info" );
68+ return ;
69+ }
70+ auto binary_info = qnn_delegate::GetBinaryInfo (info.ptr );
71+
72+ flatbuffers::Verifier verifier_qcir (
73+ binary_info->data ()->data (), binary_info->data ()->size ());
74+ if (!qcir::VerifyContextBuffer (verifier_qcir)) {
75+ QNN_EXECUTORCH_LOG_ERROR (" Fail to verify qcir format" );
76+ return ;
77+ }
78+ auto context = qcir::GetContext (binary_info->data ()->data ());
79+ for (const auto & graph : *context->graphs ()) {
80+ std::vector<flatbuffers::Offset<qcir::Tensor>> tensors;
81+ for (const auto tensor : *graph->tensors ()) {
82+ // here we need to take a detour to merge multiple qcir flatbuffers
83+ // outer ToTensor
84+ // return: flatbuffers::Offset<Tensor>
85+ // consume: QnnTensor, flatbuffers::FlatBufferBuilder*
86+ // inner ToTensor
87+ // return: QnnTensor
88+ // consume: flatbuffers::Vector<::flatbuffers::Offset<qcir::Tensor>>
89+ tensors.emplace_back (ToTensor (ToTensor (tensor), &builder_));
90+ }
91+ std::vector<flatbuffers::Offset<qcir::Operator>> nodes;
92+ for (const auto & node : *graph->nodes ()) {
93+ int32_t * inputs_ptr = const_cast <int32_t *>(node->inputs ()->data ());
94+ int32_t * outputs_ptr = const_cast <int32_t *>(node->outputs ()->data ());
95+ int32_t * params_ptr = const_cast <int32_t *>(node->params ()->data ());
96+ std::vector<int32_t > inputs (
97+ inputs_ptr, inputs_ptr + node->inputs ()->size ());
98+ std::vector<int32_t > outputs (
99+ outputs_ptr, outputs_ptr + node->outputs ()->size ());
100+ std::vector<int32_t > params (
101+ params_ptr, params_ptr + node->params ()->size ());
102+ nodes.emplace_back (qcir::CreateOperatorDirect (
103+ builder_,
104+ node->name ()->str ().c_str (),
105+ node->package_name ()->str ().c_str (),
106+ node->type_name ()->str ().c_str (),
107+ &inputs,
108+ &outputs,
109+ ¶ms));
110+ }
111+ graphs.emplace_back (qcir::CreateGraphDirect (
112+ builder_, graph->name ()->str ().c_str (), &nodes, &tensors));
113+ }
114+ }
115+
116+ auto context = qcir::CreateContextDirect (builder_, &graphs);
117+ builder_.Finish (context);
118+ QnnExecuTorchContextBinary qcir_bin (
119+ {builder_.GetBufferPointer (), builder_.GetSize ()});
120+
121+ qnn_executorch_context_binary_ = MakeBinaryInfo (qcir_bin);
122+ qnn_manager_ = std::make_shared<QnnManager>(
123+ qnn_executorch_options, qnn_executorch_context_binary_);
124+ }
125+
51126 executorch::runtime::Error Init () {
52127 return qnn_manager_->Init ();
53128 }
129+
54130 bool IsNodeSupportedByBackend (
55131 std::vector<std::shared_ptr<OpWrapper>>& op_wrappers) {
56132 return qnn_manager_->IsNodeSupportedByBackend (op_wrappers);
57133 }
134+
135+ // this method is specific for compiling multi-graphs
136+ py::array_t <char > Compile () {
137+ if (qnn_manager_->CompileQcir () != Error::Ok) {
138+ QNN_EXECUTORCH_LOG_ERROR (" Fail to compile qcir" );
139+ return py::array_t <char >(0 );
140+ }
141+
142+ // generate context binary if compilation succeded
143+ QnnExecuTorchContextBinary binary_info;
144+ qnn_manager_->GetContextBinary (binary_info);
145+ // allocate py::array (to pass the result of the C++ function to Python)
146+ auto result = py::array_t <char >(binary_info.nbytes );
147+ auto result_buffer = result.request ();
148+ char * result_ptr = (char *)result_buffer.ptr ;
149+ std::memcpy (result_ptr, binary_info.buffer , binary_info.nbytes );
150+ return result;
151+ }
152+
58153 py::array_t <char > Compile (
154+ const std::string& graph_name,
59155 std::vector<std::shared_ptr<OpWrapper>>& op_wrappers) {
60- QnnExecuTorchContextBinary context_binary;
61- flatbuffers::FlatBufferBuilder builder;
156+ QnnExecuTorchContextBinary binary_info;
62157
63- if (qnn_manager_->IsOnlinePrepare ()) {
158+ if (qnn_manager_->IsOnlinePrepare () || qnn_manager_->IsMultipleGraphs ()) {
159+ builder_.Reset ();
64160 std::vector<flatbuffers::Offset<qcir::Tensor>> tensors;
65161 std::unordered_map<void *, int > tensor_map;
66162
@@ -74,7 +170,7 @@ class PyQnnManager {
74170 tensor_map[wrapper.get ()] = i;
75171 index.push_back (i);
76172 tensors.emplace_back (
77- ToTensor (wrapper->CloneTensorStruct (), &builder ));
173+ ToTensor (wrapper->CloneTensorStruct (), &builder_ ));
78174 }
79175 };
80176
@@ -112,38 +208,48 @@ class PyQnnManager {
112208 QNN_VER_PTR (t)->clientBuf .dataSize =
113209 GetDataTypeSize (QNN_VER_PTR (t)->dataType );
114210 params.push_back (tensors.size ());
115- tensors.emplace_back (ToTensor (t, &builder ));
211+ tensors.emplace_back (ToTensor (t, &builder_ ));
116212 }
117213 }
118214
119215 Qnn_OpConfig_t op_config = op_wrapper->GetOpConfig ();
120216 operators.emplace_back (qcir::CreateOperatorDirect (
121- builder ,
217+ builder_ ,
122218 QNN_VER_PTR (op_config)->name ,
123219 QNN_VER_PTR (op_config)->packageName ,
124220 QNN_VER_PTR (op_config)->typeName ,
125221 &inputs,
126222 &outputs,
127223 ¶ms));
128224 }
129- auto graph = qcir::CreateGraphDirect (builder, &operators, &tensors);
130- builder.Finish (graph);
131- context_binary.buffer = builder.GetBufferPointer ();
132- context_binary.nbytes = builder.GetSize ();
133- } else if (
134- qnn_manager_->Compile (op_wrappers, context_binary) !=
135- executorch::runtime::Error::Ok) {
136- return py::array_t <char >(0 );
225+ auto graph = qcir::CreateGraphDirect (
226+ builder_, graph_name.c_str (), &operators, &tensors);
227+ std::vector<flatbuffers::Offset<qcir::Graph>> graphs ({graph});
228+ auto context = qcir::CreateContextDirect (builder_, &graphs);
229+ builder_.Finish (context);
230+ QnnExecuTorchContextBinary qcir_binary (
231+ {builder_.GetBufferPointer (), builder_.GetSize ()});
232+ binary_info = MakeBinaryInfo (qcir_binary);
233+ } else {
234+ if (qnn_manager_->Compile (graph_name, op_wrappers) !=
235+ executorch::runtime::Error::Ok) {
236+ QNN_EXECUTORCH_LOG_ERROR (" Fail to compile QNN graph" );
237+ return py::array_t <char >(0 );
238+ }
239+ if (qnn_manager_->GetContextBinary (binary_info) !=
240+ executorch::runtime::Error::Ok) {
241+ return py::array_t <char >(0 );
242+ }
137243 }
138244
139- // allocate py::array (to pass the result of the C++ function to
140- // Python)
141- auto result = py::array_t <char >(context_binary.nbytes );
245+ // allocate py::array (to pass the result of the C++ function to Python)
246+ auto result = py::array_t <char >(binary_info.nbytes );
142247 auto result_buffer = result.request ();
143248 char * result_ptr = (char *)result_buffer.ptr ;
144- std::memcpy (result_ptr, context_binary .buffer , context_binary .nbytes );
249+ std::memcpy (result_ptr, binary_info .buffer , binary_info .nbytes );
145250 return result;
146251 }
252+
147253 void Destroy () {
148254 return qnn_manager_->Destroy ();
149255 }
@@ -156,38 +262,76 @@ class PyQnnManager {
156262 return qnn_manager_->IsTensorDump ();
157263 }
158264
159- executorch::runtime::Error AllocateTensor () {
160- return qnn_manager_->AllocateTensor ();
265+ executorch::runtime::Error AllocateTensor (const std::string& graph_name ) {
266+ return qnn_manager_->AllocateTensor (graph_name );
161267 }
162268
163- py::list GetGraphInputs () {
269+ py::list GetGraphInputs (const std::string& graph_name ) {
164270 py::list ret;
165271 for (const std::shared_ptr<TensorWrapper>& input :
166- qnn_manager_->GetGraphInputs ()) {
272+ qnn_manager_->GetGraphInputs (graph_name )) {
167273 ret.append (PyQnnTensorWrapper (input));
168274 }
169275 return ret;
170276 }
171277
172- py::list GetGraphOutputs () {
278+ py::list GetGraphOutputs (const std::string& graph_name ) {
173279 py::list ret;
174280 for (const std::shared_ptr<TensorWrapper>& output :
175- qnn_manager_->GetGraphOutputs ()) {
281+ qnn_manager_->GetGraphOutputs (graph_name )) {
176282 ret.append (PyQnnTensorWrapper (output));
177283 }
178284 return ret;
179285 }
180286
287+ py::list GetGraphNames () {
288+ py::list ret;
289+ for (const std::string& graph_name : qnn_manager_->GetGraphNames ()) {
290+ ret.append (graph_name);
291+ }
292+ return ret;
293+ }
294+
181295 uint64_t GetSpillFillBufferSize () {
182296 return qnn_manager_->GetSpillFillBufferSize ();
183297 }
184298
299+ py::array_t <char > MakeBinaryInfo (const py::bytes& ctx_bin) {
300+ py::buffer_info info (py::buffer (ctx_bin).request ());
301+ QnnExecuTorchContextBinary binary (
302+ {info.ptr , static_cast <uint64_t >(info.size * info.itemsize )});
303+ auto binary_info = MakeBinaryInfo (binary);
304+ auto result = py::array_t <char >(binary_info.nbytes );
305+ auto result_buffer = result.request ();
306+ std::memcpy (result_buffer.ptr , binary_info.buffer , binary_info.nbytes );
307+ return result;
308+ }
309+
185310 private:
311+ QnnExecuTorchContextBinary MakeBinaryInfo (
312+ const QnnExecuTorchContextBinary& ctx_bin) {
313+ auto signature = []() {
314+ return std::to_string (
315+ std::chrono::high_resolution_clock::now ().time_since_epoch ().count ());
316+ };
317+ const uint8_t * base = static_cast <uint8_t *>(ctx_bin.buffer );
318+ std::vector<uint8_t > data (base, base + ctx_bin.nbytes );
319+ // add signature to binary for cache reuse in runtime
320+ builder_.Reset ();
321+ auto binary_info = qnn_delegate::CreateBinaryInfoDirect (
322+ builder_, signature ().c_str (), &data);
323+ builder_.Finish (binary_info);
324+
325+ return QnnExecuTorchContextBinary (
326+ {builder_.GetBufferPointer (), builder_.GetSize ()});
327+ }
328+
186329 // Store the bytes object instead of a raw pointer so that this module will
187330 // keep the bytes alive.
188331 const py::bytes qnn_executorch_option_ptr_;
189332 QnnExecuTorchContextBinary qnn_executorch_context_binary_;
190333 std::shared_ptr<QnnManager> qnn_manager_;
334+ flatbuffers::FlatBufferBuilder builder_;
191335};
192336} // namespace qnn
193337} // namespace backends