@@ -47,20 +47,100 @@ class PyQnnManager {
4747 qnn_manager_ = std::make_shared<QnnManager>(
4848 qnn_executorch_options, qnn_executorch_context_binary_);
4949 }
50+ // used for loading multiple graphs in qcir
51+ explicit PyQnnManager (const py::bytes& buffer, const py::list& qcirs)
52+ : qnn_executorch_option_ptr_(buffer) {
53+ auto qnn_executorch_options = GetQnnExecuTorchOptions (
54+ qnn_executorch_option_ptr_.cast <std::string_view>().data ());
55+
56+ // merge multiple qcirs into one context with multiple graphs
57+ std::vector<flatbuffers::Offset<qcir::Graph>> graphs;
58+ for (size_t i = 0 ; i < qcirs.size (); ++i) {
59+ py::buffer_info info (py::buffer (qcirs[i].cast <py::bytes>()).request ());
60+ flatbuffers::Verifier verifier (
61+ static_cast <const uint8_t * const >(info.ptr ),
62+ info.size * info.itemsize );
63+
64+ if (!qcir::VerifyContextBuffer (verifier)) {
65+ QNN_EXECUTORCH_LOG_ERROR (" Fail to verify qcir format" );
66+ return ;
67+ }
68+ auto context = qcir::GetContext (info.ptr );
69+ for (const auto & graph : *context->graphs ()) {
70+ std::vector<flatbuffers::Offset<qcir::Tensor>> tensors;
71+ for (const auto tensor : *graph->tensors ()) {
72+ // flatbuffers::Offset<Tensor> ToTensor(
73+ // QnnTensor
74+ // ToTensor(flatbuffers::Vector<::flatbuffers::Offset<qcir::Tensor>>
75+ // tensor), flatbuffers::FlatBufferBuilder* builder);
76+ tensors.emplace_back (ToTensor (ToTensor (tensor), &builder_));
77+ }
78+ std::vector<flatbuffers::Offset<qcir::Operator>> nodes;
79+ for (const auto & node : *graph->nodes ()) {
80+ int32_t * inputs_ptr = const_cast <int32_t *>(node->inputs ()->data ());
81+ int32_t * outputs_ptr = const_cast <int32_t *>(node->outputs ()->data ());
82+ int32_t * params_ptr = const_cast <int32_t *>(node->params ()->data ());
83+ std::vector<int32_t > inputs (
84+ inputs_ptr, inputs_ptr + node->inputs ()->size ());
85+ std::vector<int32_t > outputs (
86+ outputs_ptr, outputs_ptr + node->outputs ()->size ());
87+ std::vector<int32_t > params (
88+ params_ptr, params_ptr + node->params ()->size ());
89+ nodes.emplace_back (qcir::CreateOperatorDirect (
90+ builder_,
91+ node->name ()->str ().c_str (),
92+ node->package_name ()->str ().c_str (),
93+ node->type_name ()->str ().c_str (),
94+ &inputs,
95+ &outputs,
96+              &params));
97+ }
98+ graphs.emplace_back (qcir::CreateGraphDirect (
99+ builder_, graph->name ()->str ().c_str (), &nodes, &tensors));
100+ }
101+ }
102+ auto context = qcir::CreateContextDirect (builder_, &graphs);
103+ builder_.Finish (context);
104+ qnn_executorch_context_binary_.buffer = builder_.GetBufferPointer ();
105+ qnn_executorch_context_binary_.nbytes = builder_.GetSize ();
106+ qnn_manager_ = std::make_shared<QnnManager>(
107+ qnn_executorch_options, qnn_executorch_context_binary_);
108+ }
50109
51110 executorch::runtime::Error Init () {
52111 return qnn_manager_->Init ();
53112 }
113+
54114 bool IsNodeSupportedByBackend (
55115 std::vector<std::shared_ptr<OpWrapper>>& op_wrappers) {
56116 return qnn_manager_->IsNodeSupportedByBackend (op_wrappers);
57117 }
118+
119+ // this method is specific for compiling multi-graphs
120+ py::array_t <char > Compile () {
121+ if (qnn_manager_->CompileQcir () != Error::Ok) {
122+ QNN_EXECUTORCH_LOG_ERROR (" Fail to compile qcir" );
123+ return py::array_t <char >(0 );
124+ }
125+
126+    // generate context binary if compilation succeeded
127+ QnnExecuTorchContextBinary context_binary;
128+ qnn_manager_->GetContextBinary (context_binary);
129+ // allocate py::array (to pass the result of the C++ function to Python)
130+ auto result = py::array_t <char >(context_binary.nbytes );
131+ auto result_buffer = result.request ();
132+ char * result_ptr = (char *)result_buffer.ptr ;
133+ std::memcpy (result_ptr, context_binary.buffer , context_binary.nbytes );
134+ return result;
135+ }
136+
58137 py::array_t <char > Compile (
138+ const std::string& graph_name,
59139 std::vector<std::shared_ptr<OpWrapper>>& op_wrappers) {
60140 QnnExecuTorchContextBinary context_binary;
61141 flatbuffers::FlatBufferBuilder builder;
62142
63- if (qnn_manager_->IsOnlinePrepare ()) {
143+ if (qnn_manager_->IsOnlinePrepare () || qnn_manager_-> IsMultipleGraphs () ) {
64144 std::vector<flatbuffers::Offset<qcir::Tensor>> tensors;
65145 std::unordered_map<void *, int > tensor_map;
66146
@@ -126,14 +206,19 @@ class PyQnnManager {
126206 &outputs,
127207              &params));
128208 }
129- auto graph = qcir::CreateGraphDirect (builder, &operators, &tensors);
130- builder.Finish (graph);
209+ auto graph = qcir::CreateGraphDirect (
210+ builder, graph_name.c_str (), &operators, &tensors);
211+ std::vector<flatbuffers::Offset<qcir::Graph>> graphs ({graph});
212+ auto context = qcir::CreateContextDirect (builder, &graphs);
213+ builder.Finish (context);
131214 context_binary.buffer = builder.GetBufferPointer ();
132215 context_binary.nbytes = builder.GetSize ();
133- } else if (
134- qnn_manager_->Compile (op_wrappers, context_binary) !=
135- executorch::runtime::Error::Ok) {
136- return py::array_t <char >(0 );
216+ } else {
217+ if (qnn_manager_->Compile (graph_name, op_wrappers) !=
218+ executorch::runtime::Error::Ok) {
219+ return py::array_t <char >(0 );
220+ }
221+ qnn_manager_->GetContextBinary (context_binary);
137222 }
138223
139224 // allocate py::array (to pass the result of the C++ function to
@@ -144,6 +229,7 @@ class PyQnnManager {
144229 std::memcpy (result_ptr, context_binary.buffer , context_binary.nbytes );
145230 return result;
146231 }
232+
147233 void Destroy () {
148234 return qnn_manager_->Destroy ();
149235 }
@@ -156,28 +242,36 @@ class PyQnnManager {
156242 return qnn_manager_->IsTensorDump ();
157243 }
158244
159- executorch::runtime::Error AllocateTensor () {
160- return qnn_manager_->AllocateTensor ();
245+ executorch::runtime::Error AllocateTensor (const std::string& graph_name ) {
246+ return qnn_manager_->AllocateTensor (graph_name );
161247 }
162248
163- py::list GetGraphInputs () {
249+ py::list GetGraphInputs (const std::string& graph_name ) {
164250 py::list ret;
165251 for (const std::shared_ptr<TensorWrapper>& input :
166- qnn_manager_->GetGraphInputs ()) {
252+ qnn_manager_->GetGraphInputs (graph_name )) {
167253 ret.append (PyQnnTensorWrapper (input));
168254 }
169255 return ret;
170256 }
171257
172- py::list GetGraphOutputs () {
258+ py::list GetGraphOutputs (const std::string& graph_name ) {
173259 py::list ret;
174260 for (const std::shared_ptr<TensorWrapper>& output :
175- qnn_manager_->GetGraphOutputs ()) {
261+ qnn_manager_->GetGraphOutputs (graph_name )) {
176262 ret.append (PyQnnTensorWrapper (output));
177263 }
178264 return ret;
179265 }
180266
267+ py::list GetGraphNames () {
268+ py::list ret;
269+ for (const std::string& graph_name : qnn_manager_->GetGraphNames ()) {
270+ ret.append (graph_name);
271+ }
272+ return ret;
273+ }
274+
181275 uint64_t GetSpillFillBufferSize () {
182276 return qnn_manager_->GetSpillFillBufferSize ();
183277 }
@@ -188,6 +282,7 @@ class PyQnnManager {
188282 const py::bytes qnn_executorch_option_ptr_;
189283 QnnExecuTorchContextBinary qnn_executorch_context_binary_;
190284 std::shared_ptr<QnnManager> qnn_manager_;
285+ flatbuffers::FlatBufferBuilder builder_;
191286};
192287} // namespace qnn
193288} // namespace backends
0 commit comments