@@ -91,9 +91,6 @@ template<typename HalVersion>
 RequestThread<ArmnnPreparedModel, HalVersion, CallbackContext_1_0>
         ArmnnPreparedModel<HalVersion>::m_RequestThread;
 
-template<typename HalVersion>
-std::unique_ptr<armnn::Threadpool> ArmnnPreparedModel<HalVersion>::m_Threadpool(nullptr);
-
 template<typename HalVersion>
 template<typename TensorBindingCollection>
 void ArmnnPreparedModel<HalVersion>::DumpTensorsIfRequired(char const* tensorNamePrefix,
@@ -118,8 +115,6 @@ ArmnnPreparedModel<HalVersion>::ArmnnPreparedModel(armnn::NetworkId networkId,
                                                    const HalModel& model,
                                                    const std::string& requestInputsAndOutputsDumpDir,
                                                    const bool gpuProfilingEnabled,
-                                                   const bool asyncModelExecutionEnabled,
-                                                   const unsigned int numberOfThreads,
                                                    const bool importEnabled,
                                                    const bool exportEnabled)
     : m_NetworkId(networkId)
@@ -128,32 +123,11 @@ ArmnnPreparedModel<HalVersion>::ArmnnPreparedModel(armnn::NetworkId networkId,
     , m_RequestCount(0)
     , m_RequestInputsAndOutputsDumpDir(requestInputsAndOutputsDumpDir)
     , m_GpuProfilingEnabled(gpuProfilingEnabled)
-    , m_AsyncModelExecutionEnabled(asyncModelExecutionEnabled)
     , m_EnableImport(importEnabled)
     , m_EnableExport(exportEnabled)
 {
     // Enable profiling if required.
     m_Runtime->GetProfiler(m_NetworkId)->EnableProfiling(m_GpuProfilingEnabled);
-
-    if (m_AsyncModelExecutionEnabled)
-    {
-        std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
-        for (unsigned int i=0; i < numberOfThreads; ++i)
-        {
-            memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(networkId));
-        }
-
-        if (!m_Threadpool)
-        {
-            m_Threadpool = std::make_unique<armnn::Threadpool>(numberOfThreads, runtime, memHandles);
-        }
-        else
-        {
-            m_Threadpool->LoadMemHandles(memHandles);
-        }
-
-        m_WorkingMemHandle = memHandles.back();
-    }
 }
 
 template<typename HalVersion>
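Note on the set-up deleted above: when asynchronous execution was enabled, the constructor created one working memory handle per thread and handed them to a shared armnn::Threadpool (or, if the pool already existed, loaded the handles into it and kept the last one as m_WorkingMemHandle). A condensed sketch of that pattern, reconstructed from the deleted lines; MakeThreadpool is a hypothetical helper and the header paths are assumptions:

// Sketch only: reconstructs the deleted threadpool initialisation.
// MakeThreadpool is a hypothetical helper; header names are assumed.
#include <armnn/IRuntime.hpp>
#include <armnn/Threadpool.hpp>
#include <memory>
#include <vector>

std::unique_ptr<armnn::Threadpool> MakeThreadpool(armnn::IRuntime* runtime,
                                                  armnn::NetworkId networkId,
                                                  unsigned int numberOfThreads)
{
    // One working memory handle per worker thread, as in the deleted loop.
    std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
    for (unsigned int i = 0; i < numberOfThreads; ++i)
    {
        memHandles.emplace_back(runtime->CreateWorkingMemHandle(networkId));
    }
    // The pool owns the worker threads; inferences are later scheduled onto these handles.
    return std::make_unique<armnn::Threadpool>(numberOfThreads, runtime, memHandles);
}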
@@ -170,12 +144,6 @@ ArmnnPreparedModel<HalVersion>::~ArmnnPreparedModel()
 
     // Unload the network associated with this model.
     m_Runtime->UnloadNetwork(m_NetworkId);
-
-    // Unload the network memhandles from the threadpool
-    if (m_AsyncModelExecutionEnabled)
-    {
-        m_Threadpool->UnloadMemHandles(m_NetworkId);
-    }
 }
 
 template<typename HalVersion>
@@ -295,14 +263,6 @@ Return<V1_0::ErrorStatus> ArmnnPreparedModel<HalVersion>::execute(
     CallbackContext_1_0 armnnCb;
     armnnCb.callback = cb;
 
-    if (m_AsyncModelExecutionEnabled)
-    {
-        ALOGV("ArmnnPreparedModel::execute(...) before ScheduleGraphForExecution");
-        ScheduleGraphForExecution(pMemPools, pInputTensors, pOutputTensors, armnnCb);
-        ALOGV("ArmnnPreparedModel::execute(...) after ScheduleGraphForExecution");
-        return V1_0::ErrorStatus::NONE;
-    }
-
     // post the request for asynchronous execution
     ALOGV("ArmnnPreparedModel::execute(...) before PostMsg");
     m_RequestThread.PostMsg(this, pMemPools, pInputTensors, pOutputTensors, armnnCb);
@@ -327,30 +287,21 @@ void ArmnnPreparedModel<HalVersion>::ExecuteGraph(
     try
     {
         armnn::Status status;
-        if (m_AsyncModelExecutionEnabled)
+
+        // Create a vector of Input and Output Ids which can be imported. An empty vector means all will be copied.
+        std::vector<armnn::ImportedInputId> importedInputIds;
+        if (m_EnableImport)
         {
-            ARMNN_NO_DEPRECATE_WARN_BEGIN
-            ALOGW("ArmnnPreparedModel::ExecuteGraph m_AsyncModelExecutionEnabled true");
-            status = m_Runtime->Execute(*m_WorkingMemHandle, inputTensors, outputTensors);
-            ARMNN_NO_DEPRECATE_WARN_END
+            importedInputIds = m_Runtime->ImportInputs(m_NetworkId, inputTensors, armnn::MemorySource::Malloc);
         }
-        else
+        std::vector<armnn::ImportedOutputId> importedOutputIds;
+        if (m_EnableExport)
         {
-            ALOGW("ArmnnPreparedModel::ExecuteGraph m_AsyncModelExecutionEnabled false");
-            // Create a vector of Input and Output Ids which can be imported. An empty vector means all will be copied.
-            std::vector<armnn::ImportedInputId> importedInputIds;
-            if (m_EnableImport)
-            {
-                importedInputIds = m_Runtime->ImportInputs(m_NetworkId, inputTensors, armnn::MemorySource::Malloc);
-            }
-            std::vector<armnn::ImportedOutputId> importedOutputIds;
-            if (m_EnableExport)
-            {
-                importedOutputIds = m_Runtime->ImportOutputs(m_NetworkId, outputTensors, armnn::MemorySource::Malloc);
-            }
-            status = m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors,
-                                                importedInputIds, importedOutputIds);
+            importedOutputIds = m_Runtime->ImportOutputs(m_NetworkId, outputTensors, armnn::MemorySource::Malloc);
         }
+        status = m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors,
+                                            importedInputIds, importedOutputIds);
+
         if (status != armnn::Status::Success)
         {
             ALOGW("EnqueueWorkload failed");
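After this change, ExecuteGraph and ExecuteWithDummyInputs share the same synchronous pattern: optionally import the input and output tensors, then enqueue the workload. A minimal standalone sketch of that flow, using only the armnn::IRuntime calls visible in the added lines; the RunSynchronously wrapper name is illustrative:

// Sketch only: mirrors the import-then-enqueue flow added above.
// RunSynchronously is a hypothetical wrapper around the armnn::IRuntime calls.
#include <armnn/IRuntime.hpp>
#include <vector>

armnn::Status RunSynchronously(armnn::IRuntime* runtime,
                               armnn::NetworkId networkId,
                               const armnn::InputTensors& inputTensors,
                               const armnn::OutputTensors& outputTensors,
                               bool enableImport,
                               bool enableExport)
{
    // Empty ID vectors mean EnqueueWorkload copies the tensors rather than using imported buffers.
    std::vector<armnn::ImportedInputId> importedInputIds;
    if (enableImport)
    {
        importedInputIds = runtime->ImportInputs(networkId, inputTensors, armnn::MemorySource::Malloc);
    }
    std::vector<armnn::ImportedOutputId> importedOutputIds;
    if (enableExport)
    {
        importedOutputIds = runtime->ImportOutputs(networkId, outputTensors, armnn::MemorySource::Malloc);
    }
    return runtime->EnqueueWorkload(networkId, inputTensors, outputTensors,
                                    importedInputIds, importedOutputIds);
}

Passing empty ID vectors keeps copying as the default, so zero-copy import/export stays opt-in per prepared model.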
@@ -427,30 +378,21 @@ bool ArmnnPreparedModel<HalVersion>::ExecuteWithDummyInputs()
     try
     {
         armnn::Status status;
-        if (m_AsyncModelExecutionEnabled)
+
+        // Create a vector of Input and Output Ids which can be imported. An empty vector means all will be copied.
+        std::vector<armnn::ImportedInputId> importedInputIds;
+        if (m_EnableImport)
         {
-            ARMNN_NO_DEPRECATE_WARN_BEGIN
-            ALOGW("ArmnnPreparedModel::ExecuteGraph m_AsyncModelExecutionEnabled true");
-            status = m_Runtime->Execute(*m_WorkingMemHandle, inputTensors, outputTensors);
-            ARMNN_NO_DEPRECATE_WARN_END
+            importedInputIds = m_Runtime->ImportInputs(m_NetworkId, inputTensors, armnn::MemorySource::Malloc);
         }
-        else
+        std::vector<armnn::ImportedOutputId> importedOutputIds;
+        if (m_EnableExport)
         {
-            ALOGW("ArmnnPreparedModel::ExecuteGraph m_AsyncModelExecutionEnabled false");
-            // Create a vector of Input and Output Ids which can be imported. An empty vector means all will be copied.
-            std::vector<armnn::ImportedInputId> importedInputIds;
-            if (m_EnableImport)
-            {
-                importedInputIds = m_Runtime->ImportInputs(m_NetworkId, inputTensors, armnn::MemorySource::Malloc);
-            }
-            std::vector<armnn::ImportedOutputId> importedOutputIds;
-            if (m_EnableExport)
-            {
-                importedOutputIds = m_Runtime->ImportOutputs(m_NetworkId, outputTensors, armnn::MemorySource::Malloc);
-            }
-            status = m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors,
-                                                importedInputIds, importedOutputIds);
+            importedOutputIds = m_Runtime->ImportOutputs(m_NetworkId, outputTensors, armnn::MemorySource::Malloc);
         }
+        status = m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors,
+                                            importedInputIds, importedOutputIds);
+
         if (status != armnn::Status::Success)
         {
             ALOGW("ExecuteWithDummyInputs: EnqueueWorkload failed");
@@ -470,73 +412,11 @@ ARMNN_NO_DEPRECATE_WARN_END
     return true;
 }
 
-/// Schedule the graph prepared from the request for execution
-template<typename HalVersion>
-template<typename CallbackContext>
-void ArmnnPreparedModel<HalVersion>::ScheduleGraphForExecution(
-        std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& pMemPools,
-        std::shared_ptr<armnn::InputTensors>& inputTensors,
-        std::shared_ptr<armnn::OutputTensors>& outputTensors,
-        CallbackContext callbackContext)
-{
-    ALOGV("ArmnnPreparedModel::ScheduleGraphForExecution(...)");
-
-    DumpTensorsIfRequired("Input", *inputTensors);
-
-
-    auto tpCb = std::make_shared<
-        ArmnnThreadPoolCallback<CallbackContext_1_0>>(this,
-                                                      pMemPools,
-                                                      inputTensors,
-                                                      outputTensors,
-                                                      callbackContext);
-
-    m_Threadpool->Schedule(m_NetworkId,
-                           *tpCb->m_InputTensors,
-                           *tpCb->m_OutputTensors,
-                           armnn::QosExecPriority::Medium,
-                           tpCb);
-    ALOGV("ArmnnPreparedModel::ScheduleGraphForExecution end");
-}
-
-template<typename HalVersion>
-template<typename CallbackContext>
-void ArmnnPreparedModel<HalVersion>::ArmnnThreadPoolCallback<CallbackContext>::Notify(
-        armnn::Status status, armnn::InferenceTimingPair timeTaken)
-{
-    armnn::IgnoreUnused(status, timeTaken);
-    ALOGV("ArmnnPreparedModel::ArmnnThreadPoolCallback_1_2 Notify");
-
-    m_Model->DumpTensorsIfRequired("Output", *m_OutputTensors);
-
-    // Commit output buffers.
-    // Note that we update *all* pools, even if they aren't actually used as outputs -
-    // this is simpler and is what the CpuExecutor does.
-    for (android::nn::RunTimePoolInfo& pool : *m_MemPools)
-    {
-        // Type android::nn::RunTimePoolInfo has changed between Android P & Q and Android R, where
-        // update() has been removed and flush() added.
-#if defined(ARMNN_ANDROID_R) || defined(ARMNN_ANDROID_S) // Use the new Android implementation.
-        pool.flush();
-#else
-        pool.update();
-#endif
-    }
-
-    m_CallbackContext.callback(V1_0::ErrorStatus::NONE, "ArmnnPreparedModel::ArmnnThreadPoolCallback_1_2 Notify");
-    return;
-}
-
 ///
 /// Class template specializations
 ///
 
 template class ArmnnPreparedModel<hal_1_0::HalPolicy>;
-template void ArmnnPreparedModel<hal_1_0::HalPolicy>::ScheduleGraphForExecution<CallbackContext_1_0>(
-    std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& pMemPools,
-    std::shared_ptr<armnn::InputTensors>& inputTensors,
-    std::shared_ptr<armnn::OutputTensors>& outputTensors,
-    CallbackContext_1_0 callbackContext);
 
 #ifdef ARMNN_ANDROID_NN_V1_1
 template class ArmnnPreparedModel<hal_1_1::HalPolicy>;
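For reference, the callback deleted above (ArmnnThreadPoolCallback::Notify) committed every output memory pool before signalling the HIDL callback, handling the RunTimePoolInfo API change between Android releases. A condensed sketch of just that commit step, reconstructed from the deleted lines; CommitOutputPools is a hypothetical helper name and the RunTimePoolInfo header is the NNAPI common one this file already uses:

// Sketch only: the pool-commit step from the deleted callback.
// CommitOutputPools is a hypothetical helper; the flush()/update() split
// follows the ARMNN_ANDROID_R / ARMNN_ANDROID_S guards in the deleted code.
#include <vector>

void CommitOutputPools(std::vector<android::nn::RunTimePoolInfo>& memPools)
{
    // Update *all* pools, even those not used as outputs, matching the CpuExecutor behaviour.
    for (android::nn::RunTimePoolInfo& pool : memPools)
    {
#if defined(ARMNN_ANDROID_R) || defined(ARMNN_ANDROID_S)
        pool.flush();   // Android R/S: update() was replaced by flush().
#else
        pool.update();  // Older Android versions.
#endif
    }
}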