Add timing logs around QNN backend initialization to narrow down the issue

winskuo-quic · winskuo-quic · commit e9d99ecd4af3 · 2025-02-12T22:28:19.000+08:00
diff --git a/backends/qualcomm/runtime/QnnExecuTorchBackend.cpp b/backends/qualcomm/runtime/QnnExecuTorchBackend.cpp
@@ -11,6 +11,8 @@
 #include <executorch/backends/qualcomm/runtime/QnnExecuTorchBackend.h>
 #include <executorch/backends/qualcomm/runtime/QnnManager.h>
 #include <executorch/backends/qualcomm/runtime/backends/QnnCustomProtocol.h>
+#include <chrono>
+#include <iostream>
 
 namespace executorch {
 namespace backends {
@@ -27,12 +29,22 @@ using executorch::runtime::FreeableBuffer;
 using executorch::runtime::MemoryAllocator;
 using executorch::runtime::Result;
 
+void QnnExecuTorchBackend::print_profile() const {  // Writes the init() phase durations (whole ms, end stamp minus start stamp) to stdout; stamps not set during the current init() call keep whatever value they had before.
+  std::cout << "=================QnnExecuTorchBackend.cpp Profiling Time=================\n"
+            << "QnnExecuTorchBackend::init(): " << std::chrono::duration_cast<std::chrono::milliseconds>(qnn_executorch_backend_init_end - qnn_executorch_backend_init_start).count() << " ms\n"
+            << "Create QNN Manager: " << std::chrono::duration_cast<std::chrono::milliseconds>(qnn_manager_create_end - qnn_manager_create_start).count() << " ms\n"
+            << "qnn_manager->Init(): " << std::chrono::duration_cast<std::chrono::milliseconds>(qnn_manager_init_end - qnn_manager_init_start).count() << " ms\n"
+            << "qnn_manager->AllocateTensor(): " << std::chrono::duration_cast<std::chrono::milliseconds>(qnn_manager_allocate_tensor_end - qnn_manager_allocate_tensor_start).count() << " ms" << std::endl;  // one flush for the whole report instead of one per line
+}
+
 // ========== Public method implementations =========================
 constexpr const char* QNN_COMPILE_SPEC = "qnn_compile_spec";
 Result<DelegateHandle*> QnnExecuTorchBackend::init(
     BackendInitContext& context,
     FreeableBuffer* processed,
     ArrayRef<CompileSpec> compile_specs) const {
+  qnn_executorch_backend_init_start = std::chrono::high_resolution_clock::now();
+  QNN_EXECUTORCH_LOG_INFO("QnnExecuTorchBackend::init start");
   // covert SizedBuffer to qnn ExecuTorch option
   QnnExecuTorchContextBinary qnn_context_blob;
   const qnn_delegate::QnnExecuTorchOptions* qnn_executorch_options = nullptr;
@@ -64,6 +76,7 @@ Result<DelegateHandle*> QnnExecuTorchBackend::init(
       QNN_EXECUTORCH_LOG_WARN("unknown argument: %s", compile_spec.key);
   }
 
+  qnn_manager_create_start = std::chrono::high_resolution_clock::now();
   // Create QnnManager
   MemoryAllocator* runtime_allocator = context.get_runtime_allocator();
   QnnManager* qnn_manager =
@@ -72,7 +85,7 @@ Result<DelegateHandle*> QnnExecuTorchBackend::init(
   // NOTE: Since we use placement new and since this type is not trivially
   // destructible, we must call the destructor manually in destroy().
   new (qnn_manager) QnnManager(qnn_executorch_options, qnn_context_blob);
-
+  qnn_manager_create_end = std::chrono::high_resolution_clock::now();
   // TODO: this is a temporal solution for multi-graph support, will be
   //       removed once framework starts to accept runtime configuration
   // ---
@@ -84,30 +97,38 @@ Result<DelegateHandle*> QnnExecuTorchBackend::init(
     QNN_EXECUTORCH_LOG_INFO(
         "Use cached delegate handle for current method: %s",
         context.get_method_name());
+    qnn_executorch_backend_init_end = std::chrono::high_resolution_clock::now();
+    print_profile();
     return iter->second;
   }
 
+  qnn_manager_init_start = std::chrono::high_resolution_clock::now();
   ET_CHECK_OR_RETURN_ERROR(
       qnn_manager->Init() == Error::Ok,
       Internal,
       "Fail to initialize Qnn Manager");
+  qnn_manager_init_end = std::chrono::high_resolution_clock::now();
 
   if (qnn_manager->IsOnlinePrepare()) {
     ET_CHECK_OR_RETURN_ERROR(
         qnn_manager->CompileQcir() == Error::Ok,
         Internal,
         "Fail to compile binary in qcir format");
   } else {
+    qnn_manager_allocate_tensor_start = std::chrono::high_resolution_clock::now();
     for (const std::string& graph_name : qnn_manager->GetGraphNames()) {
       ET_CHECK_OR_RETURN_ERROR(
           qnn_manager->AllocateTensor(graph_name) == Error::Ok,
           Internal,
           "Fail to allocate tensor");
     }
+    qnn_manager_allocate_tensor_end = std::chrono::high_resolution_clock::now();
   }
   add_cached_delegate(signature, qnn_manager);
   // This backend does not need its processed data after Init.
   processed->Free();
+  qnn_executorch_backend_init_end = std::chrono::high_resolution_clock::now();
+  print_profile();
   return qnn_manager;
 }
 
diff --git a/backends/qualcomm/runtime/QnnExecuTorchBackend.h b/backends/qualcomm/runtime/QnnExecuTorchBackend.h
@@ -23,6 +23,8 @@ class QnnExecuTorchBackend final
  public:
   ~QnnExecuTorchBackend(){};
 
+  void print_profile() const;
+
   executorch::runtime::Result<executorch::runtime::DelegateHandle*> init(
       executorch::runtime::BackendInitContext& context,
       executorch::runtime::FreeableBuffer* processed,
@@ -49,6 +51,15 @@ class QnnExecuTorchBackend final
       delegate_map_;
   mutable std::unordered_map<executorch::runtime::DelegateHandle*, std::int64_t>
       delegate_map_rev_;
+  mutable std::chrono::high_resolution_clock::time_point qnn_executorch_backend_init_start;
+  mutable std::chrono::high_resolution_clock::time_point qnn_executorch_backend_init_end;
+  mutable std::chrono::high_resolution_clock::time_point qnn_manager_create_start;
+  mutable std::chrono::high_resolution_clock::time_point qnn_manager_create_end;
+  mutable std::chrono::high_resolution_clock::time_point qnn_manager_init_start;
+  mutable std::chrono::high_resolution_clock::time_point qnn_manager_init_end;
+  mutable std::chrono::high_resolution_clock::time_point qnn_manager_allocate_tensor_start;
+  mutable std::chrono::high_resolution_clock::time_point qnn_manager_allocate_tensor_end;
+
 };
 
 } // namespace qnn
diff --git a/backends/qualcomm/runtime/QnnManager.cpp b/backends/qualcomm/runtime/QnnManager.cpp
@@ -19,6 +19,8 @@
 #include <cstring>
 #include <fstream>
 #include <string>
+#include <chrono>
+#include <iostream>
 
 namespace executorch {
 namespace backends {
@@ -98,6 +100,15 @@ QnnManager::QnnManager(
   backend_params_ptr_ = std::make_unique<BackendConfigParameters>();
 }
 
+void QnnManager::print_profile() {  // Writes the per-stage Configure() durations (whole ms) to stdout; every stage except the last is measured as the next stage's start stamp minus its own start stamp.
+  std::cout << "=================QnnManager.cpp Profiling Time=================\n"
+            << "backend_params_ptr_->qnn_backend_cache_ptr_->Configure(): " << std::chrono::duration_cast<std::chrono::milliseconds>(qnn_backend_ptr_configure_start - qnn_backend_cache_ptr_configure_start).count() << " ms\n"
+            << "backend_params_ptr_->qnn_backend_ptr_->Configure(): " << std::chrono::duration_cast<std::chrono::milliseconds>(qnn_device_ptr_configure_start - qnn_backend_ptr_configure_start).count() << " ms\n"
+            << "backend_params_ptr_->qnn_device_ptr_->Configure(): " << std::chrono::duration_cast<std::chrono::milliseconds>(qnn_context_ptr_configure_start - qnn_device_ptr_configure_start).count() << " ms\n"
+            << "backend_params_ptr_->qnn_context_ptr_->Configure(): " << std::chrono::duration_cast<std::chrono::milliseconds>(qnn_graph_ptr_configure_start - qnn_context_ptr_configure_start).count() << " ms\n"
+            << "backend_params_ptr_->qnn_graph_ptr_->Configure(): " << std::chrono::duration_cast<std::chrono::milliseconds>(qnn_graph_ptr_configure_end - qnn_graph_ptr_configure_start).count() << " ms" << std::endl;  // one flush for the whole report instead of one per line
+}
+
 Error QnnManager::LoadQnnLibrary() {
   Error ret = qnn_loaded_backend_.Load(nullptr);
   return ret;
@@ -291,22 +302,27 @@ Error QnnManager::Init() {
         backend_params_ptr_ != nullptr,
         Internal,
         "Failed to load Qnn backend.");
+    qnn_backend_cache_ptr_configure_start = std::chrono::high_resolution_clock::now();
     ET_CHECK_OR_RETURN_ERROR(
         backend_params_ptr_->qnn_backend_cache_ptr_->Configure() == Error::Ok,
         Internal,
         "Fail to configure Qnn backend cache");
+    qnn_backend_ptr_configure_start = std::chrono::high_resolution_clock::now();
     ET_CHECK_OR_RETURN_ERROR(
         backend_params_ptr_->qnn_backend_ptr_->Configure() == Error::Ok,
         Internal,
         "Fail to configure Qnn backend");
+    qnn_device_ptr_configure_start = std::chrono::high_resolution_clock::now();
     ET_CHECK_OR_RETURN_ERROR(
         backend_params_ptr_->qnn_device_ptr_->Configure() == Error::Ok,
         Internal,
         "Fail to configure Qnn device");
+    qnn_context_ptr_configure_start = std::chrono::high_resolution_clock::now();
     ET_CHECK_OR_RETURN_ERROR(
         backend_params_ptr_->qnn_context_ptr_->Configure() == Error::Ok,
         Internal,
         "Fail to configure Qnn context");
+    qnn_graph_ptr_configure_start = std::chrono::high_resolution_clock::now();
     for (const std::string& graph_name :
          backend_params_ptr_->qnn_context_ptr_->GetGraphNames()) {
       ET_CHECK_OR_RETURN_ERROR(
@@ -315,6 +331,7 @@ Error QnnManager::Init() {
           Internal,
           "Fail to configure Qnn graph");
     }
+    qnn_graph_ptr_configure_end = std::chrono::high_resolution_clock::now();
 
     backend_params_ptr_->backend_init_state_ =
         BackendInitializeState::INITIALIZED;
@@ -444,6 +461,7 @@ Error QnnManager::ProfileExecuteData(
 }
 
 void QnnManager::Destroy() {
+  print_profile();
   QNN_EXECUTORCH_LOG_INFO("Destroy Qnn backend parameters");
   backend_params_ptr_.reset(new BackendConfigParameters());
   logger_.reset();
diff --git a/backends/qualcomm/runtime/QnnManager.h b/backends/qualcomm/runtime/QnnManager.h
@@ -23,6 +23,7 @@ namespace backends {
 namespace qnn {
 class QnnManager {
  public:
+  void print_profile();
   // Construct QnnManager
   explicit QnnManager(
       const QnnExecuTorchOptions* options,
@@ -147,6 +148,12 @@ class QnnManager {
           {Qnn_DataType_t::QNN_DATATYPE_UFIXED_POINT_16,
            executorch::aten::ScalarType::UInt16},
   };
+  std::chrono::high_resolution_clock::time_point qnn_backend_cache_ptr_configure_start;
+  std::chrono::high_resolution_clock::time_point qnn_backend_ptr_configure_start;
+  std::chrono::high_resolution_clock::time_point qnn_device_ptr_configure_start;
+  std::chrono::high_resolution_clock::time_point qnn_context_ptr_configure_start;
+  std::chrono::high_resolution_clock::time_point qnn_graph_ptr_configure_start;
+  std::chrono::high_resolution_clock::time_point qnn_graph_ptr_configure_end;
 };
 } // namespace qnn
 } // namespace backends