diff --git a/backends/qualcomm/CMakeLists.txt b/backends/qualcomm/CMakeLists.txt index cc7957dfdbe..b03cbd1a99e 100644 --- a/backends/qualcomm/CMakeLists.txt +++ b/backends/qualcomm/CMakeLists.txt @@ -150,6 +150,7 @@ add_library(qnn_executorch_backend SHARED) add_library(qnn_executorch_header INTERFACE) add_library(qnn_executorch_logging STATIC) add_library(qnn_factory STATIC) +add_library(qnn_backend_unified_registry STATIC) add_library(qnn_function_interface INTERFACE) add_library(qnn_graph STATIC) add_library(qnn_implementation STATIC) @@ -213,13 +214,30 @@ target_link_libraries( ) target_link_libraries( - qnn_dlc_manager PRIVATE qnn_factory qnn_backend qnn_device qnn_context - qnn_graph qnn_mem_manager + qnn_backend_unified_registry PRIVATE qnn_schema qnn_backend qnn_device + qnn_implementation ) target_link_libraries( - qnn_manager PRIVATE qnn_factory wrappers qnn_schema utils shared_buffer - qnn_dlc_manager + qnn_dlc_manager + PRIVATE qnn_factory + qnn_backend_unified_registry + qnn_backend + qnn_device + qnn_context + qnn_graph + qnn_mem_manager +) + +target_link_libraries( + qnn_manager + PRIVATE qnn_factory + qnn_backend_unified_registry + wrappers + qnn_schema + utils + shared_buffer + qnn_dlc_manager ) target_link_libraries( qnn_executorch_backend diff --git a/backends/qualcomm/aot/python/PyQnnManagerAdaptor.cpp b/backends/qualcomm/aot/python/PyQnnManagerAdaptor.cpp index 2511cd96636..fc134d4f51b 100644 --- a/backends/qualcomm/aot/python/PyQnnManagerAdaptor.cpp +++ b/backends/qualcomm/aot/python/PyQnnManagerAdaptor.cpp @@ -28,15 +28,39 @@ std::string GetQnnSdkBuildId(std::string library_path) { if (err != QNN_SUCCESS || id == nullptr) { throw std::runtime_error("Failed to get QNN backend build ID"); } - qnn_loaded_backend.TerminateAllBackends(); + qnn_loaded_backend.Unload(); return std::string(id); } +py::array_t<char> StripProtocol(const py::bytes& preprocessed_binary) { + py::buffer_info info(py::buffer(preprocessed_binary).request()); + + void* buf_ptr = nullptr; + size_t buf_size = 0; + // check if it's a qnn context binary + auto [status, signature, ctx_size, ctx_bin] = + QnnContextCustomProtocol().DeserializeContextCustomBuffer(info.ptr); + + if (status == Error::Ok) { + buf_size = ctx_size; + buf_ptr = ctx_bin; + } else { + // the format should be DLC, return nothing here + return py::array_t<char>(0); + } + + auto result = py::array_t<char>(buf_size); + auto result_buffer = result.request(); + std::memcpy(result_buffer.ptr, buf_ptr, buf_size); + return result; +} + PYBIND11_MODULE(PyQnnManagerAdaptor, m) { // TODO: Add related documents for configurations listed below using namespace qnn_delegate; m.def("GetQnnSdkBuildId", &GetQnnSdkBuildId); + m.def("StripProtocol", &StripProtocol); py::class_<QnnExecuTorchContextBinary>(m, "QnnExecuTorchContextBinary") .def(py::init<>()); @@ -49,6 +73,8 @@ PYBIND11_MODULE(PyQnnManagerAdaptor, m) { .def(py::init()) .def(py::init()) .def("Init", &PyQnnManager::Init) + .def("InitBackend", &PyQnnManager::InitBackend) + .def("InitContext", &PyQnnManager::InitContext) .def("IsNodeSupportedByBackend", &PyQnnManager::IsNodeSupportedByBackend) .def( "Compile", py::overload_cast< std::vector<std::string>&, std::vector<std::vector<std::shared_ptr<OpWrapper>>>&>( &PyQnnManager::Compile)) .def("Destroy", &PyQnnManager::Destroy) + .def("DestroyContext", &PyQnnManager::DestroyContext) .def("IsAvailable", &PyQnnManager::IsAvailable) .def("IsTensorDump", &PyQnnManager::IsTensorDump) .def("AllocateTensor", &PyQnnManager::AllocateTensor) @@ -66,8 +93,7 @@ PYBIND11_MODULE(PyQnnManagerAdaptor, m) {
.def("GetSpillFillBufferSize", &PyQnnManager::GetSpillFillBufferSize) .def( "MakeBinaryInfo", - py::overload_cast<const py::bytes&>(&PyQnnManager::MakeBinaryInfo)) - .def("StripProtocol", &PyQnnManager::StripProtocol); + py::overload_cast<const py::bytes&>(&PyQnnManager::MakeBinaryInfo)); } } // namespace qnn } // namespace backends diff --git a/backends/qualcomm/aot/python/PyQnnManagerAdaptor.h b/backends/qualcomm/aot/python/PyQnnManagerAdaptor.h index c8044e5db0e..9c2bd18b1f7 100644 --- a/backends/qualcomm/aot/python/PyQnnManagerAdaptor.h +++ b/backends/qualcomm/aot/python/PyQnnManagerAdaptor.h @@ -50,7 +50,24 @@ class PyQnnManager { } executorch::runtime::Error Init() { - return qnn_manager_->Init(); + ET_CHECK_OR_RETURN_ERROR( + qnn_manager_->InitBackend() == Error::Ok, + Internal, + "Fail to initialize backend"); + ET_CHECK_OR_RETURN_ERROR( + qnn_manager_->InitContext() == Error::Ok, + Internal, + "Fail to initialize context"); + return Error::Ok; + } + + executorch::runtime::Error InitBackend() { + return qnn_manager_->InitBackend(); + } + + executorch::runtime::Error InitContext( + const std::vector<std::string>& graph_names) { + return qnn_manager_->InitContext(std::optional{graph_names}); } bool IsNodeSupportedByBackend( @@ -90,6 +107,10 @@ class PyQnnManager { return qnn_manager_->Destroy(); } + void DestroyContext() { + return qnn_manager_->DestroyContext(); + } + bool IsAvailable() { return qnn_manager_->IsAvailable(); } @@ -148,29 +169,6 @@ class PyQnnManager { return result; } - py::array_t<char> StripProtocol(const py::bytes& preprocessed_binary) { - py::buffer_info info(py::buffer(preprocessed_binary).request()); - - void* buf_ptr = nullptr; - size_t buf_size = 0; - // check if it's a qnn context binary - auto [status, signature, ctx_size, ctx_bin] = - QnnContextCustomProtocol().DeserializeContextCustomBuffer(info.ptr); - - if (status == Error::Ok) { - buf_size = ctx_size; - buf_ptr = ctx_bin; - } else { - // the format should be DLC, return nothing here - return py::array_t<char>(0); - } - - auto result = py::array_t<char>(buf_size); - auto result_buffer = result.request(); - std::memcpy(result_buffer.ptr, buf_ptr, buf_size); - return result; - } - private: // Store the bytes object instead of a raw pointer so that this module will // keep the bytes alive. @@ -178,7 +176,6 @@ class PyQnnManager { QnnExecuTorchContextBinary qnn_executorch_context_binary_; std::shared_ptr<QnnManager> qnn_manager_; QnnContextCustomProtocol custom_context_custom_buffer_; - flatbuffers::FlatBufferBuilder builder_; }; } // namespace qnn } // namespace backends diff --git a/backends/qualcomm/builders/README.md b/backends/qualcomm/builders/README.md index 2f1c2d54828..437eb85859c 100644 --- a/backends/qualcomm/builders/README.md +++ b/backends/qualcomm/builders/README.md @@ -18,8 +18,8 @@ Thank you for contributing to Qualcomm AI Engine Direct delegate for ExecuTorch.
## References ### Qualcomm AI Engine Direct -- [Operator Definitions](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/MasterOpDef.html) -- [Supported Operators in Backends](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/operations.html#backend-supplements) +- [Operator Definitions](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-10/MasterOpDef.html) +- [Supported Operators in Backends](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-10/operations.html#backend-supplements) ### PyTorch - [torch.nn Operator Definitions](https://pytorch.org/docs/stable/nn.html) @@ -124,9 +124,9 @@ It will provide more hint to the source PyTorch layer where the missing operator }; } Qnn_Param_t; ``` - The name value equals to the parameter name described in [Operator Definitions](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/MasterOpDef.html), there are `epsilon`, `axes` for `LayerNorm` case.
+ The name value equals the parameter name described in [Operator Definitions](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-10/MasterOpDef.html); for the `LayerNorm` case, these are `epsilon` and `axes`.
- If you find it hard to correlate missing operator with documentation, this [table](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/SupportedOps.html) might be helpful for searching. In some cases, an exact match may not exist. Consider seeking for a math equivalent approach or notify maintainer for further analysis. + If you find it hard to correlate a missing operator with the documentation, this [table](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-10/SupportedOps.html) might be helpful for searching. In some cases, an exact match may not exist; consider a mathematically equivalent approach or notify the maintainer for further analysis. - **PyTorch**:
We could also read the IO spec from [function declaration](https://github.com/pytorch/pytorch/blob/main/aten/src/ATen/native/layer_norm.cpp) mentioned in [PyTorch Documentation](#pytorch): diff --git a/backends/qualcomm/debugger/utils.py b/backends/qualcomm/debugger/utils.py index d6b91c83996..29daa1f8784 100644 --- a/backends/qualcomm/debugger/utils.py +++ b/backends/qualcomm/debugger/utils.py @@ -348,8 +348,8 @@ def generate_optrace( qnn_binary_file="forward_0.dlc", ): """ - Generate Qnn HTP Optrace Profiling https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/htp_backend.html#qnn-htp-optrace-profiling - and QNN HTP Analysis Summary (QHAS) https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/htp_backend.html#qnn-htp-analysis-summary-qhas + Generate Qnn HTP Optrace Profiling https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-10/htp_backend.html#qnn-htp-optrace-profiling + and QNN HTP Analysis Summary (QHAS) https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-10/htp_backend.html#qnn-htp-analysis-summary-qhas . You can utilize the QAIRT Visualizer (https://pypi.org/project/qairt-visualizer/) to visualize the results from the files above. """ graph_name, file_extension = os.path.splitext(qnn_binary_file) diff --git a/backends/qualcomm/partition/qnn_partitioner.py b/backends/qualcomm/partition/qnn_partitioner.py index 19e998f59a3..c57bad3cee3 100644 --- a/backends/qualcomm/partition/qnn_partitioner.py +++ b/backends/qualcomm/partition/qnn_partitioner.py @@ -8,7 +8,6 @@ from collections import defaultdict from typing import Any, Callable, Dict, List, Optional, Tuple -import executorch.backends.qualcomm.python.PyQnnManagerAdaptor as PyQnnManager import torch from executorch.backends.qualcomm.builders import node_visitor_manager from executorch.backends.qualcomm.builders.qnn_constants import OpContextLoader @@ -21,6 +20,9 @@ QCOM_BYPASS_NODE, ) +from executorch.backends.qualcomm.utils.qnn_manager_lifecycle import ( + get_current_qnn_manager, +) from executorch.exir.backend.backend_details import CompileSpec from executorch.exir.backend.canonical_partitioners.pattern_op_partitioner import ( generate_partitions_from_list_of_nodes, @@ -55,7 +57,8 @@ def __init__( skip_node_id_set: set = None, skip_node_op_set: set = None, ): - python_options = flatbuffer_to_option(compiler_specs[0].value) + option = generate_qnn_executorch_option(compiler_specs) + python_options = flatbuffer_to_option(option) self.node_visitors = node_visitor_manager.get_node_visitors( edge_program, op_package_infos=python_options.op_package_options.op_package_infos, @@ -64,12 +67,10 @@ def __init__( self.skip_node_op_set = skip_node_op_set self.skip_node_id_set = skip_node_id_set self.nodes_to_wrappers = defaultdict(dict) - self.qnn_manager = PyQnnManager.QnnManager( - generate_qnn_executorch_option(compiler_specs) + self.qnn_manager = get_current_qnn_manager( + python_options.backend_options.backend_type, compiler_specs ) - self.qnn_manager.Init() - def is_node_supported(self, _, node: torch.fx.Node) -> bool: if node.op != "call_function" or node.target in not_supported_operator: return False @@ -118,9 +119,6 @@ def is_node_supported(self, _, node: torch.fx.Node) -> bool: print(f"[QNN Partitioner Op Support]: {node.target.__name__} | {supported}") return supported - def __del__(self): - self.qnn_manager.Destroy() - class QnnPartitioner(Partitioner): """ diff --git a/backends/qualcomm/qnn_preprocess.py b/backends/qualcomm/qnn_preprocess.py index 4e9cda21d02..4bdcfaa3fd1 100644 
--- a/backends/qualcomm/qnn_preprocess.py +++ b/backends/qualcomm/qnn_preprocess.py @@ -8,8 +8,6 @@ from collections import defaultdict from typing import Dict, final, List -import executorch.backends.qualcomm.python.PyQnnManagerAdaptor as PyQnnManager - import torch # noqa: F401 from executorch.backends.qualcomm._passes.qnn_pass_manager import QnnPassManager from executorch.backends.qualcomm.builders.node_visitor_manager import get_node_visitors @@ -20,7 +18,9 @@ ) from executorch.backends.qualcomm.serialization.qc_schema_serialize import ( flatbuffer_to_option, - option_to_flatbuffer, +) +from executorch.backends.qualcomm.utils.qnn_manager_lifecycle import ( + get_current_qnn_manager, ) from executorch.exir.backend.backend_details import ( BackendDetails, @@ -30,6 +30,7 @@ from torch.export.exported_program import ExportedProgram DEFAULT_DEBUG_HANDLE = 65535 +DEFAULT_GRAPH_NAME = "forward" logger = logging.getLogger(__name__) logger.setLevel(logging.DEBUG) @@ -99,9 +100,11 @@ def preprocess( compile_specs: List[CompileSpec], ) -> PreprocessResult: option = generate_qnn_executorch_option(compile_specs) - qnn_manager = PyQnnManager.QnnManager(option) - qnn_manager.Init() obj_options = flatbuffer_to_option(option) + qnn_manager = get_current_qnn_manager( + obj_options.backend_options.backend_type, compile_specs + ) + qnn_manager.InitContext([DEFAULT_GRAPH_NAME]) py_op_wrapper_list = QnnBackend._build_op_wrappers( edge_program, qnn_manager.IsTensorDump(), @@ -118,7 +121,7 @@ def preprocess( f"Record all QNN API calls from saver backend at: {obj_options.saver_output_dir}" ) assert len(qnn_context_binary) != 0, "Failed to generate Qnn context binary." - qnn_manager.Destroy() + qnn_manager.DestroyContext() # For now, debug_handle_map is not used by QNN ExecuTorch return PreprocessResult( processed_bytes=bytes(qnn_context_binary), @@ -132,12 +135,9 @@ def preprocess_multimethod( ) -> PreprocessResult: # TODO: refactor QnnManager to consume multiple compile_spec # take first compile_specs here for the same partitions - graph_name = list(edge_programs.keys()) + graph_names = list(edge_programs.keys()) compile_spec = list(compile_specs.values())[0][0] - # gather all graph names option = flatbuffer_to_option(compile_spec[0].value) - option.graph_name = graph_name - compile_spec[0].value = option_to_flatbuffer(option) # check if each graph has equal number of partitions num_sub_graphs = set() for edge_program in edge_programs.values(): @@ -149,15 +149,15 @@ def preprocess_multimethod( all_processed_results = {key: [] for key in edge_programs.keys()} num_sub_graphs = next(iter(num_sub_graphs)) + qnn_manager = get_current_qnn_manager( + option.backend_options.backend_type, compile_spec + ) for i in range(num_sub_graphs): # e.g. 
2 methods (x, y) with 3 partitions # > context_binary_0: [x.subgraph_0, y.subgraph_0] # > context_binary_1: [x.subgraph_1, y.subgraph_1] # > context_binary_2: [x.subgraph_2, y.subgraph_2] - qnn_manager = PyQnnManager.QnnManager( - generate_qnn_executorch_option(compile_spec) - ) - qnn_manager.Init() + qnn_manager.InitContext(graph_names) py_op_wrapper_list, ctx_binary_list = [], [] for j, programs in enumerate(edge_programs.values()): logger.info(f"Processing Method({j}): ({i+1}/{num_sub_graphs})") @@ -177,7 +177,9 @@ def preprocess_multimethod( ) if len(py_op_wrapper_list) == len(edge_programs.values()): - qnn_context_binary = qnn_manager.Compile(graph_name, py_op_wrapper_list) + qnn_context_binary = qnn_manager.Compile( + graph_names, py_op_wrapper_list + ) if option.saver: # TODO: Currently, only the first method is saved. Update this logic if saving multiple methods becomes necessary in the future. exit( @@ -186,7 +188,7 @@ def preprocess_multimethod( assert ( len(qnn_context_binary) != 0 ), "Failed to generate Qnn context binary." - qnn_manager.Destroy() + qnn_manager.DestroyContext() # methods should share the same context binary for current partition for key in edge_programs.keys(): all_processed_results[key].append( diff --git a/backends/qualcomm/quantizer/README.md b/backends/qualcomm/quantizer/README.md index 6870ecc76ac..6954b6e05b7 100644 --- a/backends/qualcomm/quantizer/README.md +++ b/backends/qualcomm/quantizer/README.md @@ -9,7 +9,7 @@ Thank you for contributing to Qualcomm AI Engine Direct delegate for ExecuTorch. ## References ### Qualcomm AI Engine Direct -- [Operator Definitions for HTP](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/HtpOpDefSupplement.html) +- [Operator Definitions for HTP](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-10/HtpOpDefSupplement.html) ### PyTorch - [ATen Operator Definitions](https://github.com/pytorch/pytorch/tree/main/aten/src/ATen/native) @@ -66,7 +66,7 @@ def annotate_xxx(node: Node, quantization_config: QuantizationConfig) -> None: - __quantization_config__: data structure describing quantization configurations for IO activation / weight / bias ### Example of Conv2d Annotation -Conv2d accepts up to three input tensors: `input activation`, `kernel`, `bias`. There are constraints imposed by [Qualcomm AI Engine Direct Manual](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/HtpOpDefSupplement.html#conv2d).
+Conv2d accepts up to three input tensors: `input activation`, `kernel`, `bias`. There are constraints imposed by [Qualcomm AI Engine Direct Manual](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-10/HtpOpDefSupplement.html#conv2d).
Take 8-bit fixed point as example: - __weight__: must be symmetrically quantized if per-channel observer is applied - __bias__: must have `QNN_DATATYPE_SFIXED_POINT_32` and be symmetrically quantized with expected encoding `scales = weight.scales * input.scale`, `offset = 0` if per-channel observer is applied. @@ -147,13 +147,13 @@ Now, we can start to fill in the function body: - Update node's meta with framework compatible data structure ```python - node.meta[QUANT_ANNOTATION_KEY] = QuantizationAnnotation( + node.meta[Q_ANNOTATION_KEY] = QuantizationAnnotation( input_qspec_map=input_qspec_map, output_qspec=quantization_config.output_activation, _annotated=True, ) ``` - After done processing `input_qspec_map`, it's required to have it in node's meta with special tag (`QUANT_ANNOTATION_KEY`) for `convert_pt2e` to properly insert observers. + After done processing `input_qspec_map`, it's required to have it in node's meta with special tag (`Q_ANNOTATION_KEY`) for `convert_pt2e` to properly insert observers. ### Common Annotators For operators without extra parameters to be observed, there are pre-defined annotation method for convenience: diff --git a/backends/qualcomm/runtime/QnnExecuTorchBackend.cpp b/backends/qualcomm/runtime/QnnExecuTorchBackend.cpp index 988c4b84a68..41c2370e4cb 100644 --- a/backends/qualcomm/runtime/QnnExecuTorchBackend.cpp +++ b/backends/qualcomm/runtime/QnnExecuTorchBackend.cpp @@ -90,7 +90,11 @@ Result QnnExecuTorchBackend::init( } ET_CHECK_OR_RETURN_ERROR( - qnn_manager->Init() == Error::Ok, + qnn_manager->InitBackend() == Error::Ok, + Internal, + "Fail to initialize Qnn Backend"); + ET_CHECK_OR_RETURN_ERROR( + qnn_manager->InitContext() == Error::Ok, Internal, "Fail to initialize Qnn Manager"); diff --git a/backends/qualcomm/runtime/QnnManager.cpp b/backends/qualcomm/runtime/QnnManager.cpp index 5e3220f25d9..558f3ec0a10 100644 --- a/backends/qualcomm/runtime/QnnManager.cpp +++ b/backends/qualcomm/runtime/QnnManager.cpp @@ -54,15 +54,9 @@ QnnManager::~QnnManager() { QnnManager::QnnManager( const QnnExecuTorchOptions* options, const QnnExecuTorchContextBinary& qnn_executorch_context_binary) - : qnn_context_blob_(qnn_executorch_context_binary), - qnn_loaded_backend_(""), - // options' life cycle is decided by compiler specs which is - // kept by executorch runtime framework - // please pay attention to any potential seg fault - options_(options) { + : qnn_context_blob_(qnn_executorch_context_binary), options_(options) { QnnExecuTorchBackendType backend_type = options->backend_options()->backend_type(); - std::string library_path = options->library_path()->str(); if (get_option(options_->log_level()) >= QnnExecuTorchLogLevel::kLogLevelInfo) { @@ -71,10 +65,8 @@ QnnManager::QnnManager( EnumNameQcomChipset(options_->soc_info()->soc_model())); QNN_EXECUTORCH_LOG_INFO( "backend_type: %s", EnumNameQnnExecuTorchBackendType(backend_type)); - for (auto name : *options_->graph_name()) { - QNN_EXECUTORCH_LOG_INFO("graph_name: %s", name->c_str()); - } - QNN_EXECUTORCH_LOG_INFO("library_path: %s", library_path.c_str()); + QNN_EXECUTORCH_LOG_INFO( + "library_path: %s", options->library_path()->str().c_str()); QNN_EXECUTORCH_LOG_INFO("dump intermediate outputs: %s", IsTensorDump()); QNN_EXECUTORCH_LOG_INFO( "log_level: %s", @@ -95,35 +87,13 @@ QnnManager::QnnManager( options_->op_package_options()->op_package_infos()->size()); } - if (library_path.empty()) { - switch (backend_type) { - case QnnExecuTorchBackendType::kHtpBackend: - library_path = htp_library_name_; - break; - case
QnnExecuTorchBackendType::kDspBackend: - library_path = dsp_library_name_; - break; - case QnnExecuTorchBackendType::kGpuBackend: - library_path = gpu_library_name_; - break; - default: - QNN_EXECUTORCH_LOG_ERROR("Unknown backend type: %d", backend_type); - break; - } - } - qnn_loaded_backend_ = QnnImplementation(library_path); backend_params_ptr_ = std::make_unique(); + backend_bundle_ptr_ = std::make_shared(); qnn_dlc_manager_ = std::make_shared(qnn_context_blob_, options_); } -Error QnnManager::LoadQnnLibrary() { - auto config = GetImplementationConfig(); - Error ret = qnn_loaded_backend_.Load(config.get()); - return ret; -} - Error QnnManager::PreRegisterMem() { SharedBuffer& shared_buffer_manager = SharedBuffer::GetSharedBufferManager(); for (const auto info : shared_buffer_manager.GetCustomMemTensorInfoSet()) { @@ -299,15 +269,20 @@ Error QnnManager::RegisterCustomMem( return Error::Ok; } -Error QnnManager::Init() { +Error QnnManager::InitBackend() { + // Get or create the shared backend bundle + Error err = QnnBackendUnifiedRegistry::GetInstance().GetOrCreateBackendBundle( + options_, backend_bundle_ptr_); ET_CHECK_OR_RETURN_ERROR( - LoadQnnLibrary() == Error::Ok, Internal, "Fail to load Qnn library"); - logger_ = std::make_unique( - qnn_loaded_backend_, LoggingCallback, get_option(options_->log_level())); - std::vector graph_names; - for (auto name : *options_->graph_name()) { - graph_names.emplace_back(name->str()); - } + err == Error::Ok, + Internal, + "Fail to get or create shared Qnn backend bundle. Error code: %d", + static_cast(err)); + return Error::Ok; +} + +Error QnnManager::InitContext( + std::optional> graph_names) { if (backend_params_ptr_->backend_init_state_ == BackendInitializeState::UNINITIALIZED) { QNN_EXECUTORCH_LOG_INFO( @@ -315,8 +290,9 @@ Error QnnManager::Init() { "parameters for Qnn executorch backend type %d", options_->backend_options()->backend_type()); backend_params_ptr_ = QnnBackendFactory().Create( - qnn_loaded_backend_, - logger_.get(), + backend_bundle_ptr_->implementation.get(), + backend_bundle_ptr_->qnn_backend_ptr.get(), + backend_bundle_ptr_->qnn_device_ptr.get(), qnn_context_blob_, options_, qnn_dlc_manager_.get()); @@ -324,20 +300,13 @@ Error QnnManager::Init() { backend_params_ptr_ != nullptr, Internal, "Failed to load Qnn backend."); + // Note: For online_prepare or deserialization, the graph name will be + // obtained from the binary. ET_CHECK_OR_RETURN_ERROR( - backend_params_ptr_->qnn_backend_cache_ptr_->Configure(graph_names) == - Error::Ok, + backend_params_ptr_->qnn_backend_cache_ptr_->Configure( + graph_names.value_or(std::vector{})) == Error::Ok, Internal, "Fail to configure Qnn backend cache"); - ET_CHECK_OR_RETURN_ERROR( - backend_params_ptr_->qnn_backend_ptr_->Configure( - options_->op_package_options()) == Error::Ok, - Internal, - "Fail to configure Qnn backend"); - ET_CHECK_OR_RETURN_ERROR( - backend_params_ptr_->qnn_device_ptr_->Configure() == Error::Ok, - Internal, - "Fail to configure Qnn device"); ET_CHECK_OR_RETURN_ERROR( backend_params_ptr_->qnn_context_ptr_->Configure() == Error::Ok, Internal, @@ -363,13 +332,15 @@ Error QnnManager::Init() { #endif if (IsOnlinePrepare()) { + // Check whether the QNN version supports the DLC format. 
Qnn_ApiVersion_t qnn_version = {QNN_VERSION_INIT}; - qnn_loaded_backend_.GetQnnInterface().qnn_backend_get_api_version( - &qnn_version); + backend_bundle_ptr_->implementation->GetQnnInterface() + .qnn_backend_get_api_version(&qnn_version); ET_CHECK_OR_RETURN_ERROR( - qnn_dlc_manager_->SetUpDlcEnvironment(qnn_version.coreApiVersion) == - Error::Ok, + qnn_dlc_manager_->SetUpDlcEnvironment( + qnn_version.coreApiVersion, + graph_names.value_or(std::vector{})) == Error::Ok, Internal, "Fail to setup Dlc environment"); } @@ -514,13 +485,14 @@ Error QnnManager::ProfileExecuteData( } void QnnManager::Destroy() { - QNN_EXECUTORCH_LOG_INFO("Destroy Qnn backend parameters"); backend_params_ptr_.reset(new BackendConfigParameters()); - qnn_dlc_manager_->ResetBackendParams(); - logger_.reset(); - qnn_dlc_manager_->ResetLogger(); - qnn_loaded_backend_.TerminateAllBackends(); - qnn_dlc_manager_->TerminateAllBackends(); + backend_bundle_ptr_.reset(new QnnBackendBundle()); + qnn_dlc_manager_->Destroy(); +} + +void QnnManager::DestroyContext() { + backend_params_ptr_.reset(new BackendConfigParameters()); + qnn_dlc_manager_->Destroy(); } bool QnnManager::IsNodeSupportedByBackend( @@ -540,7 +512,7 @@ bool QnnManager::IsNodeSupportedByBackend( } } - error = backend_params_ptr_->qnn_backend_ptr_->BackendValidateOpConfig( + error = backend_bundle_ptr_->qnn_backend_ptr->BackendValidateOpConfig( op_wrapper->GetOpConfig()); if (error != QNN_SUCCESS) { QNN_EXECUTORCH_LOG_WARN( diff --git a/backends/qualcomm/runtime/QnnManager.h b/backends/qualcomm/runtime/QnnManager.h index c01a537f7bd..ec9973039bc 100644 --- a/backends/qualcomm/runtime/QnnManager.h +++ b/backends/qualcomm/runtime/QnnManager.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -30,7 +31,9 @@ class QnnManager { const QnnExecuTorchContextBinary& qnn_executorch_context_binary); ~QnnManager(); - executorch::runtime::Error Init(); + executorch::runtime::Error InitBackend(); + executorch::runtime::Error InitContext( + std::optional> graph_names = std::nullopt); executorch::runtime::Error AllocateTensor(const std::string& graph_name); executorch::runtime::Error AllocateTensor( const std::string& graph_name, @@ -47,7 +50,11 @@ class QnnManager { const std::string& graph_name, executorch::runtime::EventTracer* event_tracer); + // Destroy all QNN components and decrease reference count of shared QNN + // resource void Destroy(); + // Only destroy non-shared QNN components + void DestroyContext(); bool IsAvailable() { return true; @@ -103,35 +110,11 @@ class QnnManager { return backend_params_ptr_->qnn_context_ptr_->GetGraphNames(); } - std::string GetBinarySignature(); - private: - std::unique_ptr GetImplementationConfig() { - if (options_->saver()) { - auto outputDirCfg = std::make_unique(); - outputDirCfg->option = QNN_SAVER_CONFIG_OPTION_OUTPUT_DIRECTORY; - outputDirCfg->outputDirectory = options_->saver_output_dir()->c_str(); - - auto saverCfg = std::make_unique(2); - saverCfg[0] = outputDirCfg.release(); - saverCfg[1] = nullptr; - - return saverCfg; - } else { - return nullptr; - } - } - - executorch::runtime::Error LoadQnnLibrary(); - - static constexpr const char* htp_library_name_ = "libQnnHtp.so"; - static constexpr const char* gpu_library_name_ = "libQnnGpu.so"; - static constexpr const char* dsp_library_name_ = "libQnnDsp.so"; - QnnExecuTorchContextBinary qnn_context_blob_; std::unique_ptr backend_params_ptr_; - QnnImplementation qnn_loaded_backend_; - std::unique_ptr logger_; + std::shared_ptr + backend_bundle_ptr_; // New 
member to hold shared resources const QnnExecuTorchOptions* options_; std::unordered_map>> input_tensors_; diff --git a/backends/qualcomm/runtime/backends/CMakeLists.txt b/backends/qualcomm/runtime/backends/CMakeLists.txt index 6a44f3234c5..a0066ae4933 100644 --- a/backends/qualcomm/runtime/backends/CMakeLists.txt +++ b/backends/qualcomm/runtime/backends/CMakeLists.txt @@ -141,3 +141,10 @@ target_sources( PRIVATE ${CMAKE_CURRENT_LIST_DIR}/irbackend/${CMAKE_SYSTEM_PROCESSOR}/QnnDlcManager.cpp ) + +# qnn_backend_unified_registry +target_sources( + qnn_backend_unified_registry + PUBLIC ${CMAKE_CURRENT_LIST_DIR}/QnnBackendUnifiedRegistry.h + PRIVATE ${CMAKE_CURRENT_LIST_DIR}/QnnBackendUnifiedRegistry.cpp +) diff --git a/backends/qualcomm/runtime/backends/QnnBackendCommon.cpp b/backends/qualcomm/runtime/backends/QnnBackendCommon.cpp index 960bbd9513e..81ec3ebde26 100644 --- a/backends/qualcomm/runtime/backends/QnnBackendCommon.cpp +++ b/backends/qualcomm/runtime/backends/QnnBackendCommon.cpp @@ -13,7 +13,7 @@ namespace qnn { using executorch::runtime::Error; QnnBackend::~QnnBackend() { - const QnnInterface& qnn_interface = implementation_.GetQnnInterface(); + const QnnInterface& qnn_interface = implementation_->GetQnnInterface(); Qnn_ErrorHandle_t error = QNN_SUCCESS; if (nullptr != handle_) { QNN_EXECUTORCH_LOG_INFO("Destroy Qnn backend"); @@ -34,7 +34,7 @@ void QnnBackend::BackendRegisterOpPackage( const flatbuffers::Vector< flatbuffers::Offset>* op_packages_infos) { - const QnnInterface& qnn_interface = implementation_.GetQnnInterface(); + const QnnInterface& qnn_interface = implementation_->GetQnnInterface(); Qnn_ErrorHandle_t error = QNN_SUCCESS; QnnExecuTorchOpPackagePlatform current_platform = QnnExecuTorchOpPackagePlatform::UNKNOWN; @@ -71,7 +71,7 @@ void QnnBackend::BackendRegisterOpPackage( Error QnnBackend::Configure( const QnnExecuTorchOpPackageOptions* op_package_options) { // create qnn backend - const QnnInterface& qnn_interface = implementation_.GetQnnInterface(); + const QnnInterface& qnn_interface = implementation_->GetQnnInterface(); Qnn_ErrorHandle_t error = QNN_SUCCESS; std::vector temp_backend_config; @@ -102,7 +102,7 @@ Error QnnBackend::Configure( } Error QnnBackend::VerifyQNNSDKVersion() { - const QnnInterface& qnn_interface = implementation_.GetQnnInterface(); + const QnnInterface& qnn_interface = implementation_->GetQnnInterface(); Qnn_ApiVersion_t qnn_version = {QNN_VERSION_INIT}; Qnn_ErrorHandle_t error = diff --git a/backends/qualcomm/runtime/backends/QnnBackendCommon.h b/backends/qualcomm/runtime/backends/QnnBackendCommon.h index a66119dab22..e146a67d772 100644 --- a/backends/qualcomm/runtime/backends/QnnBackendCommon.h +++ b/backends/qualcomm/runtime/backends/QnnBackendCommon.h @@ -27,10 +27,11 @@ namespace qnn { // qnn backend class QnnBackend { public: - explicit QnnBackend( - const QnnImplementation& implementation, - QnnLogger* logger) + explicit QnnBackend(QnnImplementation* implementation, QnnLogger* logger) : handle_(nullptr), implementation_(implementation), logger_(logger) {} + QnnBackend(const QnnBackend&) = delete; // Delete copy constructor + QnnBackend& operator=(const QnnBackend&) = + delete; // Delete assignment operator virtual ~QnnBackend(); virtual bool IsProfileEventTypeParentOfNodeTime( @@ -42,7 +43,7 @@ class QnnBackend { const QnnExecuTorchOpPackageOptions* op_package_options); Qnn_ErrorHandle_t BackendValidateOpConfig(const Qnn_OpConfig_t& op_config) { - return implementation_.GetQnnInterface().qnn_backend_validate_op_config( + return 
implementation_->GetQnnInterface().qnn_backend_validate_op_config( handle_, op_config); }; @@ -65,7 +66,7 @@ class QnnBackend { flatbuffers::Offset>* op_packages_info); Qnn_BackendHandle_t handle_; - const QnnImplementation& implementation_; + QnnImplementation* implementation_; QnnOpPackageManager op_package_manager_; QnnLogger* logger_; executorch::runtime::Error VersionChecker( diff --git a/backends/qualcomm/runtime/backends/QnnBackendFactory.cpp b/backends/qualcomm/runtime/backends/QnnBackendFactory.cpp index e7e9db6fed8..0f0524d739e 100644 --- a/backends/qualcomm/runtime/backends/QnnBackendFactory.cpp +++ b/backends/qualcomm/runtime/backends/QnnBackendFactory.cpp @@ -16,8 +16,9 @@ namespace qnn { using executorch::runtime::Error; std::unique_ptr QnnBackendFactory::Create( - const QnnImplementation& implementation, - QnnLogger* logger, + QnnImplementation* implementation_ptr, + QnnBackend* qnn_backend_ptr, + QnnDevice* qnn_device_ptr, const QnnExecuTorchContextBinary& qnn_context_blob, const QnnExecuTorchOptions* options, QnnDlcManager* qnn_dlc_manager) { @@ -26,15 +27,8 @@ std::unique_ptr QnnBackendFactory::Create( switch (options->backend_options()->backend_type()) { case QnnExecuTorchBackendType::kHtpBackend: { auto htp_options = options->backend_options()->htp_options(); - const std::string skel_library_dir = - htp_options->skel_library_dir()->str(); - if (!skel_library_dir.empty()) { - setenv("ADSP_LIBRARY_PATH", skel_library_dir.c_str(), /*overwrite=*/1); - } if (get_option(options->log_level()) >= QnnExecuTorchLogLevel::kLogLevelInfo) { - QNN_EXECUTORCH_LOG_INFO( - "skel_library_dir: %s", skel_library_dir.c_str()); QNN_EXECUTORCH_LOG_INFO( "htp_arch in htp_info: %s", EnumNameHtpArch(options->soc_info()->htp_info()->htp_arch())); @@ -56,32 +50,26 @@ std::unique_ptr QnnBackendFactory::Create( QNN_EXECUTORCH_LOG_INFO( "use_fold_relu in htp_options: %d", htp_options->use_fold_relu()); } - backend_params->qnn_backend_ptr_ = - std::make_unique(implementation, logger); - - backend_params->qnn_device_ptr_ = std::make_unique( - implementation, logger, options->soc_info(), htp_options); - backend_params->qnn_backend_cache_ptr_ = std::make_unique(qnn_context_blob); backend_params->qnn_context_ptr_ = std::make_unique( - implementation, - backend_params->qnn_backend_ptr_.get(), - backend_params->qnn_device_ptr_.get(), + implementation_ptr, + qnn_backend_ptr, + qnn_device_ptr, backend_params->qnn_backend_cache_ptr_.get(), htp_options, qnn_dlc_manager); backend_params->qnn_graph_ptr_ = std::make_unique( - implementation, - backend_params->qnn_backend_ptr_.get(), + implementation_ptr, + qnn_backend_ptr, backend_params->qnn_context_ptr_.get(), get_option(options->profile_level()), options->soc_info(), htp_options); backend_params->qnn_mem_manager_ptr_ = std::make_unique( - implementation, + implementation_ptr, backend_params->qnn_context_ptr_.get(), get_option(options->log_level())); backend_params->backend_init_state_ = BackendInitializeState::INITIALIZED; @@ -93,11 +81,7 @@ std::unique_ptr QnnBackendFactory::Create( return nullptr; } - if (backend_params->qnn_backend_ptr_->VerifyQNNSDKVersion() == Error::Ok) { - return backend_params; - } - - return nullptr; + return backend_params; } } // namespace qnn } // namespace backends diff --git a/backends/qualcomm/runtime/backends/QnnBackendFactory.h b/backends/qualcomm/runtime/backends/QnnBackendFactory.h index 3d78a36b9f0..8be3e5e448f 100644 --- a/backends/qualcomm/runtime/backends/QnnBackendFactory.h +++ 
b/backends/qualcomm/runtime/backends/QnnBackendFactory.h @@ -31,22 +31,18 @@ namespace qnn { class QnnDlcManager; typedef enum { UNINITIALIZED, INITIALIZED } BackendInitializeState; -// @brief Struct containing all handles for a given QNN backend +// @brief Struct containing non-shared handles for a given QNN backend typedef struct BackendConfigParameters { - std::unique_ptr qnn_backend_ptr_; BackendInitializeState backend_init_state_; std::unique_ptr qnn_context_ptr_; - std::unique_ptr qnn_device_ptr_; std::unique_ptr qnn_graph_ptr_; std::unique_ptr qnn_mem_manager_ptr_; std::unique_ptr qnn_backend_cache_ptr_; // Default ctor BackendConfigParameters() - : qnn_backend_ptr_(nullptr), - backend_init_state_(BackendInitializeState::UNINITIALIZED), + : backend_init_state_(BackendInitializeState::UNINITIALIZED), qnn_context_ptr_(nullptr), - qnn_device_ptr_(nullptr), qnn_graph_ptr_(nullptr), qnn_mem_manager_ptr_(nullptr), qnn_backend_cache_ptr_(nullptr) {} @@ -56,8 +52,6 @@ typedef struct BackendConfigParameters { qnn_backend_cache_ptr_.reset(); qnn_mem_manager_ptr_.reset(); qnn_context_ptr_.reset(); - qnn_device_ptr_.reset(); - qnn_backend_ptr_.reset(); backend_init_state_ = BackendInitializeState::UNINITIALIZED; } @@ -66,8 +60,9 @@ typedef struct BackendConfigParameters { class QnnBackendFactory { public: std::unique_ptr Create( - const QnnImplementation& implementation, - QnnLogger* logger, + QnnImplementation* implementation, + QnnBackend* qnn_backend_ptr, + QnnDevice* qnn_device_ptr, const QnnExecuTorchContextBinary& qnn_context_blob, const QnnExecuTorchOptions* options, QnnDlcManager* qnn_dlc_manager); diff --git a/backends/qualcomm/runtime/backends/QnnBackendUnifiedRegistry.cpp b/backends/qualcomm/runtime/backends/QnnBackendUnifiedRegistry.cpp new file mode 100644 index 00000000000..73bc3ae9710 --- /dev/null +++ b/backends/qualcomm/runtime/backends/QnnBackendUnifiedRegistry.cpp @@ -0,0 +1,153 @@ +/* + * Copyright (c) Qualcomm Innovation Center, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ +#include +#include +#include +#include +#include +#include + +#include + +namespace executorch { +namespace backends { +namespace qnn { +using executorch::runtime::Error; + +// Static instance for the singleton +QnnBackendUnifiedRegistry& QnnBackendUnifiedRegistry::GetInstance() { + static QnnBackendUnifiedRegistry instance; + return instance; +} + +// Private constructor +QnnBackendUnifiedRegistry::QnnBackendUnifiedRegistry() = default; + +// Destructor +QnnBackendUnifiedRegistry::~QnnBackendUnifiedRegistry() { + CleanupExpired(); +} + +Error QnnBackendUnifiedRegistry::GetOrCreateBackendBundle( + const QnnExecuTorchOptions* options, + std::shared_ptr<QnnBackendBundle>& bundle) { + std::lock_guard lock(mutex_); + + // Extract relevant parameters from options for creation and validation + std::string current_lib_path = options->library_path()->str(); + QnnExecuTorchLogLevel current_log_level = get_option(options->log_level()); + QnnExecuTorchBackendType backend_type = + options->backend_options()->backend_type(); + + if (current_lib_path.empty()) { + switch (backend_type) { + case QnnExecuTorchBackendType::kHtpBackend: { + current_lib_path = htp_library_name_; + break; + } + case QnnExecuTorchBackendType::kGpuBackend: + case QnnExecuTorchBackendType::kDspBackend: + case QnnExecuTorchBackendType::kUndefinedBackend: + default: + QNN_EXECUTORCH_LOG_ERROR( + "Unsupported backend type: %s", + EnumNameQnnExecuTorchBackendType(backend_type)); + return Error::NotFound; + } + } + + // Check if resources already exist + auto it = qnn_backend_bundles_map_.find(backend_type); + if (it != qnn_backend_bundles_map_.end()) { + // Create new shared_ptr that shares ownership of the managed object. + if (auto existing_bundle = it->second.lock()) { + bundle = existing_bundle; + if (bundle->qnn_logger_ptr->GetLogLevel() != current_log_level) { + bundle->qnn_logger_ptr = std::make_unique<QnnLogger>( + bundle->implementation.get(), LoggingCallback, current_log_level); + } + QNN_EXECUTORCH_LOG_INFO( + "Use cached backend bundle for current backend: %s", + EnumNameQnnExecuTorchBackendType(backend_type)); + return Error::Ok; + } + } + + QNN_EXECUTORCH_LOG_INFO("Creating new backend bundle."); + + // 1. Create QnnImplementation and load qnn library + std::unique_ptr<QnnImplementation> implementation = + std::make_unique<QnnImplementation>(current_lib_path); + auto config = GetImplementationConfig(options); + Error ret = implementation->Load(config.get()); + ET_CHECK_OR_RETURN_ERROR( + ret == Error::Ok, Internal, "Fail to load Qnn library"); + + // 2. Create QnnLogger + std::unique_ptr<QnnLogger> logger = std::make_unique<QnnLogger>( + implementation.get(), LoggingCallback, current_log_level); + + // 3. Create QnnBackend (specific type based on options) + // 4.
Create QnnDevice (specific type based on options) + std::unique_ptr<QnnBackend> backend = nullptr; + std::unique_ptr<QnnDevice> device = nullptr; + + switch (backend_type) { + case QnnExecuTorchBackendType::kHtpBackend: { + auto htp_options = options->backend_options()->htp_options(); + backend = + std::make_unique<HtpBackend>(implementation.get(), logger.get()); + ET_CHECK_OR_RETURN_ERROR( + backend->Configure(options->op_package_options()) == Error::Ok, + Internal, + "Fail to configure Qnn backend"); + + device = std::make_unique<HtpDevice>( + implementation.get(), logger.get(), options->soc_info(), htp_options); + ET_CHECK_OR_RETURN_ERROR( + device->Configure() == Error::Ok, + Internal, + "Fail to configure Qnn device"); + break; + } + case QnnExecuTorchBackendType::kGpuBackend: + case QnnExecuTorchBackendType::kDspBackend: + case QnnExecuTorchBackendType::kUndefinedBackend: + default: + return Error::NotFound; + } + if (backend->VerifyQNNSDKVersion() != Error::Ok) { + return Error::Internal; + } + + bundle->implementation = std::move(implementation); + bundle->qnn_logger_ptr = std::move(logger); + bundle->qnn_backend_ptr = std::move(backend); + bundle->qnn_device_ptr = std::move(device); + qnn_backend_bundles_map_.emplace( + backend_type, bundle); // Store weak_ptr to the bundle + + return Error::Ok; +} + +void QnnBackendUnifiedRegistry::CleanupExpired() { + std::lock_guard lock(mutex_); + + for (auto it = qnn_backend_bundles_map_.begin(); + it != qnn_backend_bundles_map_.end();) { + if (it->second.expired()) { + it = qnn_backend_bundles_map_.erase(it); + } else { + ++it; + } + } +} + +} // namespace qnn +} // namespace backends +} // namespace executorch diff --git a/backends/qualcomm/runtime/backends/QnnBackendUnifiedRegistry.h b/backends/qualcomm/runtime/backends/QnnBackendUnifiedRegistry.h new file mode 100644 index 00000000000..b2549a3356c --- /dev/null +++ b/backends/qualcomm/runtime/backends/QnnBackendUnifiedRegistry.h @@ -0,0 +1,106 @@ +/* + * Copyright (c) Qualcomm Innovation Center, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace executorch { +namespace backends { +namespace qnn { + +// A bundle struct to hold all shared QNN backend resources +struct QnnBackendBundle { + std::unique_ptr<QnnImplementation> implementation; + std::unique_ptr<QnnLogger> qnn_logger_ptr; + std::unique_ptr<QnnBackend> qnn_backend_ptr; + std::unique_ptr<QnnDevice> qnn_device_ptr; + + // Default ctor + QnnBackendBundle() + : implementation(nullptr), + qnn_logger_ptr(nullptr), + qnn_backend_ptr(nullptr), + qnn_device_ptr(nullptr) {} + // Default dtor + ~QnnBackendBundle() { + qnn_device_ptr.reset(); + qnn_backend_ptr.reset(); + qnn_logger_ptr.reset(); + implementation.reset(); + } +}; + +class QnnBackendUnifiedRegistry { + // Singleton class to manage shared QNN backend resources. It ensures that + // only one instance of the registry exists throughout the application's + // lifetime. The registry maintains a map of backend bundles indexed by + // backend_type. Each bundle contains QnnImplementation, QnnLogger, QnnBackend, + // and QnnDevice objects for a specific backend type. The registry provides + // methods to get or create backend bundles, ensuring that resources are + // properly managed and reused when possible. It also includes a cleanup + // mechanism to remove expired bundles.
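+ // + // Minimal usage sketch (illustrative only, mirroring what QnnManager::InitBackend does): + //   auto bundle = std::make_shared<QnnBackendBundle>(); + //   Error err = QnnBackendUnifiedRegistry::GetInstance() + //                   .GetOrCreateBackendBundle(options, bundle); + //   // On success, bundle shares the process-wide implementation, logger, + //   // backend, and device handles for this backend type; the registry only + //   // keeps a weak_ptr, so the resources are torn down once all owners + //   // release their bundle.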
+ public: + static QnnBackendUnifiedRegistry& GetInstance(); + + executorch::runtime::Error GetOrCreateBackendBundle( + const QnnExecuTorchOptions* options, + std::shared_ptr& bundle); + + void CleanupExpired(); + + private: + QnnBackendUnifiedRegistry(); + ~QnnBackendUnifiedRegistry(); + + // Delete copy constructor and assignment operator + QnnBackendUnifiedRegistry(const QnnBackendUnifiedRegistry&) = delete; + QnnBackendUnifiedRegistry& operator=(const QnnBackendUnifiedRegistry&) = + delete; + + static constexpr const char* htp_library_name_ = "libQnnHtp.so"; + static constexpr const char* gpu_library_name_ = "libQnnGpu.so"; + static constexpr const char* dsp_library_name_ = "libQnnDsp.so"; + + std::unique_ptr GetImplementationConfig( + const QnnExecuTorchOptions* options) { + if (options->saver()) { + auto outputDirCfg = std::make_unique(); + outputDirCfg->option = QNN_SAVER_CONFIG_OPTION_OUTPUT_DIRECTORY; + outputDirCfg->outputDirectory = options->saver_output_dir()->c_str(); + + auto saverCfg = std::make_unique(2); + saverCfg[0] = outputDirCfg.release(); + saverCfg[1] = nullptr; + + return saverCfg; + } else { + return nullptr; + } + } + + // Stores the collection of shared resources, with backend_type being used as + // the key. + std::unordered_map> + qnn_backend_bundles_map_; + + std::mutex mutex_; // Protects access to resources and ensures atomic + // creation/destruction +}; + +} // namespace qnn +} // namespace backends +} // namespace executorch diff --git a/backends/qualcomm/runtime/backends/QnnContextCommon.cpp b/backends/qualcomm/runtime/backends/QnnContextCommon.cpp index ee49b10215a..e16a173db6c 100644 --- a/backends/qualcomm/runtime/backends/QnnContextCommon.cpp +++ b/backends/qualcomm/runtime/backends/QnnContextCommon.cpp @@ -14,7 +14,7 @@ namespace backends { namespace qnn { QnnContext::~QnnContext() { - const QnnInterface& qnn_interface = implementation_.GetQnnInterface(); + const QnnInterface& qnn_interface = implementation_->GetQnnInterface(); Qnn_ErrorHandle_t error = QNN_SUCCESS; if (handle_ != nullptr) { QNN_EXECUTORCH_LOG_INFO("Destroy Qnn context"); @@ -33,7 +33,7 @@ QnnContext::~QnnContext() { Error QnnContext::Configure() { // create qnn context - const QnnInterface& qnn_interface = implementation_.GetQnnInterface(); + const QnnInterface& qnn_interface = implementation_->GetQnnInterface(); Qnn_ErrorHandle_t error = QNN_SUCCESS; std::vector temp_context_config; @@ -95,7 +95,7 @@ Error QnnContext::Configure() { Error QnnContext::GetContextBinary( QnnExecuTorchContextBinary& qnn_executorch_context_binary) { - const QnnInterface& qnn_interface = implementation_.GetQnnInterface(); + const QnnInterface& qnn_interface = implementation_->GetQnnInterface(); Qnn_ContextBinarySize_t binary_size = 0; Qnn_ContextBinarySize_t bytes_written = 0; Qnn_ErrorHandle_t error = diff --git a/backends/qualcomm/runtime/backends/QnnContextCommon.h b/backends/qualcomm/runtime/backends/QnnContextCommon.h index 0e9e12ef544..7d507a4a50c 100644 --- a/backends/qualcomm/runtime/backends/QnnContextCommon.h +++ b/backends/qualcomm/runtime/backends/QnnContextCommon.h @@ -24,7 +24,7 @@ class QnnDlcManager; class QnnContext { public: explicit QnnContext( - const QnnImplementation& implementation, + QnnImplementation* implementation, QnnBackend* backend, QnnDevice* device, QnnBackendCache* cache, @@ -74,7 +74,7 @@ class QnnContext { private: Qnn_ContextHandle_t handle_; - const QnnImplementation& implementation_; + QnnImplementation* implementation_; QnnBackend* backend_; QnnDevice* device_; 
QnnBackendCache* cache_; diff --git a/backends/qualcomm/runtime/backends/QnnDeviceCommon.cpp b/backends/qualcomm/runtime/backends/QnnDeviceCommon.cpp index 93d705efd3e..0280ec4f383 100644 --- a/backends/qualcomm/runtime/backends/QnnDeviceCommon.cpp +++ b/backends/qualcomm/runtime/backends/QnnDeviceCommon.cpp @@ -13,7 +13,7 @@ namespace qnn { using executorch::runtime::Error; QnnDevice::~QnnDevice() { - const QnnInterface& qnn_interface = implementation_.GetQnnInterface(); + const QnnInterface& qnn_interface = implementation_->GetQnnInterface(); Qnn_ErrorHandle_t error = QNN_SUCCESS; if (nullptr != handle_) { QNN_EXECUTORCH_LOG_INFO("Destroy Qnn device"); @@ -32,7 +32,7 @@ QnnDevice::~QnnDevice() { Error QnnDevice::Configure() { // create qnn device - const QnnInterface& qnn_interface = implementation_.GetQnnInterface(); + const QnnInterface& qnn_interface = implementation_->GetQnnInterface(); Qnn_ErrorHandle_t error = QNN_SUCCESS; std::vector temp_device_config; diff --git a/backends/qualcomm/runtime/backends/QnnDeviceCommon.h b/backends/qualcomm/runtime/backends/QnnDeviceCommon.h index 85de00f8623..6736e4a49ca 100644 --- a/backends/qualcomm/runtime/backends/QnnDeviceCommon.h +++ b/backends/qualcomm/runtime/backends/QnnDeviceCommon.h @@ -20,8 +20,10 @@ namespace backends { namespace qnn { class QnnDevice { public: - explicit QnnDevice(const QnnImplementation& implementation, QnnLogger* logger) + explicit QnnDevice(QnnImplementation* implementation, QnnLogger* logger) : implementation_(implementation), handle_(nullptr), logger_(logger) {} + QnnDevice(const QnnDevice&) = delete; // Delete copy constructor + QnnDevice& operator=(const QnnDevice&) = delete; // Delete assignment operator virtual ~QnnDevice(); @@ -40,7 +42,7 @@ class QnnDevice { virtual executorch::runtime::Error AfterCreateDevice() { return executorch::runtime::Error::Ok; }; - const QnnImplementation& implementation_; + QnnImplementation* implementation_; private: Qnn_DeviceHandle_t handle_; diff --git a/backends/qualcomm/runtime/backends/QnnDlcManager.h b/backends/qualcomm/runtime/backends/QnnDlcManager.h index a57906df4e3..1d9312464e2 100644 --- a/backends/qualcomm/runtime/backends/QnnDlcManager.h +++ b/backends/qualcomm/runtime/backends/QnnDlcManager.h @@ -10,6 +10,7 @@ #include #include +#include #include #include "QnnWrapperUtils.hpp" @@ -35,23 +36,23 @@ class QnnDlcManager { std::unique_ptr backend_params_ptr_ = std::make_unique(); + std::unique_ptr backend_bundle_ptr_ = + std::make_unique(); - void ResetBackendParams(); - void ResetLogger(); - void TerminateAllBackends(); + void Destroy(); - Error SetUpDlcEnvironment(const Qnn_Version_t& coreApiVersion); + Error SetUpDlcEnvironment( + const Qnn_Version_t& coreApiVersion, + const std::vector& graph_names); Error RegisterGraphsFromDLC( - const QnnImplementation& implementation, + QnnImplementation* implementation, QnnBackend* backend, QnnContext* context, QnnBackendCache* cache); private: static constexpr const char* library_name_ = "libQnnIr.so"; - QnnImplementation qnn_loaded_backend_; - std::unique_ptr logger_; const QnnExecuTorchContextBinary& qnn_context_blob_; const QnnExecuTorchOptions* options_; @@ -64,7 +65,7 @@ class QnnDlcManager { Error Create(); - Error Configure(); + Error Configure(const std::vector& graph_names); }; } // namespace qnn } // namespace backends diff --git a/backends/qualcomm/runtime/backends/QnnFunctionInterface.h b/backends/qualcomm/runtime/backends/QnnFunctionInterface.h index 548c363f388..0e1e4727aa3 100644 --- 
a/backends/qualcomm/runtime/backends/QnnFunctionInterface.h +++ b/backends/qualcomm/runtime/backends/QnnFunctionInterface.h @@ -105,6 +105,9 @@ class QnnInterface { const QNN_INTERFACE_VER_TYPE& GetInterfaceVer() const { return qnn_interface_->QNN_INTERFACE_VER_NAME; } + void Unload() { + qnn_interface_ = nullptr; + } private: // --------- QnnInterface --------- diff --git a/backends/qualcomm/runtime/backends/QnnGraphCommon.cpp b/backends/qualcomm/runtime/backends/QnnGraphCommon.cpp index 9fe81f4cf54..44bf11bc0f5 100644 --- a/backends/qualcomm/runtime/backends/QnnGraphCommon.cpp +++ b/backends/qualcomm/runtime/backends/QnnGraphCommon.cpp @@ -14,7 +14,7 @@ using executorch::runtime::Error; Error QnnGraph::Configure(const std::string& graph_name) { // create qnn backend - const QnnInterface& qnn_interface = implementation_.GetQnnInterface(); + const QnnInterface& qnn_interface = implementation_->GetQnnInterface(); Qnn_ErrorHandle_t error = QNN_SUCCESS; std::vector temp_graph_config; ET_CHECK_OR_RETURN_ERROR( @@ -81,7 +81,7 @@ Qnn_ErrorHandle_t QnnGraph::GraphExecute( return QNN_COMMON_ERROR_GENERAL; } - return implementation_.GetQnnInterface().qnn_graph_execute( + return implementation_->GetQnnInterface().qnn_graph_execute( handle_[graph_name], input_tensor_structs.data(), input_tensor_structs.size(), @@ -94,7 +94,7 @@ Qnn_ErrorHandle_t QnnGraph::GraphExecute( Error QnnGraph::EnsureTensorInQnnGraph( const std::string& graph_name, const std::shared_ptr& tensor_wrapper) { - const QnnInterface& qnn_interface = implementation_.GetQnnInterface(); + const QnnInterface& qnn_interface = implementation_->GetQnnInterface(); Qnn_ErrorHandle_t error = QNN_SUCCESS; if (!tensor_wrapper->IsTensorCreated()) { diff --git a/backends/qualcomm/runtime/backends/QnnGraphCommon.h b/backends/qualcomm/runtime/backends/QnnGraphCommon.h index 33f903dae41..fbb5ab80140 100644 --- a/backends/qualcomm/runtime/backends/QnnGraphCommon.h +++ b/backends/qualcomm/runtime/backends/QnnGraphCommon.h @@ -23,7 +23,7 @@ namespace qnn { class QnnGraph { public: explicit QnnGraph( - const QnnImplementation& implementation, + QnnImplementation* implementation, QnnBackend* backend, QnnContext* context, const QnnExecuTorchProfileLevel& profile_level) @@ -44,7 +44,7 @@ class QnnGraph { Qnn_ErrorHandle_t GraphAddNode( const std::string& graph_name, const Qnn_OpConfig_t& op_config) { - return implementation_.GetQnnInterface().qnn_graph_add_node( + return implementation_->GetQnnInterface().qnn_graph_add_node( handle_[graph_name], op_config); }; executorch::runtime::Error EnsureTensorInQnnGraph( @@ -52,7 +52,7 @@ class QnnGraph { const std::shared_ptr& tensor_wrapper); Qnn_ErrorHandle_t GraphFinalize(const std::string& graph_name) { - return implementation_.GetQnnInterface().qnn_graph_finalize( + return implementation_->GetQnnInterface().qnn_graph_finalize( handle_[graph_name], profile_[graph_name]->GetHandle(), nullptr /* signal_handle */); @@ -84,7 +84,7 @@ class QnnGraph { private: std::unordered_map handle_; - const QnnImplementation& implementation_; + QnnImplementation* implementation_; QnnBackend* backend_; QnnContext* context_; QnnExecuTorchProfileLevel profile_level_; diff --git a/backends/qualcomm/runtime/backends/QnnImplementation.cpp b/backends/qualcomm/runtime/backends/QnnImplementation.cpp index 7083f2bef30..246800791e6 100644 --- a/backends/qualcomm/runtime/backends/QnnImplementation.cpp +++ b/backends/qualcomm/runtime/backends/QnnImplementation.cpp @@ -6,6 +6,7 @@ * LICENSE file in the root directory of this source tree. 
*/ #include +#include #include "QnnInterface.h" namespace executorch { namespace backends { @@ -13,6 +14,14 @@ namespace qnn { using executorch::runtime::Error; +struct DlCloser { + int operator()(void* handle) { + if (handle == nullptr) + return 0; + return dlclose(handle); + } +}; + Error QnnImplementation::InitBackend( void* const lib_handle, const QnnSaver_Config_t** saver_config) { @@ -33,47 +42,39 @@ Error QnnImplementation::InitBackend( return Error::Ok; } -// instantiate static members -// NOLINTNEXTLINE(fuchsia-statically-constructed-objects) -std::unordered_map - QnnImplementation::lib_path_to_backend_id_; -// NOLINTNEXTLINE(fuchsia-statically-constructed-objects) -std::unordered_map - QnnImplementation::loaded_backend_; -// NOLINTNEXTLINE(fuchsia-statically-constructed-objects) -std::unordered_map - QnnImplementation::loaded_lib_handle_; -// NOLINTNEXTLINE(fuchsia-statically-constructed-objects) -std::mutex QnnImplementation::be_init_mutex_; - -Error QnnImplementation::StartBackend( +QnnImplementation::~QnnImplementation() { + Unload(); +} + +const QnnInterface_t* QnnImplementation::StartBackend( const std::string& lib_path, const QnnSaver_Config_t** saver_config) { Qnn_ErrorHandle_t error = QNN_SUCCESS; - void* lib_handle = nullptr; // If the library is already loaded, return the handle. - lib_handle = dlopen(lib_path.c_str(), RTLD_NOW | RTLD_NOLOAD); + std::unique_ptr lib_handle( + dlopen(lib_path.c_str(), RTLD_NOW | RTLD_NOLOAD)); if (!lib_handle) { - lib_handle = dlopen(lib_path.c_str(), RTLD_NOW | RTLD_GLOBAL); + lib_handle = std::unique_ptr( + dlopen(lib_path.c_str(), RTLD_NOW | RTLD_GLOBAL)); } if (lib_handle == nullptr) { QNN_EXECUTORCH_LOG_ERROR( "Cannot Open QNN library %s, with error: %s", lib_path.c_str(), dlerror()); - return Error::Internal; + return nullptr; } // load get_provider function auto get_providers = loadQnnFunction( - lib_handle, "QnnInterface_getProviders"); + lib_handle.get(), "QnnInterface_getProviders"); if (get_providers == nullptr) { QNN_EXECUTORCH_LOG_ERROR( "QnnImplementation::Load Cannot load symbol " "QnnInterface_getProviders : %s", dlerror()); - return Error::Internal; + return nullptr; } // Get QnnInterface Providers @@ -85,7 +86,7 @@ Error QnnImplementation::StartBackend( QNN_EXECUTORCH_LOG_ERROR( "Qnn Interface failed to get providers. Error %d", QNN_GET_ERROR_CODE(error)); - return Error::Internal; + return nullptr; } if (num_providers != required_num_providers_) { @@ -94,115 +95,47 @@ Error QnnImplementation::StartBackend( "%d instead of required %d", num_providers, required_num_providers_); - return Error::Internal; - } - - BackendIdType backend_id = provider_list[0]->backendId; - - // store everything - lib_path_to_backend_id_[lib_path] = backend_id; - - // we use lib_path as the first unique key. - // Users can get wrong like, he or she assigns - // library_path=libQnnHtp_1.so - // library_path=libQnnHtp_2.so - // for different QnnBackend instances. - // So we warning out here. - if (loaded_backend_.count(backend_id) > 0) { - QNN_EXECUTORCH_LOG_WARN( - "lib_path %s is loaded, but backend %d " - "already exists. 
Overwriting previous loaded backend...", - lib_path.c_str(), - backend_id); - } - loaded_backend_[backend_id] = provider_list[0]; - - if (loaded_lib_handle_.count(backend_id) > 0) { - QNN_EXECUTORCH_LOG_WARN("closing %pK...", loaded_lib_handle_[backend_id]); - - int dlclose_error = dlclose(loaded_lib_handle_[backend_id]); - if (dlclose_error != 0) { - QNN_EXECUTORCH_LOG_WARN( - "Sadly, fail to close %pK with error %s", - loaded_lib_handle_[backend_id], - dlerror()); - } + return nullptr; } - loaded_lib_handle_[backend_id] = lib_handle; // Saver backend need initialization. - Error be_init_st = InitBackend(loaded_lib_handle_[backend_id], saver_config); + Error be_init_st = InitBackend(lib_handle.get(), saver_config); if (be_init_st != Error::Ok) { - // backend init fails. clear things - lib_path_to_backend_id_.erase(lib_path); - loaded_backend_.erase(backend_id); - - int dlclose_error = dlclose(loaded_lib_handle_[backend_id]); - if (dlclose_error != 0) { - QNN_EXECUTORCH_LOG_WARN( - "fail to close %pK after backend-init " - "failure, with error %s", - loaded_lib_handle_[backend_id], - dlerror()); - } - - loaded_lib_handle_.erase(backend_id); - return be_init_st; + return nullptr; } - return Error::Ok; + // hold the lib_handle + lib_handle_ = lib_handle.release(); + return provider_list[0]; } -Error QnnImplementation::TerminateAllBackends() { - Error ret_status = Error::Ok; +Error QnnImplementation::Unload() { + qnn_interface_.Unload(); - loaded_backend_.clear(); - - for (auto& it : loaded_lib_handle_) { - int dlclose_error = dlclose(it.second); - if (dlclose_error != 0) { - QNN_EXECUTORCH_LOG_ERROR( - "Fail to close QNN backend %d with error %s", it.first, dlerror()); - ret_status = Error::Internal; - } + if (lib_handle_ == nullptr) { + return Error::Ok; } - loaded_lib_handle_.clear(); - lib_path_to_backend_id_.clear(); - return ret_status; + int dlclose_error = dlclose(lib_handle_); + if (dlclose_error != 0) { + QNN_EXECUTORCH_LOG_ERROR( + "Fail to close QNN backend %s with error %s", + lib_path_.c_str(), + dlerror()); + return Error::Internal; + } + lib_handle_ = nullptr; + return Error::Ok; } Error QnnImplementation::Load(const QnnSaver_Config_t** saver_config) { - BackendIdType backend_id = QNN_BACKEND_ID_NULL; - { - const std::lock_guard lock(be_init_mutex_); - - if (lib_path_to_backend_id_.count(lib_path_) == 0) { - Error st = StartBackend(lib_path_, saver_config); - ET_CHECK_OR_RETURN_ERROR( - st == Error::Ok, Internal, "Fail to start backend"); - } - - // Get backend ID - backend_id = lib_path_to_backend_id_[lib_path_]; - - // really don't expect. - if (loaded_backend_.count(backend_id) == 0 || - loaded_lib_handle_.count(backend_id) == 0) { - QNN_EXECUTORCH_LOG_ERROR( - "library %s is loaded but " - "loaded backend count=%zu, " - "loaded lib_handle count=%zu", - lib_path_.c_str(), - loaded_backend_.count(backend_id), - loaded_lib_handle_.count(backend_id)); - return Error::Internal; - } - } // be_init_mutex_ release. 
+ const QnnInterface_t* p_qnn_intf = StartBackend(lib_path_, saver_config); + ET_CHECK_OR_RETURN_ERROR( + p_qnn_intf != nullptr, Internal, "Fail to start backend"); // Connect QnnInterface - qnn_interface_.SetQnnInterface(loaded_backend_[backend_id]); + qnn_interface_.SetQnnInterface(p_qnn_intf); return Error::Ok; } diff --git a/backends/qualcomm/runtime/backends/QnnImplementation.h b/backends/qualcomm/runtime/backends/QnnImplementation.h index a49ee6516fc..3059166523d 100644 --- a/backends/qualcomm/runtime/backends/QnnImplementation.h +++ b/backends/qualcomm/runtime/backends/QnnImplementation.h @@ -11,9 +11,7 @@ #include #include -#include #include -#include namespace executorch { namespace backends { namespace qnn { @@ -29,32 +27,32 @@ class QnnImplementation { explicit QnnImplementation(std::string lib_path) : lib_path_(std::move(lib_path)){}; + QnnImplementation(const QnnImplementation&) = + delete; // Delete copy constructor + QnnImplementation& operator=(const QnnImplementation&) = + delete; // Delete assignment operator + ~QnnImplementation(); executorch::runtime::Error Load(const QnnSaver_Config_t** saver_config); const QnnInterface& GetQnnInterface() const; - executorch::runtime::Error TerminateAllBackends(); + executorch::runtime::Error Unload(); private: - static constexpr const int required_num_providers_{1}; + static constexpr int required_num_providers_{1}; - static executorch::runtime::Error StartBackend( + const QnnInterface_t* StartBackend( const std::string& lib_path, const QnnSaver_Config_t** saver_config); - static executorch::runtime::Error InitBackend( + executorch::runtime::Error InitBackend( void* const lib_handle, const QnnSaver_Config_t** saver_config); std::string lib_path_; + void* lib_handle_{nullptr}; QnnInterface qnn_interface_; - - static std::unordered_map lib_path_to_backend_id_; - static std::unordered_map - loaded_backend_; - static std::unordered_map loaded_lib_handle_; - static std::mutex be_init_mutex_; }; } // namespace qnn } // namespace backends diff --git a/backends/qualcomm/runtime/backends/QnnLogger.cpp b/backends/qualcomm/runtime/backends/QnnLogger.cpp index 5b86894d874..fec6d426c04 100644 --- a/backends/qualcomm/runtime/backends/QnnLogger.cpp +++ b/backends/qualcomm/runtime/backends/QnnLogger.cpp @@ -40,11 +40,11 @@ void LoggingCallback( QNN_EXECUTORCH_LOG(log_level, buffer); } QnnLogger::QnnLogger( - const QnnImplementation& implementation, + QnnImplementation* implementation, QnnLog_Callback_t callback, QnnExecuTorchLogLevel log_level) - : handle_(nullptr), implementation_(implementation) { - const QnnInterface& qnn_interface = implementation.GetQnnInterface(); + : handle_(nullptr), implementation_(implementation), log_level_(log_level) { + const QnnInterface& qnn_interface = implementation->GetQnnInterface(); QnnLog_Level_t qnn_log_level = QNN_LOG_LEVEL_ERROR; if (log_level > QnnExecuTorchLogLevel::kLogOff) { @@ -86,7 +86,7 @@ QnnLogger::QnnLogger( } QnnLogger::~QnnLogger() { - const QnnInterface& qnn_interface = implementation_.GetQnnInterface(); + const QnnInterface& qnn_interface = implementation_->GetQnnInterface(); if (handle_ != nullptr) { Qnn_ErrorHandle_t error = qnn_interface.qnn_log_free(handle_); if (error != QNN_SUCCESS) { diff --git a/backends/qualcomm/runtime/backends/QnnLogger.h b/backends/qualcomm/runtime/backends/QnnLogger.h index 80be4f61c59..d329ab94407 100644 --- a/backends/qualcomm/runtime/backends/QnnLogger.h +++ b/backends/qualcomm/runtime/backends/QnnLogger.h @@ -21,18 +21,25 @@ void LoggingCallback( class QnnLogger 
{ public: explicit QnnLogger( - const QnnImplementation& implementation, + QnnImplementation* implementation, QnnLog_Callback_t callback, QnnExecuTorchLogLevel log_level); + QnnLogger(const QnnLogger&) = delete; // Delete copy constructor + QnnLogger& operator=(const QnnLogger&) = delete; // Delete assignment operator ~QnnLogger(); Qnn_LogHandle_t GetHandle() { return handle_; } + QnnExecuTorchLogLevel GetLogLevel() { + return log_level_; + } + private: Qnn_LogHandle_t handle_; - const QnnImplementation& implementation_; + QnnImplementation* implementation_; + QnnExecuTorchLogLevel log_level_; }; } // namespace qnn } // namespace backends diff --git a/backends/qualcomm/runtime/backends/QnnMemManager.cpp b/backends/qualcomm/runtime/backends/QnnMemManager.cpp index 3b99dd10868..f0d6096d29a 100644 --- a/backends/qualcomm/runtime/backends/QnnMemManager.cpp +++ b/backends/qualcomm/runtime/backends/QnnMemManager.cpp @@ -25,7 +25,7 @@ Error QnnMemManager::RegisterIonMem( const std::shared_ptr& tensor_wrapper, int32_t mem_fd, void* mem_ptr) { - const QnnInterface& qnn_interface = implementation_.GetQnnInterface(); + const QnnInterface& qnn_interface = implementation_->GetQnnInterface(); Qnn_MemDescriptor_t descriptor = { {tensor_wrapper->GetRank(), tensor_wrapper->GetDims(), nullptr}, tensor_wrapper->GetDataType(), @@ -66,7 +66,7 @@ Error QnnMemManager::RegisterCustomMem( size_t total_custom_mem_size, size_t tensor_offset, const CustomMemTensorInfo& info) { - const QnnInterface& qnn_interface = implementation_.GetQnnInterface(); + const QnnInterface& qnn_interface = implementation_->GetQnnInterface(); Qnn_MemDescriptor_t descriptor = { {tensor_wrapper->GetRank(), tensor_wrapper->GetDims(), nullptr}, tensor_wrapper->GetDataType(), @@ -113,7 +113,7 @@ Error QnnMemManager::PreRegisterCustomMemHandle( size_t total_custom_mem_size, size_t tensor_offset, const CustomMemTensorInfo& info) { - const QnnInterface& qnn_interface = implementation_.GetQnnInterface(); + const QnnInterface& qnn_interface = implementation_->GetQnnInterface(); Qnn_MemDescriptor_t descriptor = { {info.rank, info.shape, nullptr}, scalar_type_to_qnn_dtype_[info.dtype], @@ -165,7 +165,7 @@ Error QnnMemManager::SetMemHandle( } void QnnMemManager::DeRegisterMem() { - const QnnInterface& qnn_interface = implementation_.GetQnnInterface(); + const QnnInterface& qnn_interface = implementation_->GetQnnInterface(); Qnn_ErrorHandle_t error = QNN_SUCCESS; for (auto& it : registered_map_) { diff --git a/backends/qualcomm/runtime/backends/QnnMemManager.h b/backends/qualcomm/runtime/backends/QnnMemManager.h index 6a7f00b016a..0c5b3476b33 100644 --- a/backends/qualcomm/runtime/backends/QnnMemManager.h +++ b/backends/qualcomm/runtime/backends/QnnMemManager.h @@ -20,7 +20,7 @@ namespace qnn { class QnnMemManager { public: explicit QnnMemManager( - const QnnImplementation& implementation, + QnnImplementation* implementation, QnnContext* context, QnnExecuTorchLogLevel log_level) : implementation_(implementation), @@ -65,7 +65,7 @@ class QnnMemManager { private: void DeRegisterMem(); - const QnnImplementation& implementation_; + QnnImplementation* implementation_; QnnContext* context_; QnnExecuTorchLogLevel log_level_; // Store the registered Qnn_MemHandle_t for de-registration diff --git a/backends/qualcomm/runtime/backends/QnnProfiler.cpp b/backends/qualcomm/runtime/backends/QnnProfiler.cpp index fd580867db5..8345434a145 100644 --- a/backends/qualcomm/runtime/backends/QnnProfiler.cpp +++ b/backends/qualcomm/runtime/backends/QnnProfiler.cpp @@ -13,12 
+13,12 @@ namespace backends { namespace qnn { QnnProfile::QnnProfile( - const QnnImplementation& implementation, + QnnImplementation* implementation, QnnBackend* backend, const QnnExecuTorchProfileLevel& profile_level) : handle_(nullptr), implementation_(implementation), backend_(backend) { if (profile_level != QnnExecuTorchProfileLevel::kProfileOff) { - const QnnInterface& qnn_interface = implementation_.GetQnnInterface(); + const QnnInterface& qnn_interface = implementation_->GetQnnInterface(); QnnProfile_Level_t qnnProfileLevel = 0; if (profile_level == QnnExecuTorchProfileLevel::kProfileBasic) { @@ -72,7 +72,7 @@ QnnProfile::QnnProfile( Qnn_ErrorHandle_t QnnProfile::ProfileData( executorch::runtime::EventTracer* event_tracer) { - const QnnInterface& qnn_interface = implementation_.GetQnnInterface(); + const QnnInterface& qnn_interface = implementation_->GetQnnInterface(); const QnnProfile_EventId_t* events_ptr = nullptr; const QnnProfile_EventId_t* sub_events_ptr = nullptr; std::uint32_t num_events = 0; @@ -167,7 +167,7 @@ Qnn_ErrorHandle_t QnnProfile::ProfileData( } QnnProfile::~QnnProfile() { - const QnnInterface& qnn_interface = implementation_.GetQnnInterface(); + const QnnInterface& qnn_interface = implementation_->GetQnnInterface(); if (handle_ != nullptr) { Qnn_ErrorHandle_t error = qnn_interface.qnn_profile_free(handle_); if (error != QNN_SUCCESS) { diff --git a/backends/qualcomm/runtime/backends/QnnProfiler.h b/backends/qualcomm/runtime/backends/QnnProfiler.h index e21385aca7d..de8fbd1d9d5 100644 --- a/backends/qualcomm/runtime/backends/QnnProfiler.h +++ b/backends/qualcomm/runtime/backends/QnnProfiler.h @@ -19,7 +19,7 @@ namespace qnn { class QnnProfile { public: explicit QnnProfile( - const QnnImplementation& implementation, + QnnImplementation* implementation, QnnBackend* backend, const QnnExecuTorchProfileLevel& profile_level); ~QnnProfile(); @@ -31,7 +31,7 @@ class QnnProfile { private: Qnn_ProfileHandle_t handle_; - const QnnImplementation& implementation_; + QnnImplementation* implementation_; QnnBackend* backend_; }; } // namespace qnn diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpBackend.h b/backends/qualcomm/runtime/backends/htpbackend/HtpBackend.h index 5b5b1586cdb..3e3f727ecea 100644 --- a/backends/qualcomm/runtime/backends/htpbackend/HtpBackend.h +++ b/backends/qualcomm/runtime/backends/htpbackend/HtpBackend.h @@ -16,7 +16,7 @@ namespace backends { namespace qnn { class HtpBackend : public QnnBackend { public: - HtpBackend(const QnnImplementation& implementation, QnnLogger* logger) + HtpBackend(QnnImplementation* implementation, QnnLogger* logger) : QnnBackend(implementation, logger) {} ~HtpBackend() {} diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpContext.h b/backends/qualcomm/runtime/backends/htpbackend/HtpContext.h index 88660db080a..d80ef95db4a 100644 --- a/backends/qualcomm/runtime/backends/htpbackend/HtpContext.h +++ b/backends/qualcomm/runtime/backends/htpbackend/HtpContext.h @@ -20,7 +20,7 @@ class QnnDlcManager; class HtpContext : public QnnContext { public: HtpContext( - const QnnImplementation& implementation, + QnnImplementation* implementation, QnnBackend* backend, QnnDevice* device, QnnBackendCache* cache, diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpDevice.cpp b/backends/qualcomm/runtime/backends/htpbackend/HtpDevice.cpp index 35a20048fc5..9c9713bae09 100644 --- a/backends/qualcomm/runtime/backends/htpbackend/HtpDevice.cpp +++ b/backends/qualcomm/runtime/backends/htpbackend/HtpDevice.cpp @@ -376,7 
+376,7 @@ void HtpDevice::ReleasePerformanceVote() { Error HtpDevice::AfterCreateDevice() { if (IsPerfModeEnabled()) { - const QnnInterface& qnn_interface = implementation_.GetQnnInterface(); + const QnnInterface& qnn_interface = implementation_->GetQnnInterface(); Qnn_ErrorHandle_t error = QNN_SUCCESS; // Get htp_perf_infra diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpDevice.h b/backends/qualcomm/runtime/backends/htpbackend/HtpDevice.h index 9052deb6b52..5bfb4aa4b79 100644 --- a/backends/qualcomm/runtime/backends/htpbackend/HtpDevice.h +++ b/backends/qualcomm/runtime/backends/htpbackend/HtpDevice.h @@ -24,7 +24,7 @@ namespace qnn { class HtpDevice : public QnnDevice { public: HtpDevice( - const QnnImplementation& implementation, + QnnImplementation* implementation, QnnLogger* logger, const SocInfo* soc_info, const QnnExecuTorchHtpBackendOptions* htp_options) diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpGraph.h b/backends/qualcomm/runtime/backends/htpbackend/HtpGraph.h index c3add50d08b..e253643bba5 100644 --- a/backends/qualcomm/runtime/backends/htpbackend/HtpGraph.h +++ b/backends/qualcomm/runtime/backends/htpbackend/HtpGraph.h @@ -19,7 +19,7 @@ namespace qnn { class HtpGraph : public QnnGraph { public: HtpGraph( - const QnnImplementation& implementation, + QnnImplementation* implementation, QnnBackend* backend, QnnContext* context, const QnnExecuTorchProfileLevel& profile_level, diff --git a/backends/qualcomm/runtime/backends/irbackend/IrBackend.h b/backends/qualcomm/runtime/backends/irbackend/IrBackend.h index ddeb3a24460..72bb59c84f9 100644 --- a/backends/qualcomm/runtime/backends/irbackend/IrBackend.h +++ b/backends/qualcomm/runtime/backends/irbackend/IrBackend.h @@ -18,7 +18,7 @@ namespace backends { namespace qnn { class IrBackend : public QnnBackend { public: - IrBackend(const QnnImplementation& implementation, QnnLogger* logger) + IrBackend(QnnImplementation* implementation, QnnLogger* logger) : QnnBackend(implementation, logger) {} ~IrBackend() {} diff --git a/backends/qualcomm/runtime/backends/irbackend/aarch64/QnnDlcManager.cpp b/backends/qualcomm/runtime/backends/irbackend/aarch64/QnnDlcManager.cpp index d8c09dabcbe..6512b5730b5 100644 --- a/backends/qualcomm/runtime/backends/irbackend/aarch64/QnnDlcManager.cpp +++ b/backends/qualcomm/runtime/backends/irbackend/aarch64/QnnDlcManager.cpp @@ -19,9 +19,7 @@ namespace qnn { QnnDlcManager::QnnDlcManager( const QnnExecuTorchContextBinary& qnn_context_blob, const QnnExecuTorchOptions* options) - : qnn_loaded_backend_(""), - qnn_context_blob_(qnn_context_blob), - options_(options) { + : qnn_context_blob_(qnn_context_blob), options_(options) { if (options_ == nullptr) { QNN_EXECUTORCH_LOG_ERROR( "Fail to create QnnDlcManager, options is nullptr"); @@ -36,16 +34,18 @@ Error QnnDlcManager::Create() { return Error::Ok; } -Error QnnDlcManager::Configure() { +Error QnnDlcManager::Configure(const std::vector& graph_names) { return Error::Ok; } -Error QnnDlcManager::SetUpDlcEnvironment(const Qnn_Version_t& coreApiVersion) { +Error QnnDlcManager::SetUpDlcEnvironment( + const Qnn_Version_t& coreApiVersion, + const std::vector& graph_names) { return Error::Ok; } Error QnnDlcManager::RegisterGraphsFromDLC( - const QnnImplementation& implementation, + QnnImplementation* implementation, QnnBackend* backend, QnnContext* context, QnnBackendCache* cache) { @@ -103,7 +103,7 @@ Error QnnDlcManager::RegisterGraphsFromDLC( snprintf(dlc_path, sizeof(dlc_path), "/proc/self/fd/%d", fd); const QNN_INTERFACE_VER_TYPE& 
interfaceVer = - implementation.GetQnnInterface().GetInterfaceVer(); + implementation->GetQnnInterface().GetInterfaceVer(); if (composeGraphsFromDlc( /*backendHandle=*/backend->GetHandle(), @@ -133,9 +133,7 @@ Error QnnDlcManager::RegisterGraphsFromDLC( return Error::Ok; } -void QnnDlcManager::ResetBackendParams() {} -void QnnDlcManager::ResetLogger() {} -void QnnDlcManager::TerminateAllBackends() {} +void QnnDlcManager::Destroy() {} } // namespace qnn } // namespace backends diff --git a/backends/qualcomm/runtime/backends/irbackend/x86_64/QnnDlcManager.cpp b/backends/qualcomm/runtime/backends/irbackend/x86_64/QnnDlcManager.cpp index 280751cf160..f3300026b64 100644 --- a/backends/qualcomm/runtime/backends/irbackend/x86_64/QnnDlcManager.cpp +++ b/backends/qualcomm/runtime/backends/irbackend/x86_64/QnnDlcManager.cpp @@ -16,9 +16,7 @@ namespace qnn { QnnDlcManager::QnnDlcManager( const QnnExecuTorchContextBinary& qnn_context_blob, const QnnExecuTorchOptions* options) - : qnn_loaded_backend_(""), - qnn_context_blob_(qnn_context_blob), - options_(options) { + : qnn_context_blob_(qnn_context_blob), options_(options) { if (options_ == nullptr) { QNN_EXECUTORCH_LOG_ERROR( "Fail to create QnnDlcManager, options is nullptr"); @@ -26,52 +24,51 @@ QnnDlcManager::QnnDlcManager( } Error QnnDlcManager::LoadQnnIrLibrary() { - qnn_loaded_backend_ = QnnImplementation(library_name_); - Error ret = qnn_loaded_backend_.Load(nullptr); + backend_bundle_ptr_->implementation = + std::make_unique(library_name_); + Error ret = backend_bundle_ptr_->implementation->Load(nullptr); return ret; } Error QnnDlcManager::Create() { - backend_params_ptr_->qnn_backend_ptr_ = - std::make_unique(qnn_loaded_backend_, logger_.get()); + backend_bundle_ptr_->qnn_backend_ptr = std::make_unique( + backend_bundle_ptr_->implementation.get(), + backend_bundle_ptr_->qnn_logger_ptr.get()); - backend_params_ptr_->qnn_device_ptr_ = - std::make_unique(qnn_loaded_backend_, logger_.get()); + backend_bundle_ptr_->qnn_device_ptr = std::make_unique( + backend_bundle_ptr_->implementation.get(), + backend_bundle_ptr_->qnn_logger_ptr.get()); backend_params_ptr_->qnn_backend_cache_ptr_ = std::make_unique(qnn_context_blob_); backend_params_ptr_->qnn_context_ptr_ = std::make_unique( - qnn_loaded_backend_, - backend_params_ptr_->qnn_backend_ptr_.get(), - backend_params_ptr_->qnn_device_ptr_.get(), + backend_bundle_ptr_->implementation.get(), + backend_bundle_ptr_->qnn_backend_ptr.get(), + backend_bundle_ptr_->qnn_device_ptr.get(), backend_params_ptr_->qnn_backend_cache_ptr_.get(), nullptr); backend_params_ptr_->qnn_graph_ptr_ = std::make_unique( - qnn_loaded_backend_, - backend_params_ptr_->qnn_backend_ptr_.get(), + backend_bundle_ptr_->implementation.get(), + backend_bundle_ptr_->qnn_backend_ptr.get(), backend_params_ptr_->qnn_context_ptr_.get(), get_option(options_->profile_level())); backend_params_ptr_->backend_init_state_ = BackendInitializeState::INITIALIZED; - return backend_params_ptr_->qnn_backend_ptr_->VerifyQNNSDKVersion(); + return backend_bundle_ptr_->qnn_backend_ptr->VerifyQNNSDKVersion(); } -Error QnnDlcManager::Configure() { +Error QnnDlcManager::Configure(const std::vector& graph_names) { ET_CHECK_OR_RETURN_ERROR( backend_params_ptr_ != nullptr, Internal, "Failed to load Qnn backend."); - std::vector graph_names; - for (auto name : *options_->graph_name()) { - graph_names.emplace_back(name->str()); - } ET_CHECK_OR_RETURN_ERROR( backend_params_ptr_->qnn_backend_cache_ptr_->Configure(graph_names) == Error::Ok, Internal, "Fail to configure 
Qnn backend cache"); ET_CHECK_OR_RETURN_ERROR( - backend_params_ptr_->qnn_backend_ptr_->Configure( + backend_bundle_ptr_->qnn_backend_ptr->Configure( options_->op_package_options()) == Error::Ok, Internal, "Fail to configure Qnn backend"); @@ -92,7 +89,9 @@ Error QnnDlcManager::Configure() { return Error::Ok; } -Error QnnDlcManager::SetUpDlcEnvironment(const Qnn_Version_t& coreApiVersion) { +Error QnnDlcManager::SetUpDlcEnvironment( + const Qnn_Version_t& coreApiVersion, + const std::vector& graph_names) { ET_CHECK_MSG( (coreApiVersion.major >= 2 && coreApiVersion.minor >= 23), "Qnn API version %u.%u.%u is not supported for Qnn IR backend, The minimum supported version is 2.23.0 or QNN_SDK version 2.30.0", @@ -105,36 +104,33 @@ Error QnnDlcManager::SetUpDlcEnvironment(const Qnn_Version_t& coreApiVersion) { Internal, "Fail to Load Qnn IR library."); - logger_ = std::make_unique( - qnn_loaded_backend_, LoggingCallback, get_option(options_->log_level())); + backend_bundle_ptr_->qnn_logger_ptr = std::make_unique( + backend_bundle_ptr_->implementation.get(), + LoggingCallback, + get_option(options_->log_level())); ET_CHECK_OR_RETURN_ERROR( Create() == Error::Ok, Internal, "Failed to load Qnn IR backend."); ET_CHECK_OR_RETURN_ERROR( - Configure() == Error::Ok, Internal, "Fail to configure IR backend."); + Configure(graph_names) == Error::Ok, + Internal, + "Fail to configure IR backend."); return Error::Ok; } Error QnnDlcManager::RegisterGraphsFromDLC( - const QnnImplementation& implementation, + QnnImplementation* implementation, QnnBackend* backend, QnnContext* context, QnnBackendCache* cache) { return Error::Ok; } -void QnnDlcManager::ResetBackendParams() { +void QnnDlcManager::Destroy() { backend_params_ptr_.reset(new BackendConfigParameters()); -} - -void QnnDlcManager::ResetLogger() { - logger_.reset(); -} - -void QnnDlcManager::TerminateAllBackends() { - qnn_loaded_backend_.TerminateAllBackends(); + backend_bundle_ptr_.reset(new QnnBackendBundle()); } } // namespace qnn diff --git a/backends/qualcomm/serialization/qc_compiler_spec.fbs b/backends/qualcomm/serialization/qc_compiler_spec.fbs index 85affe3464d..4fb55ccbcdc 100644 --- a/backends/qualcomm/serialization/qc_compiler_spec.fbs +++ b/backends/qualcomm/serialization/qc_compiler_spec.fbs @@ -110,10 +110,6 @@ table QnnExecuTorchHtpBackendOptions { /// Signed or unsigned HTP PD session. The default PD session is unsigned. pd_session:QnnExecuTorchHtpPdSession; - /// Optional parameter specifying the directory of QNN Skel library. Only - /// useful for backends which have a Skel library. - skel_library_dir:string; - /// With using conv hmx with short depths, we might have better performance, /// but convolution that have short depth and/or weights that are not /// symmetric could exhibit inaccurate results. @@ -217,10 +213,6 @@ table QnnExecuTorchOptions { /// Optional backend specific options for the HTP backend. backend_options:QnnExecuTorchBackendOptions; - /// Optional parameter to create qnn graph if QNN context blob is not given - /// It could be a list of names only when doing weight-sharing lowering - graph_name:[string]; - /// Optional parameter to override the QNN backend library. 
library_path:string; diff --git a/backends/qualcomm/serialization/qc_schema.py b/backends/qualcomm/serialization/qc_schema.py index c188c555c41..02ccfd404aa 100644 --- a/backends/qualcomm/serialization/qc_schema.py +++ b/backends/qualcomm/serialization/qc_schema.py @@ -122,7 +122,6 @@ class QnnExecuTorchHtpBackendOptions: ) precision: QnnExecuTorchHtpPrecision = QnnExecuTorchHtpPrecision.kHtpQuantized pd_session: QnnExecuTorchHtpPdSession = QnnExecuTorchHtpPdSession.kHtpUnsignedPd - skel_library_dir: str = "" use_conv_hmx: bool = True use_dlbc: bool = False use_fold_relu: bool = True @@ -188,7 +187,6 @@ class QnnExecuTorchOpPackageOptions: class QnnExecuTorchOptions: soc_info: SocInfo backend_options: QnnExecuTorchBackendOptions - graph_name: List[str] = field(default_factory=lambda: ["forward"]) library_path: str = "" log_level: QnnExecuTorchLogLevel = QnnExecuTorchLogLevel.kLogOff online_prepare: bool = False diff --git a/backends/qualcomm/tests/test_qnn_delegate.py b/backends/qualcomm/tests/test_qnn_delegate.py index a9403f98b17..269d3fbe334 100644 --- a/backends/qualcomm/tests/test_qnn_delegate.py +++ b/backends/qualcomm/tests/test_qnn_delegate.py @@ -4531,10 +4531,8 @@ def test_qnn_backend_multi_graphs(self): generate_qnn_executorch_compiler_spec( soc_model=self.chipset_table[TestQNN.model], backend_options=backend_options, - graph_name=graph_name, ) - for graph_name in graph_names - ] + ] * len(graph_names) modules_dict = {} sample_inputs_dict = {} @@ -4739,11 +4737,7 @@ def test_qnn_backend_context_extraction(self): lowered_module = edge_prog_mgr.exported_program().graph_module._modules[ "lowered_module_0" ] - qnn_mgr = PyQnnManagerAdaptor.QnnManager( - lowered_module.compile_specs[0].value - ) - qnn_mgr.Init() - binary = qnn_mgr.StripProtocol(lowered_module.processed_bytes) + binary = PyQnnManagerAdaptor.StripProtocol(lowered_module.processed_bytes) validate(binary) def test_qnn_backend_dump_context_from_pte(self): @@ -5348,10 +5342,8 @@ def test_qnn_backend_multi_graphs(self): generate_qnn_executorch_compiler_spec( soc_model=self.chipset_table[TestQNN.model], backend_options=backend_options, - graph_name=graph_name, ) - for graph_name in graph_names - ] + ] * len(graph_names) modules_dict = {} sample_inputs_dict = {} compiler_specs_dict = {} @@ -5566,11 +5558,7 @@ def test_qnn_backend_context_extraction(self): lowered_module = edge_prog_mgr.exported_program().graph_module._modules[ "lowered_module_0" ] - qnn_mgr = PyQnnManagerAdaptor.QnnManager( - lowered_module.compile_specs[0].value - ) - qnn_mgr.Init() - binary = qnn_mgr.StripProtocol(lowered_module.processed_bytes) + binary = PyQnnManagerAdaptor.StripProtocol(lowered_module.processed_bytes) validate(binary) def test_qnn_backend_dump_context_from_pte(self): @@ -8376,6 +8364,8 @@ def test_cli(self): "--input_list", f"{tmp_dir}/input_list", ] + if self.host: + cmds.extend(["--host", self.host]) subprocess.run(cmds, stdout=subprocess.DEVNULL) self.assertTrue(os.path.isfile(f"{tmp_dir}/e_out/output_0_0.pt")) diff --git a/backends/qualcomm/utils/qnn_manager_lifecycle.py b/backends/qualcomm/utils/qnn_manager_lifecycle.py new file mode 100644 index 00000000000..2e1ba7fd2d7 --- /dev/null +++ b/backends/qualcomm/utils/qnn_manager_lifecycle.py @@ -0,0 +1,88 @@ +import contextlib +import logging +import threading +from typing import Dict, List + +import executorch.backends.qualcomm.python.PyQnnManagerAdaptor as PyQnnManager + +from executorch.backends.qualcomm.partition.utils import generate_qnn_executorch_option +from 
executorch.backends.qualcomm.serialization.qc_schema import ( + QnnExecuTorchBackendType, +) +from executorch.backends.qualcomm.serialization.qc_schema_serialize import ( + flatbuffer_to_option, +) +from executorch.exir.backend.compile_spec_schema import CompileSpec + +# Thread-local storage for QnnManager instances +_current_qnn_managers = threading.local() + + +class QnnManagerRegistry: + def __init__(self): + # Registry stores {backend_type: QnnManager instance} + self._registry = {} + + def get_or_create_qnn_manager( + self, backend_type: QnnExecuTorchBackendType, option: bytes + ) -> PyQnnManager.QnnManager: + if backend_type not in self._registry: + qnn_manager = PyQnnManager.QnnManager(option) + qnn_manager.InitBackend() + self._registry[backend_type] = qnn_manager + return self._registry[backend_type] + + def destroy_qnn_manager(self, backend_type: QnnExecuTorchBackendType): + if backend_type in self._registry: + self._registry[backend_type].Destroy() + del self._registry[backend_type] + else: + logging.warning( + f"Attempted to destroy non-existent QnnManager for backend type {backend_type.name}" + ) + + +@contextlib.contextmanager +def QnnManagerContext(compile_specs: Dict[str, List[CompileSpec]]): + # Create a new registry for the current context + current_context_registry = QnnManagerRegistry() + _current_qnn_managers.active_registry = current_context_registry + + backend_types_in_this_context = set() + + try: + for compile_spec_list in compile_specs.values(): + option = generate_qnn_executorch_option(compile_spec_list) + python_options = flatbuffer_to_option(option) + backend_type = python_options.backend_options.backend_type + + # Use the current_context_registry to get/create the manager + current_context_registry.get_or_create_qnn_manager(backend_type, option) + backend_types_in_this_context.add(backend_type) + yield + finally: + # Destroy only the managers created within this context + for backend_type in backend_types_in_this_context: + current_context_registry.destroy_qnn_manager(backend_type) + + # Clear the active registry reference + _current_qnn_managers.active_registry = None + + +def get_current_qnn_manager( + backend_type: QnnExecuTorchBackendType, compile_specs: List[CompileSpec] +) -> PyQnnManager.QnnManager: + """ + Retrieves the QnnManager instance active for the current QnnManagerContext invocation. + Returns a new QnnManager if no QnnManager is active for the given backend_type in the current context. + """ + active_registry = getattr(_current_qnn_managers, "active_registry", None) + if active_registry is None or backend_type not in active_registry._registry: + logging.warning( + f"No QnnManager active for backend type {backend_type.name} in the current QnnManagerContext. " + "It would be better to use to_edge_transform_and_lower_to_qnn to lower to the QNN backend." 
+ ) + return QnnManagerRegistry().get_or_create_qnn_manager( + backend_type, generate_qnn_executorch_option(compile_specs) + ) + return active_registry._registry[backend_type] diff --git a/backends/qualcomm/utils/utils.py b/backends/qualcomm/utils/utils.py index 20a1d3c0f72..e4d0dc03d6d 100644 --- a/backends/qualcomm/utils/utils.py +++ b/backends/qualcomm/utils/utils.py @@ -50,6 +50,7 @@ QCOM_QNN_COMPILE_SPEC, QCOM_QUANTIZED_IO, ) +from executorch.backends.qualcomm.utils.qnn_manager_lifecycle import QnnManagerContext from executorch.exir import EdgeCompileConfig, ExirExportedProgram, to_edge from executorch.exir.backend.compile_spec_schema import CompileSpec @@ -185,8 +186,9 @@ def replace_linear(module: torch.nn.Module): def dump_context_from_pte(pte_path) -> List[str]: """ Dump compiled binaries under the same directory of pte_path. - For partitioned graph, there will be multiple files with names f"{graph_name}_{index}". - Where 'graph_name' comes from the compiler_specs and 'index' represents the execution order. + For a partitioned graph, there will be multiple files with names f"{method_name}_{index}". + 'method_name' refers to the name of a method in the nn.Module that was traced to + generate this program, while 'index' indicates the order of execution. Args: pte_path (str): The path of generated pte. @@ -201,14 +203,6 @@ def dump_context_from_pte(pte_path) -> List[str]: program = deserialize_pte_binary(program_data).program ctx_path = os.path.dirname(pte_path) - dummy_compiler_specs = generate_qnn_executorch_compiler_spec( - soc_model=QcomChipset.SM8650, - backend_options=generate_htp_compiler_spec(use_fp16=False), - ) - qnn_mgr = PyQnnManagerAdaptor.QnnManager( - generate_qnn_executorch_option(dummy_compiler_specs) - ) - qnn_mgr.Init() dumpfiles = [] for execution_plan in program.execution_plan: for i, delegate in enumerate(execution_plan.delegates): @@ -216,7 +210,7 @@ def dump_context_from_pte(pte_path) -> List[str]: processed_bytes = program.backend_delegate_data[ delegate.processed.index ].data - binary = qnn_mgr.StripProtocol(processed_bytes) + binary = PyQnnManagerAdaptor.StripProtocol(processed_bytes) file_extension = ".bin" if len(binary) == 0: binary = processed_bytes @@ -442,15 +436,15 @@ def ensure_graph_specific_dict(value, graph_names): transform_passes[graph_name] = QnnPassManager().get_to_edge_transform_passes( ep, passes_job=passes_job[graph_name], dep_table=dep_table[graph_name] ) - - return to_edge_transform_and_lower( - aten_programs, - transform_passes=transform_passes, - partitioner=qnn_partitioners, - constant_methods=constant_methods, - compile_config=qnn_edge_config(), - generate_etrecord=generate_etrecord, - ) + with QnnManagerContext(compiler_specs): + return to_edge_transform_and_lower( + aten_programs, + transform_passes=transform_passes, + partitioner=qnn_partitioners, + constant_methods=constant_methods, + compile_config=qnn_edge_config(), + generate_etrecord=generate_etrecord, + ) def capture_program( @@ -988,7 +982,6 @@ def generate_qnn_executorch_compiler_spec( optrace: bool = False, shared_buffer: bool = False, is_from_context_binary: bool = False, - graph_name: str = "forward", op_package_options: QnnExecuTorchOpPackageOptions = None, ) -> List[CompileSpec]: """ @@ -1017,7 +1010,6 @@ def generate_qnn_executorch_compiler_spec( shared_buffer: Enables usage of shared buffer between application and backend for graph I/O. is_from_context_binary: True if current graph comes from pre-built context binary. 
- graph_name: Assign unique graph name if lowering multiple methods. op_package_options: Optional structure to specify op packages loaded and used by the backend. @@ -1042,7 +1034,6 @@ def generate_qnn_executorch_compiler_spec( qnn_executorch_options = QnnExecuTorchOptions( _soc_info_table[soc_model], backend_options ) - qnn_executorch_options.graph_name = [graph_name] qnn_executorch_options.log_level = ( QnnExecuTorchLogLevel.kLogLevelDebug if debug diff --git a/docs/source/backends-qualcomm.md b/docs/source/backends-qualcomm.md index 6c5397f02be..ea5aadfb8c0 100644 --- a/docs/source/backends-qualcomm.md +++ b/docs/source/backends-qualcomm.md @@ -27,7 +27,7 @@ Qualcomm AI Engine Direct is also referred to as QNN in the source and documenta is designed to provide unified, low-level APIs for AI development. Developers can interact with various accelerators on Qualcomm SoCs with these set of APIs, including -Kryo CPU, Adreno GPU, and Hexagon processors. More details can be found [here](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/overview.html). +Kryo CPU, Adreno GPU, and Hexagon processors. More details can be found [here](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-10/overview.html). Currently, this ExecuTorch Backend can delegate AI computations to Hexagon processors through Qualcomm AI Engine Direct APIs. diff --git a/examples/qualcomm/custom_op/README.md b/examples/qualcomm/custom_op/README.md index e3d6b216d8b..98f6886eefb 100644 --- a/examples/qualcomm/custom_op/README.md +++ b/examples/qualcomm/custom_op/README.md @@ -10,7 +10,7 @@ This folder contains examples demonstrating how to register custom operators int - Please finish [setup QNN backend](../../../docs/source/backends-qualcomm.md). -- Please follow [the instructions to install proper version of Hexagon SDK and Hexagon Tools.](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/linux_setup.html#htp-and-dsp) +- Please follow [the instructions to install proper version of Hexagon SDK and Hexagon Tools.](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-10/linux_setup.html#htp-and-dsp) - This example is verified with SM8650 (Snapdragon 8 Gen 3). - Install hexagon-sdk-5.4.0, hexagon-sdk-6.0.0, and hexagon tool 8.8.02 ```bash @@ -91,7 +91,7 @@ For now, only support one output tensors. * Data type: backend specific * Shape: Any -Consult the Qualcomm AI Engine Direct documentation for information on [generation op packages](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/op_def_schema.html). +Consult the Qualcomm AI Engine Direct documentation for information on [generation op packages](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-10/op_def_schema.html). ## Registering Op Packages After an op package library has been generated, certain information needs to be passed to the `compile_spec` in order to properly delegate the nodes. [The example script](custom_ops_1.py) shows how to construct the `QnnExecuTorchOpPackageOptions` and register op packages with the `compile spec`. diff --git a/examples/qualcomm/custom_op/custom_ops_1.py b/examples/qualcomm/custom_op/custom_ops_1.py index e84ee87a251..1745e2df7fa 100644 --- a/examples/qualcomm/custom_op/custom_ops_1.py +++ b/examples/qualcomm/custom_op/custom_ops_1.py @@ -69,16 +69,14 @@ def annotate_custom(gm: torch.fx.GraphModule) -> None: This function is specific for custom op. 
The source_fn of the rewritten nn module turns out to be "my_ops.mul3.default" """ - from executorch.backends.qualcomm.quantizer.annotators import ( - _is_annotated, - QUANT_ANNOTATION_KEY, - ) + from executorch.backends.qualcomm.quantizer.annotators import _is_annotated from executorch.backends.qualcomm.quantizer.qconfig import ( get_ptq_per_channel_quant_config, ) from torch.fx import Node from torchao.quantization.pt2e.quantizer import QuantizationAnnotation + from torchao.quantization.pt2e.quantizer.quantizer import Q_ANNOTATION_KEY quantization_config = get_ptq_per_channel_quant_config() for node in gm.graph.nodes: @@ -95,7 +93,7 @@ def annotate_custom(gm: torch.fx.GraphModule) -> None: input_spec = quantization_config.input_activation input_qspec_map[input_act] = input_spec - node.meta[QUANT_ANNOTATION_KEY] = QuantizationAnnotation( + node.meta[Q_ANNOTATION_KEY] = QuantizationAnnotation( input_qspec_map=input_qspec_map, output_qspec=quantization_config.output_activation, _annotated=True, diff --git a/examples/qualcomm/oss_scripts/llama/artifacts/stories260k_hybrid_llama_qnn.pte b/examples/qualcomm/oss_scripts/llama/artifacts/stories260k_hybrid_llama_qnn.pte index 198b96e5b9b..5dc70df4253 100644 Binary files a/examples/qualcomm/oss_scripts/llama/artifacts/stories260k_hybrid_llama_qnn.pte and b/examples/qualcomm/oss_scripts/llama/artifacts/stories260k_hybrid_llama_qnn.pte differ diff --git a/examples/qualcomm/oss_scripts/llama/llama.py b/examples/qualcomm/oss_scripts/llama/llama.py index 29212c7855b..aa0f09d413e 100755 --- a/examples/qualcomm/oss_scripts/llama/llama.py +++ b/examples/qualcomm/oss_scripts/llama/llama.py @@ -425,6 +425,9 @@ def compile( params_path = decoder_model_config.params_path with open(params_path) as f: kv_config = ModelArgs(**json.load(f)) + if args.decoder_model in {"gemma-2b", "gemma3-1b"}: + # For gemma, the rmsnorm weights have been preprocessed + kv_config.norm_type = "rmsnorm" # get quant recipe quant_recipe: StaticLLMQuantRecipe = decoder_model_config.quant_recipe(True) @@ -808,10 +811,8 @@ def permute(w, heads, partial_rotary_dim): soc_model=get_soc_to_chipset_map()[args.model], backend_options=backend_options, shared_buffer=args.shared_buffer, - graph_name=graph_name, ) - for graph_name in graph_names - ] + ] * len(graph_names) llama_instance_list[1].save_logits_quant_attrs() edge_prog_mgr = to_edge_transform_and_lower_to_qnn( diff --git a/examples/qualcomm/qaihub_scripts/utils/README.md b/examples/qualcomm/qaihub_scripts/utils/README.md index f0fe7f6acca..ade61a1fa76 100644 --- a/examples/qualcomm/qaihub_scripts/utils/README.md +++ b/examples/qualcomm/qaihub_scripts/utils/README.md @@ -32,7 +32,7 @@ If users are interested in well-known applications, [Qualcomm AI HUB](https://ai # target chipset is `SM8650` python -m qai_hub_models.models.quicksrnetlarge_quantized.export --target-runtime qnn --chipset qualcomm-snapdragon-8gen3 ``` -* The compiled model library will be located under `$MY_WS/build/quicksrnetlarge_quantized/quicksrnetlarge_quantized.so`. This model library maps to the artifacts generated by SDK tools mentioned in `Integration workflow` section on [Qualcomm AI Engine Direct document](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/overview.html). +* The compiled model library will be located under `$MY_WS/build/quicksrnetlarge_quantized/quicksrnetlarge_quantized.so`. 
This model library maps to the artifacts generated by SDK tools mentioned in `Integration workflow` section on [Qualcomm AI Engine Direct document](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-10/overview.html). ### Compiling Program @@ -82,7 +82,7 @@ If users are interested in well-known applications, [Qualcomm AI HUB](https://ai # generally we would have same layout for input / output tensors: e.g. either NHWC or NCHW # this might not be true under different converter configurations # learn more with converter tool from Qualcomm AI Engine Direct documentation - # https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/tools.html#model-conversion + # https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-10/tools.html#model-conversion with open('output__142.pt', 'rb') as f: buffer = io.BytesIO(f.read()) img = torch.load(buffer, weights_only=False)
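
Putting the lifecycle changes together: `to_edge_transform_and_lower_to_qnn` now enters `QnnManagerContext` before lowering, so every method that targets the same backend type reuses a single `QnnManager` for the whole lowering session instead of re-initializing the QNN library per graph. The sketch below is illustrative only: the spec construction mirrors the dummy specs removed from `dump_context_from_pte`, while the import locations of `QcomChipset`, `generate_htp_compiler_spec`, and `generate_qnn_executorch_compiler_spec`, and the two-method module, are assumptions.

```python
from executorch.backends.qualcomm.partition.utils import (
    generate_qnn_executorch_option,
)
from executorch.backends.qualcomm.serialization.qc_schema import QcomChipset
from executorch.backends.qualcomm.serialization.qc_schema_serialize import (
    flatbuffer_to_option,
)
from executorch.backends.qualcomm.utils.qnn_manager_lifecycle import (
    QnnManagerContext,
    get_current_qnn_manager,
)
from executorch.backends.qualcomm.utils.utils import (
    generate_htp_compiler_spec,
    generate_qnn_executorch_compiler_spec,
)

# graph_name is gone from the spec API: all methods now share one spec list,
# matching the `] * len(graph_names)` pattern in the updated tests.
specs = generate_qnn_executorch_compiler_spec(
    soc_model=QcomChipset.SM8650,
    backend_options=generate_htp_compiler_spec(use_fp16=False),
)
method_names = ["forward", "encode"]  # hypothetical multi-graph module
compiler_specs = {name: specs for name in method_names}

with QnnManagerContext(compiler_specs):
    # Inside the context the backend is initialized once per backend type;
    # any lowering stage can look up the shared manager instead of
    # constructing and Init()-ing its own.
    option = generate_qnn_executorch_option(specs)
    backend_type = flatbuffer_to_option(option).backend_options.backend_type
    qnn_mgr = get_current_qnn_manager(backend_type, specs)
# Exiting the context destroys exactly the managers it created.
```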
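Relatedly, since `StripProtocol` is now a free function on `PyQnnManagerAdaptor`, pulling a raw QNN context binary out of a delegate payload no longer needs a live manager or dummy compile specs. A short sketch, where `processed_bytes` is a hypothetical stand-in for a lowered module's `processed_bytes`:

```python
import executorch.backends.qualcomm.python.PyQnnManagerAdaptor as PyQnnManagerAdaptor

# `processed_bytes` stands in for LoweredBackendModule.processed_bytes.
binary = PyQnnManagerAdaptor.StripProtocol(processed_bytes)
if len(binary) == 0:
    # An empty result means the payload was not a QNN context binary
    # (e.g. a DLC); fall back to the raw bytes, as dump_context_from_pte
    # now does.
    binary = processed_bytes
```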