diff --git a/backends/qualcomm/CMakeLists.txt b/backends/qualcomm/CMakeLists.txt index cc7957dfdbe..b03cbd1a99e 100644 --- a/backends/qualcomm/CMakeLists.txt +++ b/backends/qualcomm/CMakeLists.txt @@ -150,6 +150,7 @@ add_library(qnn_executorch_backend SHARED) add_library(qnn_executorch_header INTERFACE) add_library(qnn_executorch_logging STATIC) add_library(qnn_factory STATIC) +add_library(qnn_backend_unified_registry STATIC) add_library(qnn_function_interface INTERFACE) add_library(qnn_graph STATIC) add_library(qnn_implementation STATIC) @@ -213,13 +214,30 @@ target_link_libraries( ) target_link_libraries( - qnn_dlc_manager PRIVATE qnn_factory qnn_backend qnn_device qnn_context - qnn_graph qnn_mem_manager + qnn_backend_unified_registry PRIVATE qnn_schema qnn_backend qnn_device + qnn_implementation ) target_link_libraries( - qnn_manager PRIVATE qnn_factory wrappers qnn_schema utils shared_buffer - qnn_dlc_manager + qnn_dlc_manager + PRIVATE qnn_factory + qnn_backend_unified_registry + qnn_backend + qnn_device + qnn_context + qnn_graph + qnn_mem_manager +) + +target_link_libraries( + qnn_manager + PRIVATE qnn_factory + qnn_backend_unified_registry + wrappers + qnn_schema + utils + shared_buffer + qnn_dlc_manager ) target_link_libraries( qnn_executorch_backend diff --git a/backends/qualcomm/aot/python/PyQnnManagerAdaptor.cpp b/backends/qualcomm/aot/python/PyQnnManagerAdaptor.cpp index 2511cd96636..fc134d4f51b 100644 --- a/backends/qualcomm/aot/python/PyQnnManagerAdaptor.cpp +++ b/backends/qualcomm/aot/python/PyQnnManagerAdaptor.cpp @@ -28,15 +28,39 @@ std::string GetQnnSdkBuildId(std::string library_path) { if (err != QNN_SUCCESS || id == nullptr) { throw std::runtime_error("Failed to get QNN backend build ID"); } - qnn_loaded_backend.TerminateAllBackends(); + qnn_loaded_backend.Unload(); return std::string(id); } +py::array_t<char> StripProtocol(const py::bytes& preprocessed_binary) { + py::buffer_info info(py::buffer(preprocessed_binary).request()); + + void* buf_ptr = nullptr; + size_t buf_size = 0; + // check if it's a qnn context binary + auto [status, signature, ctx_size, ctx_bin] = + QnnContextCustomProtocol().DeserializeContextCustomBuffer(info.ptr); + + if (status == Error::Ok) { + buf_size = ctx_size; + buf_ptr = ctx_bin; + } else { + // the format should be DLC, return nothing here + return py::array_t<char>(0); + } + + auto result = py::array_t<char>(buf_size); + auto result_buffer = result.request(); + std::memcpy(result_buffer.ptr, buf_ptr, buf_size); + return result; +} + PYBIND11_MODULE(PyQnnManagerAdaptor, m) { // TODO: Add related documents for configurations listed below using namespace qnn_delegate; m.def("GetQnnSdkBuildId", &GetQnnSdkBuildId); + m.def("StripProtocol", &StripProtocol); py::class_<QnnExecuTorchContextBinary>(m, "QnnExecuTorchContextBinary") .def(py::init<>()); @@ -49,6 +73,8 @@ PYBIND11_MODULE(PyQnnManagerAdaptor, m) { .def(py::init()) .def(py::init()) .def("Init", &PyQnnManager::Init) + .def("InitBackend", &PyQnnManager::InitBackend) + .def("InitContext", &PyQnnManager::InitContext) .def("IsNodeSupportedByBackend", &PyQnnManager::IsNodeSupportedByBackend) .def( "Compile", py::overload_cast< std::vector<std::string>&, std::vector<std::vector<std::shared_ptr<OpWrapper>>>&>( &PyQnnManager::Compile)) .def("Destroy", &PyQnnManager::Destroy) + .def("DestroyContext", &PyQnnManager::DestroyContext) .def("IsAvailable", &PyQnnManager::IsAvailable) .def("IsTensorDump", &PyQnnManager::IsTensorDump) .def("AllocateTensor", &PyQnnManager::AllocateTensor) @@ -66,8 +93,7 @@ PYBIND11_MODULE(PyQnnManagerAdaptor, m) {
.def("GetSpillFillBufferSize", &PyQnnManager::GetSpillFillBufferSize) .def( "MakeBinaryInfo", - py::overload_cast<const py::bytes&>(&PyQnnManager::MakeBinaryInfo)) - .def("StripProtocol", &PyQnnManager::StripProtocol); + py::overload_cast<const py::bytes&>(&PyQnnManager::MakeBinaryInfo)); } } // namespace qnn } // namespace backends diff --git a/backends/qualcomm/aot/python/PyQnnManagerAdaptor.h b/backends/qualcomm/aot/python/PyQnnManagerAdaptor.h index c8044e5db0e..9c2bd18b1f7 100644 --- a/backends/qualcomm/aot/python/PyQnnManagerAdaptor.h +++ b/backends/qualcomm/aot/python/PyQnnManagerAdaptor.h @@ -50,7 +50,24 @@ class PyQnnManager { } executorch::runtime::Error Init() { - return qnn_manager_->Init(); + ET_CHECK_OR_RETURN_ERROR( + qnn_manager_->InitBackend() == Error::Ok, + Internal, + "Fail to initialize backend"); + ET_CHECK_OR_RETURN_ERROR( + qnn_manager_->InitContext() == Error::Ok, + Internal, + "Fail to initialize context"); + return Error::Ok; + } + + executorch::runtime::Error InitBackend() { + return qnn_manager_->InitBackend(); + } + + executorch::runtime::Error InitContext( + const std::vector<std::string>& graph_names) { + return qnn_manager_->InitContext(std::optional{graph_names}); } bool IsNodeSupportedByBackend( @@ -90,6 +107,10 @@ class PyQnnManager { return qnn_manager_->Destroy(); } + void DestroyContext() { + return qnn_manager_->DestroyContext(); + } + bool IsAvailable() { return qnn_manager_->IsAvailable(); } @@ -148,29 +169,6 @@ class PyQnnManager { return result; } - py::array_t<char> StripProtocol(const py::bytes& preprocessed_binary) { - py::buffer_info info(py::buffer(preprocessed_binary).request()); - - void* buf_ptr = nullptr; - size_t buf_size = 0; - // check if it's a qnn context binary - auto [status, signature, ctx_size, ctx_bin] = - QnnContextCustomProtocol().DeserializeContextCustomBuffer(info.ptr); - - if (status == Error::Ok) { - buf_size = ctx_size; - buf_ptr = ctx_bin; - } else { - // the format should be DLC, return nothing here - return py::array_t<char>(0); - } - - auto result = py::array_t<char>(buf_size); - auto result_buffer = result.request(); - std::memcpy(result_buffer.ptr, buf_ptr, buf_size); - return result; - } - private: // Store the bytes object instead of a raw pointer so that this module will // keep the bytes alive. @@ -178,7 +176,6 @@ class PyQnnManager { QnnExecuTorchContextBinary qnn_executorch_context_binary_; std::shared_ptr<QnnManager> qnn_manager_; QnnContextCustomProtocol custom_context_custom_buffer_; - flatbuffers::FlatBufferBuilder builder_; }; } // namespace qnn } // namespace backends diff --git a/backends/qualcomm/builders/README.md b/backends/qualcomm/builders/README.md index 2f1c2d54828..437eb85859c 100644 --- a/backends/qualcomm/builders/README.md +++ b/backends/qualcomm/builders/README.md @@ -18,8 +18,8 @@ Thank you for contributing to Qualcomm AI Engine Direct delegate for ExecuTorch.
## References ### Qualcomm AI Engine Direct -- [Operator Definitions](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/MasterOpDef.html) -- [Supported Operators in Backends](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/operations.html#backend-supplements) +- [Operator Definitions](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-10/MasterOpDef.html) +- [Supported Operators in Backends](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-10/operations.html#backend-supplements) ### PyTorch - [torch.nn Operator Definitions](https://pytorch.org/docs/stable/nn.html) @@ -124,9 +124,9 @@ It will provide more hint to the source PyTorch layer where the missing operator }; } Qnn_Param_t; ``` - The name value equals to the parameter name described in [Operator Definitions](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/MasterOpDef.html), there are `epsilon`, `axes` for `LayerNorm` case.
+ The name value equals the parameter name described in [Operator Definitions](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-10/MasterOpDef.html); for the `LayerNorm` case, these are `epsilon` and `axes`.
- If you find it hard to correlate missing operator with documentation, this [table](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/SupportedOps.html) might be helpful for searching. In some cases, an exact match may not exist. Consider seeking for a math equivalent approach or notify maintainer for further analysis. + If you find it hard to correlate a missing operator with the documentation, this [table](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-10/SupportedOps.html) might be helpful for searching. In some cases, an exact match may not exist; consider a mathematically equivalent approach or notify the maintainer for further analysis. - **PyTorch**:
We could also read the IO spec from [function declaration](https://github.com/pytorch/pytorch/blob/main/aten/src/ATen/native/layer_norm.cpp) mentioned in [PyTorch Documentation](#pytorch): diff --git a/backends/qualcomm/debugger/utils.py b/backends/qualcomm/debugger/utils.py index d6b91c83996..29daa1f8784 100644 --- a/backends/qualcomm/debugger/utils.py +++ b/backends/qualcomm/debugger/utils.py @@ -348,8 +348,8 @@ def generate_optrace( qnn_binary_file="forward_0.dlc", ): """ - Generate Qnn HTP Optrace Profiling https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/htp_backend.html#qnn-htp-optrace-profiling - and QNN HTP Analysis Summary (QHAS) https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/htp_backend.html#qnn-htp-analysis-summary-qhas + Generate Qnn HTP Optrace Profiling https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-10/htp_backend.html#qnn-htp-optrace-profiling + and QNN HTP Analysis Summary (QHAS) https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-10/htp_backend.html#qnn-htp-analysis-summary-qhas . You can utilize the QAIRT Visualizer (https://pypi.org/project/qairt-visualizer/) to visualize the results from the files above. """ graph_name, file_extension = os.path.splitext(qnn_binary_file) diff --git a/backends/qualcomm/partition/qnn_partitioner.py b/backends/qualcomm/partition/qnn_partitioner.py index 19e998f59a3..c57bad3cee3 100644 --- a/backends/qualcomm/partition/qnn_partitioner.py +++ b/backends/qualcomm/partition/qnn_partitioner.py @@ -8,7 +8,6 @@ from collections import defaultdict from typing import Any, Callable, Dict, List, Optional, Tuple -import executorch.backends.qualcomm.python.PyQnnManagerAdaptor as PyQnnManager import torch from executorch.backends.qualcomm.builders import node_visitor_manager from executorch.backends.qualcomm.builders.qnn_constants import OpContextLoader @@ -21,6 +20,9 @@ QCOM_BYPASS_NODE, ) +from executorch.backends.qualcomm.utils.qnn_manager_lifecycle import ( + get_current_qnn_manager, +) from executorch.exir.backend.backend_details import CompileSpec from executorch.exir.backend.canonical_partitioners.pattern_op_partitioner import ( generate_partitions_from_list_of_nodes, @@ -55,7 +57,8 @@ def __init__( skip_node_id_set: set = None, skip_node_op_set: set = None, ): - python_options = flatbuffer_to_option(compiler_specs[0].value) + option = generate_qnn_executorch_option(compiler_specs) + python_options = flatbuffer_to_option(option) self.node_visitors = node_visitor_manager.get_node_visitors( edge_program, op_package_infos=python_options.op_package_options.op_package_infos, @@ -64,12 +67,10 @@ def __init__( self.skip_node_op_set = skip_node_op_set self.skip_node_id_set = skip_node_id_set self.nodes_to_wrappers = defaultdict(dict) - self.qnn_manager = PyQnnManager.QnnManager( - generate_qnn_executorch_option(compiler_specs) + self.qnn_manager = get_current_qnn_manager( + python_options.backend_options.backend_type, compiler_specs ) - self.qnn_manager.Init() - def is_node_supported(self, _, node: torch.fx.Node) -> bool: if node.op != "call_function" or node.target in not_supported_operator: return False @@ -118,9 +119,6 @@ def is_node_supported(self, _, node: torch.fx.Node) -> bool: print(f"[QNN Partitioner Op Support]: {node.target.__name__} | {supported}") return supported - def __del__(self): - self.qnn_manager.Destroy() - class QnnPartitioner(Partitioner): """ diff --git a/backends/qualcomm/qnn_preprocess.py b/backends/qualcomm/qnn_preprocess.py index 4e9cda21d02..4bdcfaa3fd1 100644 
--- a/backends/qualcomm/qnn_preprocess.py +++ b/backends/qualcomm/qnn_preprocess.py @@ -8,8 +8,6 @@ from collections import defaultdict from typing import Dict, final, List -import executorch.backends.qualcomm.python.PyQnnManagerAdaptor as PyQnnManager - import torch # noqa: F401 from executorch.backends.qualcomm._passes.qnn_pass_manager import QnnPassManager from executorch.backends.qualcomm.builders.node_visitor_manager import get_node_visitors @@ -20,7 +18,9 @@ ) from executorch.backends.qualcomm.serialization.qc_schema_serialize import ( flatbuffer_to_option, - option_to_flatbuffer, +) +from executorch.backends.qualcomm.utils.qnn_manager_lifecycle import ( + get_current_qnn_manager, ) from executorch.exir.backend.backend_details import ( BackendDetails, @@ -30,6 +30,7 @@ from torch.export.exported_program import ExportedProgram DEFAULT_DEBUG_HANDLE = 65535 +DEFAULT_GRAPH_NAME = "forward" logger = logging.getLogger(__name__) logger.setLevel(logging.DEBUG) @@ -99,9 +100,11 @@ def preprocess( compile_specs: List[CompileSpec], ) -> PreprocessResult: option = generate_qnn_executorch_option(compile_specs) - qnn_manager = PyQnnManager.QnnManager(option) - qnn_manager.Init() obj_options = flatbuffer_to_option(option) + qnn_manager = get_current_qnn_manager( + obj_options.backend_options.backend_type, compile_specs + ) + qnn_manager.InitContext([DEFAULT_GRAPH_NAME]) py_op_wrapper_list = QnnBackend._build_op_wrappers( edge_program, qnn_manager.IsTensorDump(), @@ -118,7 +121,7 @@ def preprocess( f"Record all QNN API calls from saver backend at: {obj_options.saver_output_dir}" ) assert len(qnn_context_binary) != 0, "Failed to generate Qnn context binary." - qnn_manager.Destroy() + qnn_manager.DestroyContext() # For now, debug_handle_map is not used by QNN ExecuTorch return PreprocessResult( processed_bytes=bytes(qnn_context_binary), @@ -132,12 +135,9 @@ def preprocess_multimethod( ) -> PreprocessResult: # TODO: refactor QnnManager to consume multiple compile_spec # take first compile_specs here for the same partitions - graph_name = list(edge_programs.keys()) + graph_names = list(edge_programs.keys()) compile_spec = list(compile_specs.values())[0][0] - # gather all graph names option = flatbuffer_to_option(compile_spec[0].value) - option.graph_name = graph_name - compile_spec[0].value = option_to_flatbuffer(option) # check if each graph has equal number of partitions num_sub_graphs = set() for edge_program in edge_programs.values(): @@ -149,15 +149,15 @@ def preprocess_multimethod( all_processed_results = {key: [] for key in edge_programs.keys()} num_sub_graphs = next(iter(num_sub_graphs)) + qnn_manager = get_current_qnn_manager( + option.backend_options.backend_type, compile_spec + ) for i in range(num_sub_graphs): # e.g. 
2 methods (x, y) with 3 partitions # > context_binary_0: [x.subgraph_0, y.subgraph_0] # > context_binary_1: [x.subgraph_1, y.subgraph_1] # > context_binary_2: [x.subgraph_2, y.subgraph_2] - qnn_manager = PyQnnManager.QnnManager( - generate_qnn_executorch_option(compile_spec) - ) - qnn_manager.Init() + qnn_manager.InitContext(graph_names) py_op_wrapper_list, ctx_binary_list = [], [] for j, programs in enumerate(edge_programs.values()): logger.info(f"Processing Method({j}): ({i+1}/{num_sub_graphs})") @@ -177,7 +177,9 @@ def preprocess_multimethod( ) if len(py_op_wrapper_list) == len(edge_programs.values()): - qnn_context_binary = qnn_manager.Compile(graph_name, py_op_wrapper_list) + qnn_context_binary = qnn_manager.Compile( + graph_names, py_op_wrapper_list + ) if option.saver: # TODO: Currently, only the first method is saved. Update this logic if saving multiple methods becomes necessary in the future. exit( @@ -186,7 +188,7 @@ def preprocess_multimethod( assert ( len(qnn_context_binary) != 0 ), "Failed to generate Qnn context binary." - qnn_manager.Destroy() + qnn_manager.DestroyContext() # methods should share the same context binary for current partition for key in edge_programs.keys(): all_processed_results[key].append( diff --git a/backends/qualcomm/quantizer/README.md b/backends/qualcomm/quantizer/README.md index 6870ecc76ac..6954b6e05b7 100644 --- a/backends/qualcomm/quantizer/README.md +++ b/backends/qualcomm/quantizer/README.md @@ -9,7 +9,7 @@ Thank you for contributing to Qualcomm AI Engine Direct delegate for ExecuTorch. ## References ### Qualcomm AI Engine Direct -- [Operator Definitions for HTP](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/HtpOpDefSupplement.html) +- [Operator Definitions for HTP](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-10/HtpOpDefSupplement.html) ### PyTorch - [ATen Operator Definitions](https://github.com/pytorch/pytorch/tree/main/aten/src/ATen/native) @@ -66,7 +66,7 @@ def annotate_xxx(node: Node, quantization_config: QuantizationConfig) -> None: - __quantization_config__: data structure describing quantization configurations for IO activation / weight / bias ### Example of Conv2d Annotation -Conv2d accepts up to three input tensors: `input activation`, `kernel`, `bias`. There are constraints imposed by [Qualcomm AI Engine Direct Manual](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/HtpOpDefSupplement.html#conv2d).
+Conv2d accepts up to three input tensors: `input activation`, `kernel`, `bias`. There are constraints imposed by [Qualcomm AI Engine Direct Manual](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-10/HtpOpDefSupplement.html#conv2d).
Take 8-bit fixed point as example: - __weight__: must be symmetrically quantized if per-channel observer is applied - __bias__: must have `QNN_DATATYPE_SFIXED_POINT_32` and be symmetrically quantized with expected encoding `scales = weight.scales * input.scale`, `offset = 0` if per-channel observer is applied. @@ -147,13 +147,13 @@ Now, we can start to fill in the function body: - Update node's meta with framework compatible data structure ```python - node.meta[QUANT_ANNOTATION_KEY] = QuantizationAnnotation( + node.meta[Q_ANNOTATION_KEY] = QuantizationAnnotation( input_qspec_map=input_qspec_map, output_qspec=quantization_config.output_activation, _annotated=True, ) ``` - After done processing `input_qspec_map`, it's required to have it in node's meta with special tag (`QUANT_ANNOTATION_KEY`) for `convert_pt2e` to properly insert observers. + After done processing `input_qspec_map`, it's required to have it in node's meta with special tag (`Q_ANNOTATION_KEY`) for `convert_pt2e` to properly insert observers. ### Common Annotators For operators without extra parameters to be observed, there are pre-defined annotation method for convenience: diff --git a/backends/qualcomm/runtime/QnnExecuTorchBackend.cpp b/backends/qualcomm/runtime/QnnExecuTorchBackend.cpp index 988c4b84a68..41c2370e4cb 100644 --- a/backends/qualcomm/runtime/QnnExecuTorchBackend.cpp +++ b/backends/qualcomm/runtime/QnnExecuTorchBackend.cpp @@ -90,7 +90,11 @@ Result QnnExecuTorchBackend::init( } ET_CHECK_OR_RETURN_ERROR( - qnn_manager->Init() == Error::Ok, + qnn_manager->InitBackend() == Error::Ok, + Internal, + "Fail to initialize Qnn Backend"); + ET_CHECK_OR_RETURN_ERROR( + qnn_manager->InitContext() == Error::Ok, Internal, "Fail to initialize Qnn Manager"); diff --git a/backends/qualcomm/runtime/QnnManager.cpp b/backends/qualcomm/runtime/QnnManager.cpp index 5e3220f25d9..558f3ec0a10 100644 --- a/backends/qualcomm/runtime/QnnManager.cpp +++ b/backends/qualcomm/runtime/QnnManager.cpp @@ -54,15 +54,9 @@ QnnManager::~QnnManager() { QnnManager::QnnManager( const QnnExecuTorchOptions* options, const QnnExecuTorchContextBinary& qnn_executorch_context_binary) - : qnn_context_blob_(qnn_executorch_context_binary), - qnn_loaded_backend_(""), - // options' life cycle is decided by compiler specs which is - // kept by executorch runtime framework - // please pay attention to any potential seg fault - options_(options) { + : qnn_context_blob_(qnn_executorch_context_binary), options_(options) { QnnExecuTorchBackendType backend_type = options->backend_options()->backend_type(); - std::string library_path = options->library_path()->str(); if (get_option(options_->log_level()) >= QnnExecuTorchLogLevel::kLogLevelInfo) { @@ -71,10 +65,8 @@ QnnManager::QnnManager( EnumNameQcomChipset(options_->soc_info()->soc_model())); QNN_EXECUTORCH_LOG_INFO( "backend_type: %s", EnumNameQnnExecuTorchBackendType(backend_type)); - for (auto name : *options_->graph_name()) { - QNN_EXECUTORCH_LOG_INFO("graph_name: %s", name->c_str()); - } - QNN_EXECUTORCH_LOG_INFO("library_path: %s", library_path.c_str()); + QNN_EXECUTORCH_LOG_INFO( + "library_path: %s", options->library_path()->str().c_str()); QNN_EXECUTORCH_LOG_INFO("dump intermediate outputs: %s", IsTensorDump()); QNN_EXECUTORCH_LOG_INFO( "log_level: %s", @@ -95,35 +87,13 @@ QnnManager::QnnManager( options_->op_package_options()->op_package_infos()->size()); } - if (library_path.empty()) { - switch (backend_type) { - case QnnExecuTorchBackendType::kHtpBackend: - library_path = htp_library_name_; - break; - case
QnnExecuTorchBackendType::kDspBackend: - library_path = dsp_library_name_; - break; - case QnnExecuTorchBackendType::kGpuBackend: - library_path = gpu_library_name_; - break; - default: - QNN_EXECUTORCH_LOG_ERROR("Unknown backend type: %d", backend_type); - break; - } - } - qnn_loaded_backend_ = QnnImplementation(library_path); backend_params_ptr_ = std::make_unique(); + backend_bundle_ptr_ = std::make_shared(); qnn_dlc_manager_ = std::make_shared(qnn_context_blob_, options_); } -Error QnnManager::LoadQnnLibrary() { - auto config = GetImplementationConfig(); - Error ret = qnn_loaded_backend_.Load(config.get()); - return ret; -} - Error QnnManager::PreRegisterMem() { SharedBuffer& shared_buffer_manager = SharedBuffer::GetSharedBufferManager(); for (const auto info : shared_buffer_manager.GetCustomMemTensorInfoSet()) { @@ -299,15 +269,20 @@ Error QnnManager::RegisterCustomMem( return Error::Ok; } -Error QnnManager::Init() { +Error QnnManager::InitBackend() { + // Get or create the shared backend bundle + Error err = QnnBackendUnifiedRegistry::GetInstance().GetOrCreateBackendBundle( + options_, backend_bundle_ptr_); ET_CHECK_OR_RETURN_ERROR( - LoadQnnLibrary() == Error::Ok, Internal, "Fail to load Qnn library"); - logger_ = std::make_unique( - qnn_loaded_backend_, LoggingCallback, get_option(options_->log_level())); - std::vector graph_names; - for (auto name : *options_->graph_name()) { - graph_names.emplace_back(name->str()); - } + err == Error::Ok, + Internal, + "Fail to get or create shared Qnn backend bundle. Error code: %d", + static_cast(err)); + return Error::Ok; +} + +Error QnnManager::InitContext( + std::optional> graph_names) { if (backend_params_ptr_->backend_init_state_ == BackendInitializeState::UNINITIALIZED) { QNN_EXECUTORCH_LOG_INFO( @@ -315,8 +290,9 @@ Error QnnManager::Init() { "parameters for Qnn executorch backend type %d", options_->backend_options()->backend_type()); backend_params_ptr_ = QnnBackendFactory().Create( - qnn_loaded_backend_, - logger_.get(), + backend_bundle_ptr_->implementation.get(), + backend_bundle_ptr_->qnn_backend_ptr.get(), + backend_bundle_ptr_->qnn_device_ptr.get(), qnn_context_blob_, options_, qnn_dlc_manager_.get()); @@ -324,20 +300,13 @@ Error QnnManager::Init() { backend_params_ptr_ != nullptr, Internal, "Failed to load Qnn backend."); + // Note: For online_prepare or deserialization, the graph name will be + // obtained from the binary. ET_CHECK_OR_RETURN_ERROR( - backend_params_ptr_->qnn_backend_cache_ptr_->Configure(graph_names) == - Error::Ok, + backend_params_ptr_->qnn_backend_cache_ptr_->Configure( + graph_names.value_or(std::vector{})) == Error::Ok, Internal, "Fail to configure Qnn backend cache"); - ET_CHECK_OR_RETURN_ERROR( - backend_params_ptr_->qnn_backend_ptr_->Configure( - options_->op_package_options()) == Error::Ok, - Internal, - "Fail to configure Qnn backend"); - ET_CHECK_OR_RETURN_ERROR( - backend_params_ptr_->qnn_device_ptr_->Configure() == Error::Ok, - Internal, - "Fail to configure Qnn device"); ET_CHECK_OR_RETURN_ERROR( backend_params_ptr_->qnn_context_ptr_->Configure() == Error::Ok, Internal, @@ -363,13 +332,15 @@ Error QnnManager::Init() { #endif if (IsOnlinePrepare()) { + // Check whether the QNN version supports the DLC format. 
Qnn_ApiVersion_t qnn_version = {QNN_VERSION_INIT}; - qnn_loaded_backend_.GetQnnInterface().qnn_backend_get_api_version( - &qnn_version); + backend_bundle_ptr_->implementation->GetQnnInterface() + .qnn_backend_get_api_version(&qnn_version); ET_CHECK_OR_RETURN_ERROR( - qnn_dlc_manager_->SetUpDlcEnvironment(qnn_version.coreApiVersion) == - Error::Ok, + qnn_dlc_manager_->SetUpDlcEnvironment( + qnn_version.coreApiVersion, + graph_names.value_or(std::vector{})) == Error::Ok, Internal, "Fail to setup Dlc environment"); } @@ -514,13 +485,14 @@ Error QnnManager::ProfileExecuteData( } void QnnManager::Destroy() { - QNN_EXECUTORCH_LOG_INFO("Destroy Qnn backend parameters"); backend_params_ptr_.reset(new BackendConfigParameters()); - qnn_dlc_manager_->ResetBackendParams(); - logger_.reset(); - qnn_dlc_manager_->ResetLogger(); - qnn_loaded_backend_.TerminateAllBackends(); - qnn_dlc_manager_->TerminateAllBackends(); + backend_bundle_ptr_.reset(new QnnBackendBundle()); + qnn_dlc_manager_->Destroy(); +} + +void QnnManager::DestroyContext() { + backend_params_ptr_.reset(new BackendConfigParameters()); + qnn_dlc_manager_->Destroy(); } bool QnnManager::IsNodeSupportedByBackend( @@ -540,7 +512,7 @@ bool QnnManager::IsNodeSupportedByBackend( } } - error = backend_params_ptr_->qnn_backend_ptr_->BackendValidateOpConfig( + error = backend_bundle_ptr_->qnn_backend_ptr->BackendValidateOpConfig( op_wrapper->GetOpConfig()); if (error != QNN_SUCCESS) { QNN_EXECUTORCH_LOG_WARN( diff --git a/backends/qualcomm/runtime/QnnManager.h b/backends/qualcomm/runtime/QnnManager.h index c01a537f7bd..ec9973039bc 100644 --- a/backends/qualcomm/runtime/QnnManager.h +++ b/backends/qualcomm/runtime/QnnManager.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -30,7 +31,9 @@ class QnnManager { const QnnExecuTorchContextBinary& qnn_executorch_context_binary); ~QnnManager(); - executorch::runtime::Error Init(); + executorch::runtime::Error InitBackend(); + executorch::runtime::Error InitContext( + std::optional> graph_names = std::nullopt); executorch::runtime::Error AllocateTensor(const std::string& graph_name); executorch::runtime::Error AllocateTensor( const std::string& graph_name, @@ -47,7 +50,11 @@ class QnnManager { const std::string& graph_name, executorch::runtime::EventTracer* event_tracer); + // Destroy all QNN components and decrease reference count of shared QNN + // resource void Destroy(); + // Only destroy non-shared QNN components + void DestroyContext(); bool IsAvailable() { return true; @@ -103,35 +110,11 @@ class QnnManager { return backend_params_ptr_->qnn_context_ptr_->GetGraphNames(); } - std::string GetBinarySignature(); - private: - std::unique_ptr GetImplementationConfig() { - if (options_->saver()) { - auto outputDirCfg = std::make_unique(); - outputDirCfg->option = QNN_SAVER_CONFIG_OPTION_OUTPUT_DIRECTORY; - outputDirCfg->outputDirectory = options_->saver_output_dir()->c_str(); - - auto saverCfg = std::make_unique(2); - saverCfg[0] = outputDirCfg.release(); - saverCfg[1] = nullptr; - - return saverCfg; - } else { - return nullptr; - } - } - - executorch::runtime::Error LoadQnnLibrary(); - - static constexpr const char* htp_library_name_ = "libQnnHtp.so"; - static constexpr const char* gpu_library_name_ = "libQnnGpu.so"; - static constexpr const char* dsp_library_name_ = "libQnnDsp.so"; - QnnExecuTorchContextBinary qnn_context_blob_; std::unique_ptr backend_params_ptr_; - QnnImplementation qnn_loaded_backend_; - std::unique_ptr logger_; + std::shared_ptr + backend_bundle_ptr_; // New 
member to hold shared resources const QnnExecuTorchOptions* options_; std::unordered_map>> input_tensors_; diff --git a/backends/qualcomm/runtime/backends/CMakeLists.txt b/backends/qualcomm/runtime/backends/CMakeLists.txt index 6a44f3234c5..a0066ae4933 100644 --- a/backends/qualcomm/runtime/backends/CMakeLists.txt +++ b/backends/qualcomm/runtime/backends/CMakeLists.txt @@ -141,3 +141,10 @@ target_sources( PRIVATE ${CMAKE_CURRENT_LIST_DIR}/irbackend/${CMAKE_SYSTEM_PROCESSOR}/QnnDlcManager.cpp ) + +# qnn_backend_unified_registry +target_sources( + qnn_backend_unified_registry + PUBLIC ${CMAKE_CURRENT_LIST_DIR}/QnnBackendUnifiedRegistry.h + PRIVATE ${CMAKE_CURRENT_LIST_DIR}/QnnBackendUnifiedRegistry.cpp +) diff --git a/backends/qualcomm/runtime/backends/QnnBackendCommon.cpp b/backends/qualcomm/runtime/backends/QnnBackendCommon.cpp index 960bbd9513e..81ec3ebde26 100644 --- a/backends/qualcomm/runtime/backends/QnnBackendCommon.cpp +++ b/backends/qualcomm/runtime/backends/QnnBackendCommon.cpp @@ -13,7 +13,7 @@ namespace qnn { using executorch::runtime::Error; QnnBackend::~QnnBackend() { - const QnnInterface& qnn_interface = implementation_.GetQnnInterface(); + const QnnInterface& qnn_interface = implementation_->GetQnnInterface(); Qnn_ErrorHandle_t error = QNN_SUCCESS; if (nullptr != handle_) { QNN_EXECUTORCH_LOG_INFO("Destroy Qnn backend"); @@ -34,7 +34,7 @@ void QnnBackend::BackendRegisterOpPackage( const flatbuffers::Vector< flatbuffers::Offset>* op_packages_infos) { - const QnnInterface& qnn_interface = implementation_.GetQnnInterface(); + const QnnInterface& qnn_interface = implementation_->GetQnnInterface(); Qnn_ErrorHandle_t error = QNN_SUCCESS; QnnExecuTorchOpPackagePlatform current_platform = QnnExecuTorchOpPackagePlatform::UNKNOWN; @@ -71,7 +71,7 @@ void QnnBackend::BackendRegisterOpPackage( Error QnnBackend::Configure( const QnnExecuTorchOpPackageOptions* op_package_options) { // create qnn backend - const QnnInterface& qnn_interface = implementation_.GetQnnInterface(); + const QnnInterface& qnn_interface = implementation_->GetQnnInterface(); Qnn_ErrorHandle_t error = QNN_SUCCESS; std::vector temp_backend_config; @@ -102,7 +102,7 @@ Error QnnBackend::Configure( } Error QnnBackend::VerifyQNNSDKVersion() { - const QnnInterface& qnn_interface = implementation_.GetQnnInterface(); + const QnnInterface& qnn_interface = implementation_->GetQnnInterface(); Qnn_ApiVersion_t qnn_version = {QNN_VERSION_INIT}; Qnn_ErrorHandle_t error = diff --git a/backends/qualcomm/runtime/backends/QnnBackendCommon.h b/backends/qualcomm/runtime/backends/QnnBackendCommon.h index a66119dab22..e146a67d772 100644 --- a/backends/qualcomm/runtime/backends/QnnBackendCommon.h +++ b/backends/qualcomm/runtime/backends/QnnBackendCommon.h @@ -27,10 +27,11 @@ namespace qnn { // qnn backend class QnnBackend { public: - explicit QnnBackend( - const QnnImplementation& implementation, - QnnLogger* logger) + explicit QnnBackend(QnnImplementation* implementation, QnnLogger* logger) : handle_(nullptr), implementation_(implementation), logger_(logger) {} + QnnBackend(const QnnBackend&) = delete; // Delete copy constructor + QnnBackend& operator=(const QnnBackend&) = + delete; // Delete assignment operator virtual ~QnnBackend(); virtual bool IsProfileEventTypeParentOfNodeTime( @@ -42,7 +43,7 @@ class QnnBackend { const QnnExecuTorchOpPackageOptions* op_package_options); Qnn_ErrorHandle_t BackendValidateOpConfig(const Qnn_OpConfig_t& op_config) { - return implementation_.GetQnnInterface().qnn_backend_validate_op_config( + return 
implementation_->GetQnnInterface().qnn_backend_validate_op_config( handle_, op_config); }; @@ -65,7 +66,7 @@ class QnnBackend { flatbuffers::Offset>* op_packages_info); Qnn_BackendHandle_t handle_; - const QnnImplementation& implementation_; + QnnImplementation* implementation_; QnnOpPackageManager op_package_manager_; QnnLogger* logger_; executorch::runtime::Error VersionChecker( diff --git a/backends/qualcomm/runtime/backends/QnnBackendFactory.cpp b/backends/qualcomm/runtime/backends/QnnBackendFactory.cpp index e7e9db6fed8..0f0524d739e 100644 --- a/backends/qualcomm/runtime/backends/QnnBackendFactory.cpp +++ b/backends/qualcomm/runtime/backends/QnnBackendFactory.cpp @@ -16,8 +16,9 @@ namespace qnn { using executorch::runtime::Error; std::unique_ptr QnnBackendFactory::Create( - const QnnImplementation& implementation, - QnnLogger* logger, + QnnImplementation* implementation_ptr, + QnnBackend* qnn_backend_ptr, + QnnDevice* qnn_device_ptr, const QnnExecuTorchContextBinary& qnn_context_blob, const QnnExecuTorchOptions* options, QnnDlcManager* qnn_dlc_manager) { @@ -26,15 +27,8 @@ std::unique_ptr QnnBackendFactory::Create( switch (options->backend_options()->backend_type()) { case QnnExecuTorchBackendType::kHtpBackend: { auto htp_options = options->backend_options()->htp_options(); - const std::string skel_library_dir = - htp_options->skel_library_dir()->str(); - if (!skel_library_dir.empty()) { - setenv("ADSP_LIBRARY_PATH", skel_library_dir.c_str(), /*overwrite=*/1); - } if (get_option(options->log_level()) >= QnnExecuTorchLogLevel::kLogLevelInfo) { - QNN_EXECUTORCH_LOG_INFO( - "skel_library_dir: %s", skel_library_dir.c_str()); QNN_EXECUTORCH_LOG_INFO( "htp_arch in htp_info: %s", EnumNameHtpArch(options->soc_info()->htp_info()->htp_arch())); @@ -56,32 +50,26 @@ std::unique_ptr QnnBackendFactory::Create( QNN_EXECUTORCH_LOG_INFO( "use_fold_relu in htp_options: %d", htp_options->use_fold_relu()); } - backend_params->qnn_backend_ptr_ = - std::make_unique(implementation, logger); - - backend_params->qnn_device_ptr_ = std::make_unique( - implementation, logger, options->soc_info(), htp_options); - backend_params->qnn_backend_cache_ptr_ = std::make_unique(qnn_context_blob); backend_params->qnn_context_ptr_ = std::make_unique( - implementation, - backend_params->qnn_backend_ptr_.get(), - backend_params->qnn_device_ptr_.get(), + implementation_ptr, + qnn_backend_ptr, + qnn_device_ptr, backend_params->qnn_backend_cache_ptr_.get(), htp_options, qnn_dlc_manager); backend_params->qnn_graph_ptr_ = std::make_unique( - implementation, - backend_params->qnn_backend_ptr_.get(), + implementation_ptr, + qnn_backend_ptr, backend_params->qnn_context_ptr_.get(), get_option(options->profile_level()), options->soc_info(), htp_options); backend_params->qnn_mem_manager_ptr_ = std::make_unique( - implementation, + implementation_ptr, backend_params->qnn_context_ptr_.get(), get_option(options->log_level())); backend_params->backend_init_state_ = BackendInitializeState::INITIALIZED; @@ -93,11 +81,7 @@ std::unique_ptr QnnBackendFactory::Create( return nullptr; } - if (backend_params->qnn_backend_ptr_->VerifyQNNSDKVersion() == Error::Ok) { - return backend_params; - } - - return nullptr; + return backend_params; } } // namespace qnn } // namespace backends diff --git a/backends/qualcomm/runtime/backends/QnnBackendFactory.h b/backends/qualcomm/runtime/backends/QnnBackendFactory.h index 3d78a36b9f0..8be3e5e448f 100644 --- a/backends/qualcomm/runtime/backends/QnnBackendFactory.h +++ 
b/backends/qualcomm/runtime/backends/QnnBackendFactory.h @@ -31,22 +31,18 @@ namespace qnn { class QnnDlcManager; typedef enum { UNINITIALIZED, INITIALIZED } BackendInitializeState; -// @brief Struct containing all handles for a given QNN backend +// @brief Struct containing non-shared handles for a given QNN backend typedef struct BackendConfigParameters { - std::unique_ptr qnn_backend_ptr_; BackendInitializeState backend_init_state_; std::unique_ptr qnn_context_ptr_; - std::unique_ptr qnn_device_ptr_; std::unique_ptr qnn_graph_ptr_; std::unique_ptr qnn_mem_manager_ptr_; std::unique_ptr qnn_backend_cache_ptr_; // Default ctor BackendConfigParameters() - : qnn_backend_ptr_(nullptr), - backend_init_state_(BackendInitializeState::UNINITIALIZED), + : backend_init_state_(BackendInitializeState::UNINITIALIZED), qnn_context_ptr_(nullptr), - qnn_device_ptr_(nullptr), qnn_graph_ptr_(nullptr), qnn_mem_manager_ptr_(nullptr), qnn_backend_cache_ptr_(nullptr) {} @@ -56,8 +52,6 @@ typedef struct BackendConfigParameters { qnn_backend_cache_ptr_.reset(); qnn_mem_manager_ptr_.reset(); qnn_context_ptr_.reset(); - qnn_device_ptr_.reset(); - qnn_backend_ptr_.reset(); backend_init_state_ = BackendInitializeState::UNINITIALIZED; } @@ -66,8 +60,9 @@ typedef struct BackendConfigParameters { class QnnBackendFactory { public: std::unique_ptr Create( - const QnnImplementation& implementation, - QnnLogger* logger, + QnnImplementation* implementation, + QnnBackend* qnn_backend_ptr, + QnnDevice* qnn_device_ptr, const QnnExecuTorchContextBinary& qnn_context_blob, const QnnExecuTorchOptions* options, QnnDlcManager* qnn_dlc_manager); diff --git a/backends/qualcomm/runtime/backends/QnnBackendUnifiedRegistry.cpp b/backends/qualcomm/runtime/backends/QnnBackendUnifiedRegistry.cpp new file mode 100644 index 00000000000..73bc3ae9710 --- /dev/null +++ b/backends/qualcomm/runtime/backends/QnnBackendUnifiedRegistry.cpp @@ -0,0 +1,153 @@ +/* + * Copyright (c) Qualcomm Innovation Center, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. 
+ */ +#include +#include +#include +#include +#include +#include + +#include + +namespace executorch { +namespace backends { +namespace qnn { +using executorch::runtime::Error; + +// Static instance for the singleton +QnnBackendUnifiedRegistry& QnnBackendUnifiedRegistry::GetInstance() { + static QnnBackendUnifiedRegistry instance; + return instance; +} + +// Private constructor +QnnBackendUnifiedRegistry::QnnBackendUnifiedRegistry() = default; + +// Destructor +QnnBackendUnifiedRegistry::~QnnBackendUnifiedRegistry() { + CleanupExpired(); +} + +Error QnnBackendUnifiedRegistry::GetOrCreateBackendBundle( + const QnnExecuTorchOptions* options, + std::shared_ptr<QnnBackendBundle>& bundle) { + std::lock_guard lock(mutex_); + + // Extract relevant parameters from options for creation and validation + std::string current_lib_path = options->library_path()->str(); + QnnExecuTorchLogLevel current_log_level = get_option(options->log_level()); + QnnExecuTorchBackendType backend_type = + options->backend_options()->backend_type(); + + if (current_lib_path.empty()) { + switch (backend_type) { + case QnnExecuTorchBackendType::kHtpBackend: { + current_lib_path = htp_library_name_; + break; + } + case QnnExecuTorchBackendType::kGpuBackend: + case QnnExecuTorchBackendType::kDspBackend: + case QnnExecuTorchBackendType::kUndefinedBackend: + default: + QNN_EXECUTORCH_LOG_ERROR( + "Unsupported backend type: %s", + EnumNameQnnExecuTorchBackendType(backend_type)); + return Error::NotFound; + } + } + + // Check if resources already exist + auto it = qnn_backend_bundles_map_.find(backend_type); + if (it != qnn_backend_bundles_map_.end()) { + // Create new shared_ptr that shares ownership of the managed object. + if (auto existing_bundle = it->second.lock()) { + bundle = existing_bundle; + if (bundle->qnn_logger_ptr->GetLogLevel() != current_log_level) { + bundle->qnn_logger_ptr = std::make_unique<QnnLogger>( + bundle->implementation.get(), LoggingCallback, current_log_level); + } + QNN_EXECUTORCH_LOG_INFO( + "Use cached backend bundle for current backend: %s", + EnumNameQnnExecuTorchBackendType(backend_type)); + return Error::Ok; + } + } + + QNN_EXECUTORCH_LOG_INFO("Creating new backend bundle."); + + // 1. Create QnnImplementation and load qnn library + std::unique_ptr<QnnImplementation> implementation = + std::make_unique<QnnImplementation>(current_lib_path); + auto config = GetImplementationConfig(options); + Error ret = implementation->Load(config.get()); + ET_CHECK_OR_RETURN_ERROR( + ret == Error::Ok, Internal, "Fail to load Qnn library"); + + // 2. Create QnnLogger + std::unique_ptr<QnnLogger> logger = std::make_unique<QnnLogger>( + implementation.get(), LoggingCallback, current_log_level); + + // 3. Create QnnBackend (specific type based on options) + // 4.
Create QnnDevice (specific type based on options) + std::unique_ptr<QnnBackend> backend = nullptr; + std::unique_ptr<QnnDevice> device = nullptr; + + switch (backend_type) { + case QnnExecuTorchBackendType::kHtpBackend: { + auto htp_options = options->backend_options()->htp_options(); + backend = + std::make_unique<HtpBackend>(implementation.get(), logger.get()); + ET_CHECK_OR_RETURN_ERROR( + backend->Configure(options->op_package_options()) == Error::Ok, + Internal, + "Fail to configure Qnn backend"); + + device = std::make_unique<HtpDevice>( + implementation.get(), logger.get(), options->soc_info(), htp_options); + ET_CHECK_OR_RETURN_ERROR( + device->Configure() == Error::Ok, + Internal, + "Fail to configure Qnn device"); + break; + } + case QnnExecuTorchBackendType::kGpuBackend: + case QnnExecuTorchBackendType::kDspBackend: + case QnnExecuTorchBackendType::kUndefinedBackend: + default: + return Error::NotFound; + } + if (backend->VerifyQNNSDKVersion() != Error::Ok) { + return Error::Internal; + } + + bundle->implementation = std::move(implementation); + bundle->qnn_logger_ptr = std::move(logger); + bundle->qnn_backend_ptr = std::move(backend); + bundle->qnn_device_ptr = std::move(device); + qnn_backend_bundles_map_.emplace( + backend_type, bundle); // Store weak_ptr to the bundle + + return Error::Ok; +} + +void QnnBackendUnifiedRegistry::CleanupExpired() { + std::lock_guard lock(mutex_); + + for (auto it = qnn_backend_bundles_map_.begin(); + it != qnn_backend_bundles_map_.end();) { + if (it->second.expired()) { + it = qnn_backend_bundles_map_.erase(it); + } else { + ++it; + } + } +} + +} // namespace qnn +} // namespace backends +} // namespace executorch diff --git a/backends/qualcomm/runtime/backends/QnnBackendUnifiedRegistry.h b/backends/qualcomm/runtime/backends/QnnBackendUnifiedRegistry.h new file mode 100644 index 00000000000..b2549a3356c --- /dev/null +++ b/backends/qualcomm/runtime/backends/QnnBackendUnifiedRegistry.h @@ -0,0 +1,106 @@ +/* + * Copyright (c) Qualcomm Innovation Center, Inc. + * All rights reserved. + * + * This source code is licensed under the BSD-style license found in the + * LICENSE file in the root directory of this source tree. + */ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace executorch { +namespace backends { +namespace qnn { + +// A bundle struct to hold all shared QNN backend resources +struct QnnBackendBundle { + std::unique_ptr<QnnImplementation> implementation; + std::unique_ptr<QnnLogger> qnn_logger_ptr; + std::unique_ptr<QnnBackend> qnn_backend_ptr; + std::unique_ptr<QnnDevice> qnn_device_ptr; + + // Default ctor + QnnBackendBundle() + : implementation(nullptr), + qnn_logger_ptr(nullptr), + qnn_backend_ptr(nullptr), + qnn_device_ptr(nullptr) {} + // Default dtor + ~QnnBackendBundle() { + qnn_device_ptr.reset(); + qnn_backend_ptr.reset(); + qnn_logger_ptr.reset(); + implementation.reset(); + } +}; + +class QnnBackendUnifiedRegistry { + // Singleton class to manage shared QNN backend resources. It ensures that + // only one instance of the registry exists throughout the application's + // lifetime. The registry maintains a map of backend bundles indexed by + // backend_type. Each bundle contains QnnImplementation, QnnLogger, QnnBackend, + // and QnnDevice objects for a specific backend type. The registry provides + // methods to get or create backend bundles, ensuring that resources are + // properly managed and reused when possible. It also includes a cleanup + // mechanism to remove expired bundles.
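+ // + // Minimal usage sketch (illustrative only, mirroring what QnnManager::InitBackend does): + //   auto bundle = std::make_shared<QnnBackendBundle>(); + //   Error err = QnnBackendUnifiedRegistry::GetInstance() + //                   .GetOrCreateBackendBundle(options, bundle); + //   // On success, bundle shares the process-wide implementation, logger, + //   // backend, and device handles for this backend type; the registry only + //   // keeps a weak_ptr, so the resources are torn down once all owners + //   // release their bundle.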
+ public: + static QnnBackendUnifiedRegistry& GetInstance(); + + executorch::runtime::Error GetOrCreateBackendBundle( + const QnnExecuTorchOptions* options, + std::shared_ptr& bundle); + + void CleanupExpired(); + + private: + QnnBackendUnifiedRegistry(); + ~QnnBackendUnifiedRegistry(); + + // Delete copy constructor and assignment operator + QnnBackendUnifiedRegistry(const QnnBackendUnifiedRegistry&) = delete; + QnnBackendUnifiedRegistry& operator=(const QnnBackendUnifiedRegistry&) = + delete; + + static constexpr const char* htp_library_name_ = "libQnnHtp.so"; + static constexpr const char* gpu_library_name_ = "libQnnGpu.so"; + static constexpr const char* dsp_library_name_ = "libQnnDsp.so"; + + std::unique_ptr GetImplementationConfig( + const QnnExecuTorchOptions* options) { + if (options->saver()) { + auto outputDirCfg = std::make_unique(); + outputDirCfg->option = QNN_SAVER_CONFIG_OPTION_OUTPUT_DIRECTORY; + outputDirCfg->outputDirectory = options->saver_output_dir()->c_str(); + + auto saverCfg = std::make_unique(2); + saverCfg[0] = outputDirCfg.release(); + saverCfg[1] = nullptr; + + return saverCfg; + } else { + return nullptr; + } + } + + // Stores the collection of shared resources, with backend_type being used as + // the key. + std::unordered_map> + qnn_backend_bundles_map_; + + std::mutex mutex_; // Protects access to resources and ensures atomic + // creation/destruction +}; + +} // namespace qnn +} // namespace backends +} // namespace executorch diff --git a/backends/qualcomm/runtime/backends/QnnContextCommon.cpp b/backends/qualcomm/runtime/backends/QnnContextCommon.cpp index ee49b10215a..e16a173db6c 100644 --- a/backends/qualcomm/runtime/backends/QnnContextCommon.cpp +++ b/backends/qualcomm/runtime/backends/QnnContextCommon.cpp @@ -14,7 +14,7 @@ namespace backends { namespace qnn { QnnContext::~QnnContext() { - const QnnInterface& qnn_interface = implementation_.GetQnnInterface(); + const QnnInterface& qnn_interface = implementation_->GetQnnInterface(); Qnn_ErrorHandle_t error = QNN_SUCCESS; if (handle_ != nullptr) { QNN_EXECUTORCH_LOG_INFO("Destroy Qnn context"); @@ -33,7 +33,7 @@ QnnContext::~QnnContext() { Error QnnContext::Configure() { // create qnn context - const QnnInterface& qnn_interface = implementation_.GetQnnInterface(); + const QnnInterface& qnn_interface = implementation_->GetQnnInterface(); Qnn_ErrorHandle_t error = QNN_SUCCESS; std::vector temp_context_config; @@ -95,7 +95,7 @@ Error QnnContext::Configure() { Error QnnContext::GetContextBinary( QnnExecuTorchContextBinary& qnn_executorch_context_binary) { - const QnnInterface& qnn_interface = implementation_.GetQnnInterface(); + const QnnInterface& qnn_interface = implementation_->GetQnnInterface(); Qnn_ContextBinarySize_t binary_size = 0; Qnn_ContextBinarySize_t bytes_written = 0; Qnn_ErrorHandle_t error = diff --git a/backends/qualcomm/runtime/backends/QnnContextCommon.h b/backends/qualcomm/runtime/backends/QnnContextCommon.h index 0e9e12ef544..7d507a4a50c 100644 --- a/backends/qualcomm/runtime/backends/QnnContextCommon.h +++ b/backends/qualcomm/runtime/backends/QnnContextCommon.h @@ -24,7 +24,7 @@ class QnnDlcManager; class QnnContext { public: explicit QnnContext( - const QnnImplementation& implementation, + QnnImplementation* implementation, QnnBackend* backend, QnnDevice* device, QnnBackendCache* cache, @@ -74,7 +74,7 @@ class QnnContext { private: Qnn_ContextHandle_t handle_; - const QnnImplementation& implementation_; + QnnImplementation* implementation_; QnnBackend* backend_; QnnDevice* device_; 
QnnBackendCache* cache_; diff --git a/backends/qualcomm/runtime/backends/QnnDeviceCommon.cpp b/backends/qualcomm/runtime/backends/QnnDeviceCommon.cpp index 93d705efd3e..0280ec4f383 100644 --- a/backends/qualcomm/runtime/backends/QnnDeviceCommon.cpp +++ b/backends/qualcomm/runtime/backends/QnnDeviceCommon.cpp @@ -13,7 +13,7 @@ namespace qnn { using executorch::runtime::Error; QnnDevice::~QnnDevice() { - const QnnInterface& qnn_interface = implementation_.GetQnnInterface(); + const QnnInterface& qnn_interface = implementation_->GetQnnInterface(); Qnn_ErrorHandle_t error = QNN_SUCCESS; if (nullptr != handle_) { QNN_EXECUTORCH_LOG_INFO("Destroy Qnn device"); @@ -32,7 +32,7 @@ QnnDevice::~QnnDevice() { Error QnnDevice::Configure() { // create qnn device - const QnnInterface& qnn_interface = implementation_.GetQnnInterface(); + const QnnInterface& qnn_interface = implementation_->GetQnnInterface(); Qnn_ErrorHandle_t error = QNN_SUCCESS; std::vector temp_device_config; diff --git a/backends/qualcomm/runtime/backends/QnnDeviceCommon.h b/backends/qualcomm/runtime/backends/QnnDeviceCommon.h index 85de00f8623..6736e4a49ca 100644 --- a/backends/qualcomm/runtime/backends/QnnDeviceCommon.h +++ b/backends/qualcomm/runtime/backends/QnnDeviceCommon.h @@ -20,8 +20,10 @@ namespace backends { namespace qnn { class QnnDevice { public: - explicit QnnDevice(const QnnImplementation& implementation, QnnLogger* logger) + explicit QnnDevice(QnnImplementation* implementation, QnnLogger* logger) : implementation_(implementation), handle_(nullptr), logger_(logger) {} + QnnDevice(const QnnDevice&) = delete; // Delete copy constructor + QnnDevice& operator=(const QnnDevice&) = delete; // Delete assignment operator virtual ~QnnDevice(); @@ -40,7 +42,7 @@ class QnnDevice { virtual executorch::runtime::Error AfterCreateDevice() { return executorch::runtime::Error::Ok; }; - const QnnImplementation& implementation_; + QnnImplementation* implementation_; private: Qnn_DeviceHandle_t handle_; diff --git a/backends/qualcomm/runtime/backends/QnnDlcManager.h b/backends/qualcomm/runtime/backends/QnnDlcManager.h index a57906df4e3..1d9312464e2 100644 --- a/backends/qualcomm/runtime/backends/QnnDlcManager.h +++ b/backends/qualcomm/runtime/backends/QnnDlcManager.h @@ -10,6 +10,7 @@ #include #include +#include #include #include "QnnWrapperUtils.hpp" @@ -35,23 +36,23 @@ class QnnDlcManager { std::unique_ptr backend_params_ptr_ = std::make_unique(); + std::unique_ptr backend_bundle_ptr_ = + std::make_unique(); - void ResetBackendParams(); - void ResetLogger(); - void TerminateAllBackends(); + void Destroy(); - Error SetUpDlcEnvironment(const Qnn_Version_t& coreApiVersion); + Error SetUpDlcEnvironment( + const Qnn_Version_t& coreApiVersion, + const std::vector& graph_names); Error RegisterGraphsFromDLC( - const QnnImplementation& implementation, + QnnImplementation* implementation, QnnBackend* backend, QnnContext* context, QnnBackendCache* cache); private: static constexpr const char* library_name_ = "libQnnIr.so"; - QnnImplementation qnn_loaded_backend_; - std::unique_ptr logger_; const QnnExecuTorchContextBinary& qnn_context_blob_; const QnnExecuTorchOptions* options_; @@ -64,7 +65,7 @@ class QnnDlcManager { Error Create(); - Error Configure(); + Error Configure(const std::vector& graph_names); }; } // namespace qnn } // namespace backends diff --git a/backends/qualcomm/runtime/backends/QnnFunctionInterface.h b/backends/qualcomm/runtime/backends/QnnFunctionInterface.h index 548c363f388..0e1e4727aa3 100644 --- 
a/backends/qualcomm/runtime/backends/QnnFunctionInterface.h +++ b/backends/qualcomm/runtime/backends/QnnFunctionInterface.h @@ -105,6 +105,9 @@ class QnnInterface { const QNN_INTERFACE_VER_TYPE& GetInterfaceVer() const { return qnn_interface_->QNN_INTERFACE_VER_NAME; } + void Unload() { + qnn_interface_ = nullptr; + } private: // --------- QnnInterface --------- diff --git a/backends/qualcomm/runtime/backends/QnnGraphCommon.cpp b/backends/qualcomm/runtime/backends/QnnGraphCommon.cpp index 9fe81f4cf54..44bf11bc0f5 100644 --- a/backends/qualcomm/runtime/backends/QnnGraphCommon.cpp +++ b/backends/qualcomm/runtime/backends/QnnGraphCommon.cpp @@ -14,7 +14,7 @@ using executorch::runtime::Error; Error QnnGraph::Configure(const std::string& graph_name) { // create qnn backend - const QnnInterface& qnn_interface = implementation_.GetQnnInterface(); + const QnnInterface& qnn_interface = implementation_->GetQnnInterface(); Qnn_ErrorHandle_t error = QNN_SUCCESS; std::vector temp_graph_config; ET_CHECK_OR_RETURN_ERROR( @@ -81,7 +81,7 @@ Qnn_ErrorHandle_t QnnGraph::GraphExecute( return QNN_COMMON_ERROR_GENERAL; } - return implementation_.GetQnnInterface().qnn_graph_execute( + return implementation_->GetQnnInterface().qnn_graph_execute( handle_[graph_name], input_tensor_structs.data(), input_tensor_structs.size(), @@ -94,7 +94,7 @@ Qnn_ErrorHandle_t QnnGraph::GraphExecute( Error QnnGraph::EnsureTensorInQnnGraph( const std::string& graph_name, const std::shared_ptr& tensor_wrapper) { - const QnnInterface& qnn_interface = implementation_.GetQnnInterface(); + const QnnInterface& qnn_interface = implementation_->GetQnnInterface(); Qnn_ErrorHandle_t error = QNN_SUCCESS; if (!tensor_wrapper->IsTensorCreated()) { diff --git a/backends/qualcomm/runtime/backends/QnnGraphCommon.h b/backends/qualcomm/runtime/backends/QnnGraphCommon.h index 33f903dae41..fbb5ab80140 100644 --- a/backends/qualcomm/runtime/backends/QnnGraphCommon.h +++ b/backends/qualcomm/runtime/backends/QnnGraphCommon.h @@ -23,7 +23,7 @@ namespace qnn { class QnnGraph { public: explicit QnnGraph( - const QnnImplementation& implementation, + QnnImplementation* implementation, QnnBackend* backend, QnnContext* context, const QnnExecuTorchProfileLevel& profile_level) @@ -44,7 +44,7 @@ class QnnGraph { Qnn_ErrorHandle_t GraphAddNode( const std::string& graph_name, const Qnn_OpConfig_t& op_config) { - return implementation_.GetQnnInterface().qnn_graph_add_node( + return implementation_->GetQnnInterface().qnn_graph_add_node( handle_[graph_name], op_config); }; executorch::runtime::Error EnsureTensorInQnnGraph( @@ -52,7 +52,7 @@ class QnnGraph { const std::shared_ptr& tensor_wrapper); Qnn_ErrorHandle_t GraphFinalize(const std::string& graph_name) { - return implementation_.GetQnnInterface().qnn_graph_finalize( + return implementation_->GetQnnInterface().qnn_graph_finalize( handle_[graph_name], profile_[graph_name]->GetHandle(), nullptr /* signal_handle */); @@ -84,7 +84,7 @@ class QnnGraph { private: std::unordered_map handle_; - const QnnImplementation& implementation_; + QnnImplementation* implementation_; QnnBackend* backend_; QnnContext* context_; QnnExecuTorchProfileLevel profile_level_; diff --git a/backends/qualcomm/runtime/backends/QnnImplementation.cpp b/backends/qualcomm/runtime/backends/QnnImplementation.cpp index 7083f2bef30..246800791e6 100644 --- a/backends/qualcomm/runtime/backends/QnnImplementation.cpp +++ b/backends/qualcomm/runtime/backends/QnnImplementation.cpp @@ -6,6 +6,7 @@ * LICENSE file in the root directory of this source tree. 
*/ #include +#include #include "QnnInterface.h" namespace executorch { namespace backends { @@ -13,6 +14,14 @@ namespace qnn { using executorch::runtime::Error; +struct DlCloser { + int operator()(void* handle) { + if (handle == nullptr) + return 0; + return dlclose(handle); + } +}; + Error QnnImplementation::InitBackend( void* const lib_handle, const QnnSaver_Config_t** saver_config) { @@ -33,47 +42,39 @@ Error QnnImplementation::InitBackend( return Error::Ok; } -// instantiate static members -// NOLINTNEXTLINE(fuchsia-statically-constructed-objects) -std::unordered_map - QnnImplementation::lib_path_to_backend_id_; -// NOLINTNEXTLINE(fuchsia-statically-constructed-objects) -std::unordered_map - QnnImplementation::loaded_backend_; -// NOLINTNEXTLINE(fuchsia-statically-constructed-objects) -std::unordered_map - QnnImplementation::loaded_lib_handle_; -// NOLINTNEXTLINE(fuchsia-statically-constructed-objects) -std::mutex QnnImplementation::be_init_mutex_; - -Error QnnImplementation::StartBackend( +QnnImplementation::~QnnImplementation() { + Unload(); +} + +const QnnInterface_t* QnnImplementation::StartBackend( const std::string& lib_path, const QnnSaver_Config_t** saver_config) { Qnn_ErrorHandle_t error = QNN_SUCCESS; - void* lib_handle = nullptr; // If the library is already loaded, return the handle. - lib_handle = dlopen(lib_path.c_str(), RTLD_NOW | RTLD_NOLOAD); + std::unique_ptr lib_handle( + dlopen(lib_path.c_str(), RTLD_NOW | RTLD_NOLOAD)); if (!lib_handle) { - lib_handle = dlopen(lib_path.c_str(), RTLD_NOW | RTLD_GLOBAL); + lib_handle = std::unique_ptr( + dlopen(lib_path.c_str(), RTLD_NOW | RTLD_GLOBAL)); } if (lib_handle == nullptr) { QNN_EXECUTORCH_LOG_ERROR( "Cannot Open QNN library %s, with error: %s", lib_path.c_str(), dlerror()); - return Error::Internal; + return nullptr; } // load get_provider function auto get_providers = loadQnnFunction( - lib_handle, "QnnInterface_getProviders"); + lib_handle.get(), "QnnInterface_getProviders"); if (get_providers == nullptr) { QNN_EXECUTORCH_LOG_ERROR( "QnnImplementation::Load Cannot load symbol " "QnnInterface_getProviders : %s", dlerror()); - return Error::Internal; + return nullptr; } // Get QnnInterface Providers @@ -85,7 +86,7 @@ Error QnnImplementation::StartBackend( QNN_EXECUTORCH_LOG_ERROR( "Qnn Interface failed to get providers. Error %d", QNN_GET_ERROR_CODE(error)); - return Error::Internal; + return nullptr; } if (num_providers != required_num_providers_) { @@ -94,115 +95,47 @@ Error QnnImplementation::StartBackend( "%d instead of required %d", num_providers, required_num_providers_); - return Error::Internal; - } - - BackendIdType backend_id = provider_list[0]->backendId; - - // store everything - lib_path_to_backend_id_[lib_path] = backend_id; - - // we use lib_path as the first unique key. - // Users can get wrong like, he or she assigns - // library_path=libQnnHtp_1.so - // library_path=libQnnHtp_2.so - // for different QnnBackend instances. - // So we warning out here. - if (loaded_backend_.count(backend_id) > 0) { - QNN_EXECUTORCH_LOG_WARN( - "lib_path %s is loaded, but backend %d " - "already exists. 
Overwriting previous loaded backend...", - lib_path.c_str(), - backend_id); - } - loaded_backend_[backend_id] = provider_list[0]; - - if (loaded_lib_handle_.count(backend_id) > 0) { - QNN_EXECUTORCH_LOG_WARN("closing %pK...", loaded_lib_handle_[backend_id]); - - int dlclose_error = dlclose(loaded_lib_handle_[backend_id]); - if (dlclose_error != 0) { - QNN_EXECUTORCH_LOG_WARN( - "Sadly, fail to close %pK with error %s", - loaded_lib_handle_[backend_id], - dlerror()); - } + return nullptr; } - loaded_lib_handle_[backend_id] = lib_handle; // Saver backend need initialization. - Error be_init_st = InitBackend(loaded_lib_handle_[backend_id], saver_config); + Error be_init_st = InitBackend(lib_handle.get(), saver_config); if (be_init_st != Error::Ok) { - // backend init fails. clear things - lib_path_to_backend_id_.erase(lib_path); - loaded_backend_.erase(backend_id); - - int dlclose_error = dlclose(loaded_lib_handle_[backend_id]); - if (dlclose_error != 0) { - QNN_EXECUTORCH_LOG_WARN( - "fail to close %pK after backend-init " - "failure, with error %s", - loaded_lib_handle_[backend_id], - dlerror()); - } - - loaded_lib_handle_.erase(backend_id); - return be_init_st; + return nullptr; } - return Error::Ok; + // hold the lib_handle + lib_handle_ = lib_handle.release(); + return provider_list[0]; } -Error QnnImplementation::TerminateAllBackends() { - Error ret_status = Error::Ok; +Error QnnImplementation::Unload() { + qnn_interface_.Unload(); - loaded_backend_.clear(); - - for (auto& it : loaded_lib_handle_) { - int dlclose_error = dlclose(it.second); - if (dlclose_error != 0) { - QNN_EXECUTORCH_LOG_ERROR( - "Fail to close QNN backend %d with error %s", it.first, dlerror()); - ret_status = Error::Internal; - } + if (lib_handle_ == nullptr) { + return Error::Ok; } - loaded_lib_handle_.clear(); - lib_path_to_backend_id_.clear(); - return ret_status; + int dlclose_error = dlclose(lib_handle_); + if (dlclose_error != 0) { + QNN_EXECUTORCH_LOG_ERROR( + "Fail to close QNN backend %s with error %s", + lib_path_.c_str(), + dlerror()); + return Error::Internal; + } + lib_handle_ = nullptr; + return Error::Ok; } Error QnnImplementation::Load(const QnnSaver_Config_t** saver_config) { - BackendIdType backend_id = QNN_BACKEND_ID_NULL; - { - const std::lock_guard lock(be_init_mutex_); - - if (lib_path_to_backend_id_.count(lib_path_) == 0) { - Error st = StartBackend(lib_path_, saver_config); - ET_CHECK_OR_RETURN_ERROR( - st == Error::Ok, Internal, "Fail to start backend"); - } - - // Get backend ID - backend_id = lib_path_to_backend_id_[lib_path_]; - - // really don't expect. - if (loaded_backend_.count(backend_id) == 0 || - loaded_lib_handle_.count(backend_id) == 0) { - QNN_EXECUTORCH_LOG_ERROR( - "library %s is loaded but " - "loaded backend count=%zu, " - "loaded lib_handle count=%zu", - lib_path_.c_str(), - loaded_backend_.count(backend_id), - loaded_lib_handle_.count(backend_id)); - return Error::Internal; - } - } // be_init_mutex_ release. 
+ const QnnInterface_t* p_qnn_intf = StartBackend(lib_path_, saver_config); + ET_CHECK_OR_RETURN_ERROR( + p_qnn_intf != nullptr, Internal, "Fail to start backend"); // Connect QnnInterface - qnn_interface_.SetQnnInterface(loaded_backend_[backend_id]); + qnn_interface_.SetQnnInterface(p_qnn_intf); return Error::Ok; } diff --git a/backends/qualcomm/runtime/backends/QnnImplementation.h b/backends/qualcomm/runtime/backends/QnnImplementation.h index a49ee6516fc..3059166523d 100644 --- a/backends/qualcomm/runtime/backends/QnnImplementation.h +++ b/backends/qualcomm/runtime/backends/QnnImplementation.h @@ -11,9 +11,7 @@ #include #include -#include #include -#include namespace executorch { namespace backends { namespace qnn { @@ -29,32 +27,32 @@ class QnnImplementation { explicit QnnImplementation(std::string lib_path) : lib_path_(std::move(lib_path)){}; + QnnImplementation(const QnnImplementation&) = + delete; // Delete copy constructor + QnnImplementation& operator=(const QnnImplementation&) = + delete; // Delete assignment operator + ~QnnImplementation(); executorch::runtime::Error Load(const QnnSaver_Config_t** saver_config); const QnnInterface& GetQnnInterface() const; - executorch::runtime::Error TerminateAllBackends(); + executorch::runtime::Error Unload(); private: - static constexpr const int required_num_providers_{1}; + static constexpr int required_num_providers_{1}; - static executorch::runtime::Error StartBackend( + const QnnInterface_t* StartBackend( const std::string& lib_path, const QnnSaver_Config_t** saver_config); - static executorch::runtime::Error InitBackend( + executorch::runtime::Error InitBackend( void* const lib_handle, const QnnSaver_Config_t** saver_config); std::string lib_path_; + void* lib_handle_{nullptr}; QnnInterface qnn_interface_; - - static std::unordered_map lib_path_to_backend_id_; - static std::unordered_map - loaded_backend_; - static std::unordered_map loaded_lib_handle_; - static std::mutex be_init_mutex_; }; } // namespace qnn } // namespace backends diff --git a/backends/qualcomm/runtime/backends/QnnLogger.cpp b/backends/qualcomm/runtime/backends/QnnLogger.cpp index 5b86894d874..fec6d426c04 100644 --- a/backends/qualcomm/runtime/backends/QnnLogger.cpp +++ b/backends/qualcomm/runtime/backends/QnnLogger.cpp @@ -40,11 +40,11 @@ void LoggingCallback( QNN_EXECUTORCH_LOG(log_level, buffer); } QnnLogger::QnnLogger( - const QnnImplementation& implementation, + QnnImplementation* implementation, QnnLog_Callback_t callback, QnnExecuTorchLogLevel log_level) - : handle_(nullptr), implementation_(implementation) { - const QnnInterface& qnn_interface = implementation.GetQnnInterface(); + : handle_(nullptr), implementation_(implementation), log_level_(log_level) { + const QnnInterface& qnn_interface = implementation->GetQnnInterface(); QnnLog_Level_t qnn_log_level = QNN_LOG_LEVEL_ERROR; if (log_level > QnnExecuTorchLogLevel::kLogOff) { @@ -86,7 +86,7 @@ QnnLogger::QnnLogger( } QnnLogger::~QnnLogger() { - const QnnInterface& qnn_interface = implementation_.GetQnnInterface(); + const QnnInterface& qnn_interface = implementation_->GetQnnInterface(); if (handle_ != nullptr) { Qnn_ErrorHandle_t error = qnn_interface.qnn_log_free(handle_); if (error != QNN_SUCCESS) { diff --git a/backends/qualcomm/runtime/backends/QnnLogger.h b/backends/qualcomm/runtime/backends/QnnLogger.h index 80be4f61c59..d329ab94407 100644 --- a/backends/qualcomm/runtime/backends/QnnLogger.h +++ b/backends/qualcomm/runtime/backends/QnnLogger.h @@ -21,18 +21,25 @@ void LoggingCallback( class QnnLogger 
{ public: explicit QnnLogger( - const QnnImplementation& implementation, + QnnImplementation* implementation, QnnLog_Callback_t callback, QnnExecuTorchLogLevel log_level); + QnnLogger(const QnnLogger&) = delete; // Delete copy constructor + QnnLogger& operator=(const QnnLogger&) = delete; // Delete assignment operator ~QnnLogger(); Qnn_LogHandle_t GetHandle() { return handle_; } + QnnExecuTorchLogLevel GetLogLevel() { + return log_level_; + } + private: Qnn_LogHandle_t handle_; - const QnnImplementation& implementation_; + QnnImplementation* implementation_; + QnnExecuTorchLogLevel log_level_; }; } // namespace qnn } // namespace backends diff --git a/backends/qualcomm/runtime/backends/QnnMemManager.cpp b/backends/qualcomm/runtime/backends/QnnMemManager.cpp index 3b99dd10868..f0d6096d29a 100644 --- a/backends/qualcomm/runtime/backends/QnnMemManager.cpp +++ b/backends/qualcomm/runtime/backends/QnnMemManager.cpp @@ -25,7 +25,7 @@ Error QnnMemManager::RegisterIonMem( const std::shared_ptr& tensor_wrapper, int32_t mem_fd, void* mem_ptr) { - const QnnInterface& qnn_interface = implementation_.GetQnnInterface(); + const QnnInterface& qnn_interface = implementation_->GetQnnInterface(); Qnn_MemDescriptor_t descriptor = { {tensor_wrapper->GetRank(), tensor_wrapper->GetDims(), nullptr}, tensor_wrapper->GetDataType(), @@ -66,7 +66,7 @@ Error QnnMemManager::RegisterCustomMem( size_t total_custom_mem_size, size_t tensor_offset, const CustomMemTensorInfo& info) { - const QnnInterface& qnn_interface = implementation_.GetQnnInterface(); + const QnnInterface& qnn_interface = implementation_->GetQnnInterface(); Qnn_MemDescriptor_t descriptor = { {tensor_wrapper->GetRank(), tensor_wrapper->GetDims(), nullptr}, tensor_wrapper->GetDataType(), @@ -113,7 +113,7 @@ Error QnnMemManager::PreRegisterCustomMemHandle( size_t total_custom_mem_size, size_t tensor_offset, const CustomMemTensorInfo& info) { - const QnnInterface& qnn_interface = implementation_.GetQnnInterface(); + const QnnInterface& qnn_interface = implementation_->GetQnnInterface(); Qnn_MemDescriptor_t descriptor = { {info.rank, info.shape, nullptr}, scalar_type_to_qnn_dtype_[info.dtype], @@ -165,7 +165,7 @@ Error QnnMemManager::SetMemHandle( } void QnnMemManager::DeRegisterMem() { - const QnnInterface& qnn_interface = implementation_.GetQnnInterface(); + const QnnInterface& qnn_interface = implementation_->GetQnnInterface(); Qnn_ErrorHandle_t error = QNN_SUCCESS; for (auto& it : registered_map_) { diff --git a/backends/qualcomm/runtime/backends/QnnMemManager.h b/backends/qualcomm/runtime/backends/QnnMemManager.h index 6a7f00b016a..0c5b3476b33 100644 --- a/backends/qualcomm/runtime/backends/QnnMemManager.h +++ b/backends/qualcomm/runtime/backends/QnnMemManager.h @@ -20,7 +20,7 @@ namespace qnn { class QnnMemManager { public: explicit QnnMemManager( - const QnnImplementation& implementation, + QnnImplementation* implementation, QnnContext* context, QnnExecuTorchLogLevel log_level) : implementation_(implementation), @@ -65,7 +65,7 @@ class QnnMemManager { private: void DeRegisterMem(); - const QnnImplementation& implementation_; + QnnImplementation* implementation_; QnnContext* context_; QnnExecuTorchLogLevel log_level_; // Store the registered Qnn_MemHandle_t for de-registration diff --git a/backends/qualcomm/runtime/backends/QnnProfiler.cpp b/backends/qualcomm/runtime/backends/QnnProfiler.cpp index fd580867db5..8345434a145 100644 --- a/backends/qualcomm/runtime/backends/QnnProfiler.cpp +++ b/backends/qualcomm/runtime/backends/QnnProfiler.cpp @@ -13,12 
+13,12 @@ namespace backends { namespace qnn { QnnProfile::QnnProfile( - const QnnImplementation& implementation, + QnnImplementation* implementation, QnnBackend* backend, const QnnExecuTorchProfileLevel& profile_level) : handle_(nullptr), implementation_(implementation), backend_(backend) { if (profile_level != QnnExecuTorchProfileLevel::kProfileOff) { - const QnnInterface& qnn_interface = implementation_.GetQnnInterface(); + const QnnInterface& qnn_interface = implementation_->GetQnnInterface(); QnnProfile_Level_t qnnProfileLevel = 0; if (profile_level == QnnExecuTorchProfileLevel::kProfileBasic) { @@ -72,7 +72,7 @@ QnnProfile::QnnProfile( Qnn_ErrorHandle_t QnnProfile::ProfileData( executorch::runtime::EventTracer* event_tracer) { - const QnnInterface& qnn_interface = implementation_.GetQnnInterface(); + const QnnInterface& qnn_interface = implementation_->GetQnnInterface(); const QnnProfile_EventId_t* events_ptr = nullptr; const QnnProfile_EventId_t* sub_events_ptr = nullptr; std::uint32_t num_events = 0; @@ -167,7 +167,7 @@ Qnn_ErrorHandle_t QnnProfile::ProfileData( } QnnProfile::~QnnProfile() { - const QnnInterface& qnn_interface = implementation_.GetQnnInterface(); + const QnnInterface& qnn_interface = implementation_->GetQnnInterface(); if (handle_ != nullptr) { Qnn_ErrorHandle_t error = qnn_interface.qnn_profile_free(handle_); if (error != QNN_SUCCESS) { diff --git a/backends/qualcomm/runtime/backends/QnnProfiler.h b/backends/qualcomm/runtime/backends/QnnProfiler.h index e21385aca7d..de8fbd1d9d5 100644 --- a/backends/qualcomm/runtime/backends/QnnProfiler.h +++ b/backends/qualcomm/runtime/backends/QnnProfiler.h @@ -19,7 +19,7 @@ namespace qnn { class QnnProfile { public: explicit QnnProfile( - const QnnImplementation& implementation, + QnnImplementation* implementation, QnnBackend* backend, const QnnExecuTorchProfileLevel& profile_level); ~QnnProfile(); @@ -31,7 +31,7 @@ class QnnProfile { private: Qnn_ProfileHandle_t handle_; - const QnnImplementation& implementation_; + QnnImplementation* implementation_; QnnBackend* backend_; }; } // namespace qnn diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpBackend.h b/backends/qualcomm/runtime/backends/htpbackend/HtpBackend.h index 5b5b1586cdb..3e3f727ecea 100644 --- a/backends/qualcomm/runtime/backends/htpbackend/HtpBackend.h +++ b/backends/qualcomm/runtime/backends/htpbackend/HtpBackend.h @@ -16,7 +16,7 @@ namespace backends { namespace qnn { class HtpBackend : public QnnBackend { public: - HtpBackend(const QnnImplementation& implementation, QnnLogger* logger) + HtpBackend(QnnImplementation* implementation, QnnLogger* logger) : QnnBackend(implementation, logger) {} ~HtpBackend() {} diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpContext.h b/backends/qualcomm/runtime/backends/htpbackend/HtpContext.h index 88660db080a..d80ef95db4a 100644 --- a/backends/qualcomm/runtime/backends/htpbackend/HtpContext.h +++ b/backends/qualcomm/runtime/backends/htpbackend/HtpContext.h @@ -20,7 +20,7 @@ class QnnDlcManager; class HtpContext : public QnnContext { public: HtpContext( - const QnnImplementation& implementation, + QnnImplementation* implementation, QnnBackend* backend, QnnDevice* device, QnnBackendCache* cache, diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpDevice.cpp b/backends/qualcomm/runtime/backends/htpbackend/HtpDevice.cpp index 35a20048fc5..9c9713bae09 100644 --- a/backends/qualcomm/runtime/backends/htpbackend/HtpDevice.cpp +++ b/backends/qualcomm/runtime/backends/htpbackend/HtpDevice.cpp @@ -376,7 
+376,7 @@ void HtpDevice::ReleasePerformanceVote() { Error HtpDevice::AfterCreateDevice() { if (IsPerfModeEnabled()) { - const QnnInterface& qnn_interface = implementation_.GetQnnInterface(); + const QnnInterface& qnn_interface = implementation_->GetQnnInterface(); Qnn_ErrorHandle_t error = QNN_SUCCESS; // Get htp_perf_infra diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpDevice.h b/backends/qualcomm/runtime/backends/htpbackend/HtpDevice.h index 9052deb6b52..5bfb4aa4b79 100644 --- a/backends/qualcomm/runtime/backends/htpbackend/HtpDevice.h +++ b/backends/qualcomm/runtime/backends/htpbackend/HtpDevice.h @@ -24,7 +24,7 @@ namespace qnn { class HtpDevice : public QnnDevice { public: HtpDevice( - const QnnImplementation& implementation, + QnnImplementation* implementation, QnnLogger* logger, const SocInfo* soc_info, const QnnExecuTorchHtpBackendOptions* htp_options) diff --git a/backends/qualcomm/runtime/backends/htpbackend/HtpGraph.h b/backends/qualcomm/runtime/backends/htpbackend/HtpGraph.h index c3add50d08b..e253643bba5 100644 --- a/backends/qualcomm/runtime/backends/htpbackend/HtpGraph.h +++ b/backends/qualcomm/runtime/backends/htpbackend/HtpGraph.h @@ -19,7 +19,7 @@ namespace qnn { class HtpGraph : public QnnGraph { public: HtpGraph( - const QnnImplementation& implementation, + QnnImplementation* implementation, QnnBackend* backend, QnnContext* context, const QnnExecuTorchProfileLevel& profile_level, diff --git a/backends/qualcomm/runtime/backends/irbackend/IrBackend.h b/backends/qualcomm/runtime/backends/irbackend/IrBackend.h index ddeb3a24460..72bb59c84f9 100644 --- a/backends/qualcomm/runtime/backends/irbackend/IrBackend.h +++ b/backends/qualcomm/runtime/backends/irbackend/IrBackend.h @@ -18,7 +18,7 @@ namespace backends { namespace qnn { class IrBackend : public QnnBackend { public: - IrBackend(const QnnImplementation& implementation, QnnLogger* logger) + IrBackend(QnnImplementation* implementation, QnnLogger* logger) : QnnBackend(implementation, logger) {} ~IrBackend() {} diff --git a/backends/qualcomm/runtime/backends/irbackend/aarch64/QnnDlcManager.cpp b/backends/qualcomm/runtime/backends/irbackend/aarch64/QnnDlcManager.cpp index d8c09dabcbe..6512b5730b5 100644 --- a/backends/qualcomm/runtime/backends/irbackend/aarch64/QnnDlcManager.cpp +++ b/backends/qualcomm/runtime/backends/irbackend/aarch64/QnnDlcManager.cpp @@ -19,9 +19,7 @@ namespace qnn { QnnDlcManager::QnnDlcManager( const QnnExecuTorchContextBinary& qnn_context_blob, const QnnExecuTorchOptions* options) - : qnn_loaded_backend_(""), - qnn_context_blob_(qnn_context_blob), - options_(options) { + : qnn_context_blob_(qnn_context_blob), options_(options) { if (options_ == nullptr) { QNN_EXECUTORCH_LOG_ERROR( "Fail to create QnnDlcManager, options is nullptr"); @@ -36,16 +34,18 @@ Error QnnDlcManager::Create() { return Error::Ok; } -Error QnnDlcManager::Configure() { +Error QnnDlcManager::Configure(const std::vector& graph_names) { return Error::Ok; } -Error QnnDlcManager::SetUpDlcEnvironment(const Qnn_Version_t& coreApiVersion) { +Error QnnDlcManager::SetUpDlcEnvironment( + const Qnn_Version_t& coreApiVersion, + const std::vector& graph_names) { return Error::Ok; } Error QnnDlcManager::RegisterGraphsFromDLC( - const QnnImplementation& implementation, + QnnImplementation* implementation, QnnBackend* backend, QnnContext* context, QnnBackendCache* cache) { @@ -103,7 +103,7 @@ Error QnnDlcManager::RegisterGraphsFromDLC( snprintf(dlc_path, sizeof(dlc_path), "/proc/self/fd/%d", fd); const QNN_INTERFACE_VER_TYPE& 
interfaceVer = - implementation.GetQnnInterface().GetInterfaceVer(); + implementation->GetQnnInterface().GetInterfaceVer(); if (composeGraphsFromDlc( /*backendHandle=*/backend->GetHandle(), @@ -133,9 +133,7 @@ Error QnnDlcManager::RegisterGraphsFromDLC( return Error::Ok; } -void QnnDlcManager::ResetBackendParams() {} -void QnnDlcManager::ResetLogger() {} -void QnnDlcManager::TerminateAllBackends() {} +void QnnDlcManager::Destroy() {} } // namespace qnn } // namespace backends diff --git a/backends/qualcomm/runtime/backends/irbackend/x86_64/QnnDlcManager.cpp b/backends/qualcomm/runtime/backends/irbackend/x86_64/QnnDlcManager.cpp index 280751cf160..f3300026b64 100644 --- a/backends/qualcomm/runtime/backends/irbackend/x86_64/QnnDlcManager.cpp +++ b/backends/qualcomm/runtime/backends/irbackend/x86_64/QnnDlcManager.cpp @@ -16,9 +16,7 @@ namespace qnn { QnnDlcManager::QnnDlcManager( const QnnExecuTorchContextBinary& qnn_context_blob, const QnnExecuTorchOptions* options) - : qnn_loaded_backend_(""), - qnn_context_blob_(qnn_context_blob), - options_(options) { + : qnn_context_blob_(qnn_context_blob), options_(options) { if (options_ == nullptr) { QNN_EXECUTORCH_LOG_ERROR( "Fail to create QnnDlcManager, options is nullptr"); @@ -26,52 +24,51 @@ QnnDlcManager::QnnDlcManager( } Error QnnDlcManager::LoadQnnIrLibrary() { - qnn_loaded_backend_ = QnnImplementation(library_name_); - Error ret = qnn_loaded_backend_.Load(nullptr); + backend_bundle_ptr_->implementation = + std::make_unique(library_name_); + Error ret = backend_bundle_ptr_->implementation->Load(nullptr); return ret; } Error QnnDlcManager::Create() { - backend_params_ptr_->qnn_backend_ptr_ = - std::make_unique(qnn_loaded_backend_, logger_.get()); + backend_bundle_ptr_->qnn_backend_ptr = std::make_unique( + backend_bundle_ptr_->implementation.get(), + backend_bundle_ptr_->qnn_logger_ptr.get()); - backend_params_ptr_->qnn_device_ptr_ = - std::make_unique(qnn_loaded_backend_, logger_.get()); + backend_bundle_ptr_->qnn_device_ptr = std::make_unique( + backend_bundle_ptr_->implementation.get(), + backend_bundle_ptr_->qnn_logger_ptr.get()); backend_params_ptr_->qnn_backend_cache_ptr_ = std::make_unique(qnn_context_blob_); backend_params_ptr_->qnn_context_ptr_ = std::make_unique( - qnn_loaded_backend_, - backend_params_ptr_->qnn_backend_ptr_.get(), - backend_params_ptr_->qnn_device_ptr_.get(), + backend_bundle_ptr_->implementation.get(), + backend_bundle_ptr_->qnn_backend_ptr.get(), + backend_bundle_ptr_->qnn_device_ptr.get(), backend_params_ptr_->qnn_backend_cache_ptr_.get(), nullptr); backend_params_ptr_->qnn_graph_ptr_ = std::make_unique( - qnn_loaded_backend_, - backend_params_ptr_->qnn_backend_ptr_.get(), + backend_bundle_ptr_->implementation.get(), + backend_bundle_ptr_->qnn_backend_ptr.get(), backend_params_ptr_->qnn_context_ptr_.get(), get_option(options_->profile_level())); backend_params_ptr_->backend_init_state_ = BackendInitializeState::INITIALIZED; - return backend_params_ptr_->qnn_backend_ptr_->VerifyQNNSDKVersion(); + return backend_bundle_ptr_->qnn_backend_ptr->VerifyQNNSDKVersion(); } -Error QnnDlcManager::Configure() { +Error QnnDlcManager::Configure(const std::vector& graph_names) { ET_CHECK_OR_RETURN_ERROR( backend_params_ptr_ != nullptr, Internal, "Failed to load Qnn backend."); - std::vector graph_names; - for (auto name : *options_->graph_name()) { - graph_names.emplace_back(name->str()); - } ET_CHECK_OR_RETURN_ERROR( backend_params_ptr_->qnn_backend_cache_ptr_->Configure(graph_names) == Error::Ok, Internal, "Fail to configure 
Qnn backend cache"); ET_CHECK_OR_RETURN_ERROR( - backend_params_ptr_->qnn_backend_ptr_->Configure( + backend_bundle_ptr_->qnn_backend_ptr->Configure( options_->op_package_options()) == Error::Ok, Internal, "Fail to configure Qnn backend"); @@ -92,7 +89,9 @@ Error QnnDlcManager::Configure() { return Error::Ok; } -Error QnnDlcManager::SetUpDlcEnvironment(const Qnn_Version_t& coreApiVersion) { +Error QnnDlcManager::SetUpDlcEnvironment( + const Qnn_Version_t& coreApiVersion, + const std::vector& graph_names) { ET_CHECK_MSG( (coreApiVersion.major >= 2 && coreApiVersion.minor >= 23), "Qnn API version %u.%u.%u is not supported for Qnn IR backend, The minimum supported version is 2.23.0 or QNN_SDK version 2.30.0", @@ -105,36 +104,33 @@ Error QnnDlcManager::SetUpDlcEnvironment(const Qnn_Version_t& coreApiVersion) { Internal, "Fail to Load Qnn IR library."); - logger_ = std::make_unique( - qnn_loaded_backend_, LoggingCallback, get_option(options_->log_level())); + backend_bundle_ptr_->qnn_logger_ptr = std::make_unique( + backend_bundle_ptr_->implementation.get(), + LoggingCallback, + get_option(options_->log_level())); ET_CHECK_OR_RETURN_ERROR( Create() == Error::Ok, Internal, "Failed to load Qnn IR backend."); ET_CHECK_OR_RETURN_ERROR( - Configure() == Error::Ok, Internal, "Fail to configure IR backend."); + Configure(graph_names) == Error::Ok, + Internal, + "Fail to configure IR backend."); return Error::Ok; } Error QnnDlcManager::RegisterGraphsFromDLC( - const QnnImplementation& implementation, + QnnImplementation* implementation, QnnBackend* backend, QnnContext* context, QnnBackendCache* cache) { return Error::Ok; } -void QnnDlcManager::ResetBackendParams() { +void QnnDlcManager::Destroy() { backend_params_ptr_.reset(new BackendConfigParameters()); -} - -void QnnDlcManager::ResetLogger() { - logger_.reset(); -} - -void QnnDlcManager::TerminateAllBackends() { - qnn_loaded_backend_.TerminateAllBackends(); + backend_bundle_ptr_.reset(new QnnBackendBundle()); } } // namespace qnn diff --git a/backends/qualcomm/serialization/qc_compiler_spec.fbs b/backends/qualcomm/serialization/qc_compiler_spec.fbs index 85affe3464d..4fb55ccbcdc 100644 --- a/backends/qualcomm/serialization/qc_compiler_spec.fbs +++ b/backends/qualcomm/serialization/qc_compiler_spec.fbs @@ -110,10 +110,6 @@ table QnnExecuTorchHtpBackendOptions { /// Signed or unsigned HTP PD session. The default PD session is unsigned. pd_session:QnnExecuTorchHtpPdSession; - /// Optional parameter specifying the directory of QNN Skel library. Only - /// useful for backends which have a Skel library. - skel_library_dir:string; - /// With using conv hmx with short depths, we might have better performance, /// but convolution that have short depth and/or weights that are not /// symmetric could exhibit inaccurate results. @@ -217,10 +213,6 @@ table QnnExecuTorchOptions { /// Optional backend specific options for the HTP backend. backend_options:QnnExecuTorchBackendOptions; - /// Optional parameter to create qnn graph if QNN context blob is not given - /// It could be a list of names only when doing weight-sharing lowering - graph_name:[string]; - /// Optional parameter to override the QNN backend library. 
library_path:string; diff --git a/backends/qualcomm/serialization/qc_schema.py b/backends/qualcomm/serialization/qc_schema.py index c188c555c41..02ccfd404aa 100644 --- a/backends/qualcomm/serialization/qc_schema.py +++ b/backends/qualcomm/serialization/qc_schema.py @@ -122,7 +122,6 @@ class QnnExecuTorchHtpBackendOptions: ) precision: QnnExecuTorchHtpPrecision = QnnExecuTorchHtpPrecision.kHtpQuantized pd_session: QnnExecuTorchHtpPdSession = QnnExecuTorchHtpPdSession.kHtpUnsignedPd - skel_library_dir: str = "" use_conv_hmx: bool = True use_dlbc: bool = False use_fold_relu: bool = True @@ -188,7 +187,6 @@ class QnnExecuTorchOpPackageOptions: class QnnExecuTorchOptions: soc_info: SocInfo backend_options: QnnExecuTorchBackendOptions - graph_name: List[str] = field(default_factory=lambda: ["forward"]) library_path: str = "" log_level: QnnExecuTorchLogLevel = QnnExecuTorchLogLevel.kLogOff online_prepare: bool = False diff --git a/backends/qualcomm/tests/test_qnn_delegate.py b/backends/qualcomm/tests/test_qnn_delegate.py index a9403f98b17..269d3fbe334 100644 --- a/backends/qualcomm/tests/test_qnn_delegate.py +++ b/backends/qualcomm/tests/test_qnn_delegate.py @@ -4531,10 +4531,8 @@ def test_qnn_backend_multi_graphs(self): generate_qnn_executorch_compiler_spec( soc_model=self.chipset_table[TestQNN.model], backend_options=backend_options, - graph_name=graph_name, ) - for graph_name in graph_names - ] + ] * len(graph_names) modules_dict = {} sample_inputs_dict = {} @@ -4739,11 +4737,7 @@ def test_qnn_backend_context_extraction(self): lowered_module = edge_prog_mgr.exported_program().graph_module._modules[ "lowered_module_0" ] - qnn_mgr = PyQnnManagerAdaptor.QnnManager( - lowered_module.compile_specs[0].value - ) - qnn_mgr.Init() - binary = qnn_mgr.StripProtocol(lowered_module.processed_bytes) + binary = PyQnnManagerAdaptor.StripProtocol(lowered_module.processed_bytes) validate(binary) def test_qnn_backend_dump_context_from_pte(self): @@ -5348,10 +5342,8 @@ def test_qnn_backend_multi_graphs(self): generate_qnn_executorch_compiler_spec( soc_model=self.chipset_table[TestQNN.model], backend_options=backend_options, - graph_name=graph_name, ) - for graph_name in graph_names - ] + ] * len(graph_names) modules_dict = {} sample_inputs_dict = {} compiler_specs_dict = {} @@ -5566,11 +5558,7 @@ def test_qnn_backend_context_extraction(self): lowered_module = edge_prog_mgr.exported_program().graph_module._modules[ "lowered_module_0" ] - qnn_mgr = PyQnnManagerAdaptor.QnnManager( - lowered_module.compile_specs[0].value - ) - qnn_mgr.Init() - binary = qnn_mgr.StripProtocol(lowered_module.processed_bytes) + binary = PyQnnManagerAdaptor.StripProtocol(lowered_module.processed_bytes) validate(binary) def test_qnn_backend_dump_context_from_pte(self): @@ -8376,6 +8364,8 @@ def test_cli(self): "--input_list", f"{tmp_dir}/input_list", ] + if self.host: + cmds.extend(["--host", self.host]) subprocess.run(cmds, stdout=subprocess.DEVNULL) self.assertTrue(os.path.isfile(f"{tmp_dir}/e_out/output_0_0.pt")) diff --git a/backends/qualcomm/utils/qnn_manager_lifecycle.py b/backends/qualcomm/utils/qnn_manager_lifecycle.py new file mode 100644 index 00000000000..2e1ba7fd2d7 --- /dev/null +++ b/backends/qualcomm/utils/qnn_manager_lifecycle.py @@ -0,0 +1,88 @@ +import contextlib +import logging +import threading +from typing import Dict, List + +import executorch.backends.qualcomm.python.PyQnnManagerAdaptor as PyQnnManager + +from executorch.backends.qualcomm.partition.utils import generate_qnn_executorch_option +from 
executorch.backends.qualcomm.serialization.qc_schema import ( + QnnExecuTorchBackendType, +) +from executorch.backends.qualcomm.serialization.qc_schema_serialize import ( + flatbuffer_to_option, +) +from executorch.exir.backend.compile_spec_schema import CompileSpec + +# Thread-local storage for QnnManager instances +_current_qnn_managers = threading.local() + + +class QnnManagerRegistry: + def __init__(self): + # Registry stores {backend_type: QnnManager instance} + self._registry = {} + + def get_or_create_qnn_manager( + self, backend_type: QnnExecuTorchBackendType, option: bytes + ) -> PyQnnManager.QnnManager: + if backend_type not in self._registry: + qnn_manager = PyQnnManager.QnnManager(option) + qnn_manager.InitBackend() + self._registry[backend_type] = qnn_manager + return self._registry[backend_type] + + def destroy_qnn_manager(self, backend_type: QnnExecuTorchBackendType): + if backend_type in self._registry: + self._registry[backend_type].Destroy() + del self._registry[backend_type] + else: + logging.warning( + f"Attempted to destroy non-existent QnnManager for backend type {backend_type.name}" + ) + + +@contextlib.contextmanager +def QnnManagerContext(compile_specs: Dict[str, List[CompileSpec]]): + # Create a new registry for the current context + current_context_registry = QnnManagerRegistry() + _current_qnn_managers.active_registry = current_context_registry + + backend_types_in_this_context = set() + + try: + for compile_spec_list in compile_specs.values(): + option = generate_qnn_executorch_option(compile_spec_list) + python_options = flatbuffer_to_option(option) + backend_type = python_options.backend_options.backend_type + + # Use the current_context_registry to get/create the manager + current_context_registry.get_or_create_qnn_manager(backend_type, option) + backend_types_in_this_context.add(backend_type) + yield + finally: + # Destroy only the managers created within this context + for backend_type in backend_types_in_this_context: + current_context_registry.destroy_qnn_manager(backend_type) + + # Clear the active registry reference + _current_qnn_managers.active_registry = None + + +def get_current_qnn_manager( + backend_type: QnnExecuTorchBackendType, compile_specs: List[CompileSpec] +) -> PyQnnManager.QnnManager: + """ + Retrieves the QnnManager instance active for the current QnnManagerContext invocation. + Returns a new QnnManager if no QnnManager is active for the given backend_type in the current context. + """ + active_registry = getattr(_current_qnn_managers, "active_registry", None) + if active_registry is None or backend_type not in active_registry._registry: + logging.warning( + f"No QnnManager active for backend type {backend_type.name} in the current QnnManagerContext. " + "It would be better to use to_edge_transform_and_lower_to_qnn to lower to the QNN backend." 
+ ) + return QnnManagerRegistry().get_or_create_qnn_manager( + backend_type, generate_qnn_executorch_option(compile_specs) + ) + return active_registry._registry[backend_type] diff --git a/backends/qualcomm/utils/utils.py b/backends/qualcomm/utils/utils.py index 20a1d3c0f72..e4d0dc03d6d 100644 --- a/backends/qualcomm/utils/utils.py +++ b/backends/qualcomm/utils/utils.py @@ -50,6 +50,7 @@ QCOM_QNN_COMPILE_SPEC, QCOM_QUANTIZED_IO, ) +from executorch.backends.qualcomm.utils.qnn_manager_lifecycle import QnnManagerContext from executorch.exir import EdgeCompileConfig, ExirExportedProgram, to_edge from executorch.exir.backend.compile_spec_schema import CompileSpec @@ -185,8 +186,9 @@ def replace_linear(module: torch.nn.Module): def dump_context_from_pte(pte_path) -> List[str]: """ Dump compiled binaries under the same directory of pte_path. - For partitioned graph, there will be multiple files with names f"{graph_name}_{index}". - Where 'graph_name' comes from the compiler_specs and 'index' represents the execution order. + For a partitioned graph, there will be multiple files with names f"{method_name}_{index}". + 'method_name' refers to the name of a method in the nn.Module that was traced to + generate this program, while 'index' indicates the order of execution. Args: pte_path (str): The path of generated pte. @@ -201,14 +203,6 @@ def dump_context_from_pte(pte_path) -> List[str]: program = deserialize_pte_binary(program_data).program ctx_path = os.path.dirname(pte_path) - dummy_compiler_specs = generate_qnn_executorch_compiler_spec( - soc_model=QcomChipset.SM8650, - backend_options=generate_htp_compiler_spec(use_fp16=False), - ) - qnn_mgr = PyQnnManagerAdaptor.QnnManager( - generate_qnn_executorch_option(dummy_compiler_specs) - ) - qnn_mgr.Init() dumpfiles = [] for execution_plan in program.execution_plan: for i, delegate in enumerate(execution_plan.delegates): @@ -216,7 +210,7 @@ def dump_context_from_pte(pte_path) -> List[str]: processed_bytes = program.backend_delegate_data[ delegate.processed.index ].data - binary = qnn_mgr.StripProtocol(processed_bytes) + binary = PyQnnManagerAdaptor.StripProtocol(processed_bytes) file_extension = ".bin" if len(binary) == 0: binary = processed_bytes @@ -442,15 +436,15 @@ def ensure_graph_specific_dict(value, graph_names): transform_passes[graph_name] = QnnPassManager().get_to_edge_transform_passes( ep, passes_job=passes_job[graph_name], dep_table=dep_table[graph_name] ) - - return to_edge_transform_and_lower( - aten_programs, - transform_passes=transform_passes, - partitioner=qnn_partitioners, - constant_methods=constant_methods, - compile_config=qnn_edge_config(), - generate_etrecord=generate_etrecord, - ) + with QnnManagerContext(compiler_specs): + return to_edge_transform_and_lower( + aten_programs, + transform_passes=transform_passes, + partitioner=qnn_partitioners, + constant_methods=constant_methods, + compile_config=qnn_edge_config(), + generate_etrecord=generate_etrecord, + ) def capture_program( @@ -988,7 +982,6 @@ def generate_qnn_executorch_compiler_spec( optrace: bool = False, shared_buffer: bool = False, is_from_context_binary: bool = False, - graph_name: str = "forward", op_package_options: QnnExecuTorchOpPackageOptions = None, ) -> List[CompileSpec]: """ @@ -1017,7 +1010,6 @@ def generate_qnn_executorch_compiler_spec( shared_buffer: Enables usage of shared buffer between application and backend for graph I/O. is_from_context_binary: True if current graph comes from pre-built context binary. 
- graph_name: Assign unique graph name if lowering multiple methods. op_package_options: Optional structure to specify op packages loaded and used by the backend. @@ -1042,7 +1034,6 @@ def generate_qnn_executorch_compiler_spec( qnn_executorch_options = QnnExecuTorchOptions( _soc_info_table[soc_model], backend_options ) - qnn_executorch_options.graph_name = [graph_name] qnn_executorch_options.log_level = ( QnnExecuTorchLogLevel.kLogLevelDebug if debug diff --git a/docs/source/backends-qualcomm.md b/docs/source/backends-qualcomm.md index 6c5397f02be..ea5aadfb8c0 100644 --- a/docs/source/backends-qualcomm.md +++ b/docs/source/backends-qualcomm.md @@ -27,7 +27,7 @@ Qualcomm AI Engine Direct is also referred to as QNN in the source and documenta is designed to provide unified, low-level APIs for AI development. Developers can interact with various accelerators on Qualcomm SoCs with these set of APIs, including -Kryo CPU, Adreno GPU, and Hexagon processors. More details can be found [here](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/overview.html). +Kryo CPU, Adreno GPU, and Hexagon processors. More details can be found [here](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-10/overview.html). Currently, this ExecuTorch Backend can delegate AI computations to Hexagon processors through Qualcomm AI Engine Direct APIs. diff --git a/examples/qualcomm/custom_op/README.md b/examples/qualcomm/custom_op/README.md index e3d6b216d8b..98f6886eefb 100644 --- a/examples/qualcomm/custom_op/README.md +++ b/examples/qualcomm/custom_op/README.md @@ -10,7 +10,7 @@ This folder contains examples demonstrating how to register custom operators int - Please finish [setup QNN backend](../../../docs/source/backends-qualcomm.md). -- Please follow [the instructions to install proper version of Hexagon SDK and Hexagon Tools.](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/linux_setup.html#htp-and-dsp) +- Please follow [the instructions to install proper version of Hexagon SDK and Hexagon Tools.](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-10/linux_setup.html#htp-and-dsp) - This example is verified with SM8650 (Snapdragon 8 Gen 3). - Install hexagon-sdk-5.4.0, hexagon-sdk-6.0.0, and hexagon tool 8.8.02 ```bash @@ -91,7 +91,7 @@ For now, only support one output tensors. * Data type: backend specific * Shape: Any -Consult the Qualcomm AI Engine Direct documentation for information on [generation op packages](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/op_def_schema.html). +Consult the Qualcomm AI Engine Direct documentation for information on [generation op packages](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-10/op_def_schema.html). ## Registering Op Packages After an op package library has been generated, certain information needs to be passed to the `compile_spec` in order to properly delegate the nodes. [The example script](custom_ops_1.py) shows how to construct the `QnnExecuTorchOpPackageOptions` and register op packages with the `compile spec`. diff --git a/examples/qualcomm/custom_op/custom_ops_1.py b/examples/qualcomm/custom_op/custom_ops_1.py index e84ee87a251..1745e2df7fa 100644 --- a/examples/qualcomm/custom_op/custom_ops_1.py +++ b/examples/qualcomm/custom_op/custom_ops_1.py @@ -69,16 +69,14 @@ def annotate_custom(gm: torch.fx.GraphModule) -> None: This function is specific for custom op. 
The source_fn of the rewritten nn module turns out to be "my_ops.mul3.default" """ - from executorch.backends.qualcomm.quantizer.annotators import ( - _is_annotated, - QUANT_ANNOTATION_KEY, - ) + from executorch.backends.qualcomm.quantizer.annotators import _is_annotated from executorch.backends.qualcomm.quantizer.qconfig import ( get_ptq_per_channel_quant_config, ) from torch.fx import Node from torchao.quantization.pt2e.quantizer import QuantizationAnnotation + from torchao.quantization.pt2e.quantizer.quantizer import Q_ANNOTATION_KEY quantization_config = get_ptq_per_channel_quant_config() for node in gm.graph.nodes: @@ -95,7 +93,7 @@ def annotate_custom(gm: torch.fx.GraphModule) -> None: input_spec = quantization_config.input_activation input_qspec_map[input_act] = input_spec - node.meta[QUANT_ANNOTATION_KEY] = QuantizationAnnotation( + node.meta[Q_ANNOTATION_KEY] = QuantizationAnnotation( input_qspec_map=input_qspec_map, output_qspec=quantization_config.output_activation, _annotated=True, diff --git a/examples/qualcomm/oss_scripts/llama/artifacts/stories260k_hybrid_llama_qnn.pte b/examples/qualcomm/oss_scripts/llama/artifacts/stories260k_hybrid_llama_qnn.pte index 198b96e5b9b..5dc70df4253 100644 Binary files a/examples/qualcomm/oss_scripts/llama/artifacts/stories260k_hybrid_llama_qnn.pte and b/examples/qualcomm/oss_scripts/llama/artifacts/stories260k_hybrid_llama_qnn.pte differ diff --git a/examples/qualcomm/oss_scripts/llama/llama.py b/examples/qualcomm/oss_scripts/llama/llama.py index 29212c7855b..aa0f09d413e 100755 --- a/examples/qualcomm/oss_scripts/llama/llama.py +++ b/examples/qualcomm/oss_scripts/llama/llama.py @@ -425,6 +425,9 @@ def compile( params_path = decoder_model_config.params_path with open(params_path) as f: kv_config = ModelArgs(**json.load(f)) + if args.decoder_model in {"gemma-2b", "gemma3-1b"}: + # For gemma, the rmsnorm weights have been preprocessed + kv_config.norm_type = "rmsnorm" # get quant recipe quant_recipe: StaticLLMQuantRecipe = decoder_model_config.quant_recipe(True) @@ -808,10 +811,8 @@ def permute(w, heads, partial_rotary_dim): soc_model=get_soc_to_chipset_map()[args.model], backend_options=backend_options, shared_buffer=args.shared_buffer, - graph_name=graph_name, ) - for graph_name in graph_names - ] + ] * len(graph_names) llama_instance_list[1].save_logits_quant_attrs() edge_prog_mgr = to_edge_transform_and_lower_to_qnn( diff --git a/examples/qualcomm/qaihub_scripts/utils/README.md b/examples/qualcomm/qaihub_scripts/utils/README.md index f0fe7f6acca..ade61a1fa76 100644 --- a/examples/qualcomm/qaihub_scripts/utils/README.md +++ b/examples/qualcomm/qaihub_scripts/utils/README.md @@ -32,7 +32,7 @@ If users are interested in well-known applications, [Qualcomm AI HUB](https://ai # target chipset is `SM8650` python -m qai_hub_models.models.quicksrnetlarge_quantized.export --target-runtime qnn --chipset qualcomm-snapdragon-8gen3 ``` -* The compiled model library will be located under `$MY_WS/build/quicksrnetlarge_quantized/quicksrnetlarge_quantized.so`. This model library maps to the artifacts generated by SDK tools mentioned in `Integration workflow` section on [Qualcomm AI Engine Direct document](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/overview.html). +* The compiled model library will be located under `$MY_WS/build/quicksrnetlarge_quantized/quicksrnetlarge_quantized.so`. 
This model library maps to the artifacts generated by SDK tools mentioned in `Integration workflow` section on [Qualcomm AI Engine Direct document](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-10/overview.html). ### Compiling Program @@ -82,7 +82,7 @@ If users are interested in well-known applications, [Qualcomm AI HUB](https://ai # generally we would have same layout for input / output tensors: e.g. either NHWC or NCHW # this might not be true under different converter configurations # learn more with converter tool from Qualcomm AI Engine Direct documentation - # https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/tools.html#model-conversion + # https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-10/tools.html#model-conversion with open('output__142.pt', 'rb') as f: buffer = io.BytesIO(f.read()) img = torch.load(buffer, weights_only=False)
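
Putting the lifecycle changes together: `to_edge_transform_and_lower_to_qnn` now enters `QnnManagerContext` before lowering, so every method that targets the same backend type reuses a single `QnnManager` for the whole lowering session instead of re-initializing the QNN library per graph. The sketch below is illustrative only: the spec construction mirrors the dummy specs removed from `dump_context_from_pte`, while the import locations of `QcomChipset`, `generate_htp_compiler_spec`, and `generate_qnn_executorch_compiler_spec`, and the two-method module, are assumptions.

```python
from executorch.backends.qualcomm.partition.utils import (
    generate_qnn_executorch_option,
)
from executorch.backends.qualcomm.serialization.qc_schema import QcomChipset
from executorch.backends.qualcomm.serialization.qc_schema_serialize import (
    flatbuffer_to_option,
)
from executorch.backends.qualcomm.utils.qnn_manager_lifecycle import (
    QnnManagerContext,
    get_current_qnn_manager,
)
from executorch.backends.qualcomm.utils.utils import (
    generate_htp_compiler_spec,
    generate_qnn_executorch_compiler_spec,
)

# graph_name is gone from the spec API: all methods now share one spec list,
# matching the `] * len(graph_names)` pattern in the updated tests.
specs = generate_qnn_executorch_compiler_spec(
    soc_model=QcomChipset.SM8650,
    backend_options=generate_htp_compiler_spec(use_fp16=False),
)
method_names = ["forward", "encode"]  # hypothetical multi-graph module
compiler_specs = {name: specs for name in method_names}

with QnnManagerContext(compiler_specs):
    # Inside the context the backend is initialized once per backend type;
    # any lowering stage can look up the shared manager instead of
    # constructing and Init()-ing its own.
    option = generate_qnn_executorch_option(specs)
    backend_type = flatbuffer_to_option(option).backend_options.backend_type
    qnn_mgr = get_current_qnn_manager(backend_type, specs)
# Exiting the context destroys exactly the managers it created.
```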
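Relatedly, since `StripProtocol` is now a free function on `PyQnnManagerAdaptor`, pulling a raw QNN context binary out of a delegate payload no longer needs a live manager or dummy compile specs. A short sketch, where `processed_bytes` is a hypothetical stand-in for a lowered module's `processed_bytes`:

```python
import executorch.backends.qualcomm.python.PyQnnManagerAdaptor as PyQnnManagerAdaptor

# `processed_bytes` stands in for LoweredBackendModule.processed_bytes.
binary = PyQnnManagerAdaptor.StripProtocol(processed_bytes)
if len(binary) == 0:
    # An empty result means the payload was not a QNN context binary
    # (e.g. a DLC); fall back to the raw bytes, as dump_context_from_pte
    # now does.
    binary = processed_bytes
```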