26 changes: 22 additions & 4 deletions backends/qualcomm/CMakeLists.txt
@@ -150,6 +150,7 @@ add_library(qnn_executorch_backend SHARED)
add_library(qnn_executorch_header INTERFACE)
add_library(qnn_executorch_logging STATIC)
add_library(qnn_factory STATIC)
add_library(qnn_backend_unified_registry STATIC)
add_library(qnn_function_interface INTERFACE)
add_library(qnn_graph STATIC)
add_library(qnn_implementation STATIC)
@@ -213,13 +214,30 @@ target_link_libraries(
)

target_link_libraries(
qnn_dlc_manager PRIVATE qnn_factory qnn_backend qnn_device qnn_context
qnn_graph qnn_mem_manager
qnn_backend_unified_registry PRIVATE qnn_schema qnn_backend qnn_device
qnn_implementation
)

target_link_libraries(
qnn_manager PRIVATE qnn_factory wrappers qnn_schema utils shared_buffer
qnn_dlc_manager
qnn_dlc_manager
PRIVATE qnn_factory
qnn_backend_unified_registry
qnn_backend
qnn_device
qnn_context
qnn_graph
qnn_mem_manager
)

target_link_libraries(
qnn_manager
PRIVATE qnn_factory
qnn_backend_unified_registry
wrappers
qnn_schema
utils
shared_buffer
qnn_dlc_manager
)
target_link_libraries(
qnn_executorch_backend
32 changes: 29 additions & 3 deletions backends/qualcomm/aot/python/PyQnnManagerAdaptor.cpp
@@ -28,15 +28,39 @@ std::string GetQnnSdkBuildId(std::string library_path) {
if (err != QNN_SUCCESS || id == nullptr) {
throw std::runtime_error("Failed to get QNN backend build ID");
}
qnn_loaded_backend.TerminateAllBackends();
qnn_loaded_backend.Unload();
return std::string(id);
}

py::array_t<char> StripProtocol(const py::bytes& preprocessed_binary) {
py::buffer_info info(py::buffer(preprocessed_binary).request());

void* buf_ptr = nullptr;
size_t buf_size = 0;
// check if it's a qnn context binary
auto [status, signature, ctx_size, ctx_bin] =
QnnContextCustomProtocol().DeserializeContextCustomBuffer(info.ptr);

if (status == Error::Ok) {
buf_size = ctx_size;
buf_ptr = ctx_bin;
} else {
// the format should be DLC, return nothing here
return py::array_t<char>(0);
}

auto result = py::array_t<char>(buf_size);
auto result_buffer = result.request();
std::memcpy(result_buffer.ptr, buf_ptr, buf_size);
return result;
}

PYBIND11_MODULE(PyQnnManagerAdaptor, m) {
// TODO: Add related documents for configurations listed below
using namespace qnn_delegate;

m.def("GetQnnSdkBuildId", &GetQnnSdkBuildId);
m.def("StripProtocol", &StripProtocol);
py::class_<QnnExecuTorchContextBinary>(m, "QnnExecuTorchContextBinary")
.def(py::init<>());

@@ -49,6 +73,8 @@ PYBIND11_MODULE(PyQnnManagerAdaptor, m) {
.def(py::init<const py::bytes&>())
.def(py::init<const py::bytes&, const py::bytes&>())
.def("Init", &PyQnnManager::Init)
.def("InitBackend", &PyQnnManager::InitBackend)
.def("InitContext", &PyQnnManager::InitContext)
.def("IsNodeSupportedByBackend", &PyQnnManager::IsNodeSupportedByBackend)
.def(
"Compile",
@@ -57,6 +83,7 @@ PYBIND11_MODULE(PyQnnManagerAdaptor, m) {
std::vector<std::vector<std::shared_ptr<OpWrapper>>>&>(
&PyQnnManager::Compile))
.def("Destroy", &PyQnnManager::Destroy)
.def("DestroyContext", &PyQnnManager::DestroyContext)
.def("IsAvailable", &PyQnnManager::IsAvailable)
.def("IsTensorDump", &PyQnnManager::IsTensorDump)
.def("AllocateTensor", &PyQnnManager::AllocateTensor)
@@ -66,8 +93,7 @@
.def("GetSpillFillBufferSize", &PyQnnManager::GetSpillFillBufferSize)
.def(
"MakeBinaryInfo",
py::overload_cast<const py::bytes&>(&PyQnnManager::MakeBinaryInfo))
.def("StripProtocol", &PyQnnManager::StripProtocol);
py::overload_cast<const py::bytes&>(&PyQnnManager::MakeBinaryInfo));
}
} // namespace qnn
} // namespace backends
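Taken together, these changes split the old monolithic `Init`/`Destroy` pair into separate backend and context stages, and move `StripProtocol` from a `PyQnnManager` method to a module-level function. Below is a minimal sketch of how the revised flow might be driven from Python, assuming the module import path used elsewhere in this PR; `serialized_options`, `py_op_wrapper_list`, and `preprocessed_binary` are placeholders, not values this diff defines:

```python
# Sketch only: `serialized_options` stands in for the flatbuffer-serialized
# compile options, and the op-wrapper construction step is elided.
import executorch.backends.qualcomm.python.PyQnnManagerAdaptor as PyQnnManager

manager = PyQnnManager.QnnManager(serialized_options)

# Backend and context are now initialized in two explicit stages instead of
# a single Init() call.
manager.InitBackend()
manager.InitContext(["forward"])

# ... build op wrappers, then e.g.:
# context_binary = manager.Compile(["forward"], py_op_wrapper_list)

# Contexts can be torn down without destroying the backend, so the manager
# can be reused across partitions.
manager.DestroyContext()

# StripProtocol is now a free function on the module rather than a
# QnnManager method; per the code above, it returns an empty array when the
# payload is a DLC instead of a QNN context binary.
raw = PyQnnManager.StripProtocol(preprocessed_binary)
```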
47 changes: 22 additions & 25 deletions backends/qualcomm/aot/python/PyQnnManagerAdaptor.h
@@ -50,7 +50,24 @@ class PyQnnManager {
}

executorch::runtime::Error Init() {
return qnn_manager_->Init();
ET_CHECK_OR_RETURN_ERROR(
qnn_manager_->InitBackend() == Error::Ok,
Internal,
"Failed to initialize backend");
ET_CHECK_OR_RETURN_ERROR(
qnn_manager_->InitContext() == Error::Ok,
Internal,
"Failed to initialize context");
return Error::Ok;
}

executorch::runtime::Error InitBackend() {
return qnn_manager_->InitBackend();
}

executorch::runtime::Error InitContext(
const std::vector<std::string>& graph_names) {
return qnn_manager_->InitContext(std::optional{graph_names});
}

bool IsNodeSupportedByBackend(
@@ -90,6 +107,10 @@ class PyQnnManager {
return qnn_manager_->Destroy();
}

void DestroyContext() {
return qnn_manager_->DestroyContext();
}

bool IsAvailable() {
return qnn_manager_->IsAvailable();
}
@@ -148,37 +169,13 @@ class PyQnnManager {
return result;
}

py::array_t<char> StripProtocol(const py::bytes& preprocessed_binary) {
py::buffer_info info(py::buffer(preprocessed_binary).request());

void* buf_ptr = nullptr;
size_t buf_size = 0;
// check if it's a qnn context binary
auto [status, signature, ctx_size, ctx_bin] =
QnnContextCustomProtocol().DeserializeContextCustomBuffer(info.ptr);

if (status == Error::Ok) {
buf_size = ctx_size;
buf_ptr = ctx_bin;
} else {
// the format should be DLC, return nothing here
return py::array_t<char>(0);
}

auto result = py::array_t<char>(buf_size);
auto result_buffer = result.request();
std::memcpy(result_buffer.ptr, buf_ptr, buf_size);
return result;
}

private:
// Store the bytes object instead of a raw pointer so that this module will
// keep the bytes alive.
const py::bytes qnn_executorch_option_ptr_;
QnnExecuTorchContextBinary qnn_executorch_context_binary_;
std::shared_ptr<QnnManager> qnn_manager_;
QnnContextCustomProtocol custom_context_custom_buffer_;
flatbuffers::FlatBufferBuilder builder_;
};
} // namespace qnn
} // namespace backends
8 changes: 4 additions & 4 deletions backends/qualcomm/builders/README.md
@@ -18,8 +18,8 @@ Thank you for contributing to Qualcomm AI Engine Direct delegate for ExecuTorch.

## References
### Qualcomm AI Engine Direct
- [Operator Definitions](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/MasterOpDef.html)
- [Supported Operators in Backends](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/operations.html#backend-supplements)
- [Operator Definitions](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-10/MasterOpDef.html)
- [Supported Operators in Backends](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-10/operations.html#backend-supplements)

### PyTorch
- [torch.nn Operator Definitions](https://pytorch.org/docs/stable/nn.html)
@@ -124,9 +124,9 @@ It will provide more hint to the source PyTorch layer where the missing operator
};
} Qnn_Param_t;
```
The name value equals the parameter name described in [Operator Definitions](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/MasterOpDef.html); for the `LayerNorm` case these are `epsilon` and `axes`.<br/>
The name value equals the parameter name described in [Operator Definitions](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-10/MasterOpDef.html); for the `LayerNorm` case these are `epsilon` and `axes`.<br/>

If you find it hard to correlate the missing operator with the documentation, this [table](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/SupportedOps.html) might be helpful for searching. In some cases an exact match may not exist; consider seeking a mathematically equivalent approach or notifying the maintainers for further analysis.
If you find it hard to correlate the missing operator with the documentation, this [table](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-10/SupportedOps.html) might be helpful for searching. In some cases an exact match may not exist; consider seeking a mathematically equivalent approach or notifying the maintainers for further analysis.
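For the `LayerNorm` case above, a quick way to confirm the PyTorch-side counterparts of those parameters; the `eps` to `epsilon` and `normalized_shape` to `axes` correspondence is inferred from the operator definitions, so treat it as a guide rather than a guarantee:

```python
import torch

# torch.nn.LayerNorm carries the values that map onto QNN's LayerNorm
# parameters: `eps` corresponds to `epsilon`, and `normalized_shape`
# determines which trailing dimensions are normalized (QNN's `axes`).
layer_norm = torch.nn.LayerNorm(normalized_shape=(128,), eps=1e-5)
print(layer_norm.eps)               # 1e-05 -> QNN `epsilon`
print(layer_norm.normalized_shape)  # (128,) -> basis for QNN `axes`
```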

- **PyTorch**:<br/>
We could also read the IO spec from [function declaration](https://github.com/pytorch/pytorch/blob/main/aten/src/ATen/native/layer_norm.cpp) mentioned in [PyTorch Documentation](#pytorch):
4 changes: 2 additions & 2 deletions backends/qualcomm/debugger/utils.py
@@ -348,8 +348,8 @@ def generate_optrace(
qnn_binary_file="forward_0.dlc",
):
"""
Generate QNN HTP Optrace profiling https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/htp_backend.html#qnn-htp-optrace-profiling
and QNN HTP Analysis Summary (QHAS) https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/htp_backend.html#qnn-htp-analysis-summary-qhas
Generate QNN HTP Optrace profiling https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-10/htp_backend.html#qnn-htp-optrace-profiling
and QNN HTP Analysis Summary (QHAS) https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-10/htp_backend.html#qnn-htp-analysis-summary-qhas
. You can utilize the QAIRT Visualizer (https://pypi.org/project/qairt-visualizer/) to visualize the results from the files above.
"""
graph_name, file_extension = os.path.splitext(qnn_binary_file)
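The docstring above points to the QAIRT Visualizer for viewing the generated optrace and QHAS files. A hedged sketch of that step, assuming the `view` entry point advertised on the package's PyPI page; the report file names are illustrative and depend on what `generate_optrace` actually emits:

```python
# Assumes `pip install qairt-visualizer`; the `view` API and the report
# paths below are taken on faith from the package's public description,
# not verified against this repository.
import qairt_visualizer

qairt_visualizer.view(reports=["forward_0_optrace.json", "forward_0_qhas.json"])
```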
16 changes: 7 additions & 9 deletions backends/qualcomm/partition/qnn_partitioner.py
@@ -8,7 +8,6 @@
from collections import defaultdict
from typing import Any, Callable, Dict, List, Optional, Tuple

import executorch.backends.qualcomm.python.PyQnnManagerAdaptor as PyQnnManager
import torch
from executorch.backends.qualcomm.builders import node_visitor_manager
from executorch.backends.qualcomm.builders.qnn_constants import OpContextLoader
@@ -21,6 +20,9 @@
QCOM_BYPASS_NODE,
)

from executorch.backends.qualcomm.utils.qnn_manager_lifecycle import (
get_current_qnn_manager,
)
from executorch.exir.backend.backend_details import CompileSpec
from executorch.exir.backend.canonical_partitioners.pattern_op_partitioner import (
generate_partitions_from_list_of_nodes,
@@ -55,7 +57,8 @@ def __init__(
skip_node_id_set: set = None,
skip_node_op_set: set = None,
):
python_options = flatbuffer_to_option(compiler_specs[0].value)
option = generate_qnn_executorch_option(compiler_specs)
python_options = flatbuffer_to_option(option)
self.node_visitors = node_visitor_manager.get_node_visitors(
edge_program,
op_package_infos=python_options.op_package_options.op_package_infos,
@@ -64,12 +67,10 @@
self.skip_node_op_set = skip_node_op_set
self.skip_node_id_set = skip_node_id_set
self.nodes_to_wrappers = defaultdict(dict)
self.qnn_manager = PyQnnManager.QnnManager(
generate_qnn_executorch_option(compiler_specs)
self.qnn_manager = get_current_qnn_manager(
python_options.backend_options.backend_type, compiler_specs
)

self.qnn_manager.Init()

def is_node_supported(self, _, node: torch.fx.Node) -> bool:
if node.op != "call_function" or node.target in not_supported_operator:
return False
@@ -118,9 +119,6 @@ def is_node_supported(self, _, node: torch.fx.Node) -> bool:
print(f"[QNN Partitioner Op Support]: {node.target.__name__} | {supported}")
return supported

def __del__(self):
self.qnn_manager.Destroy()


class QnnPartitioner(Partitioner):
"""
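`qnn_manager_lifecycle` is not shown in this diff, so the exact semantics of `get_current_qnn_manager` are an assumption. Below is a minimal sketch of the per-backend caching it implies, given that the partitioner no longer calls `Init()` itself and no longer destroys the manager in `__del__`; everything besides the function name is hypothetical, including the import paths:

```python
import executorch.backends.qualcomm.python.PyQnnManagerAdaptor as PyQnnManager
from executorch.backends.qualcomm.utils.utils import generate_qnn_executorch_option

# Hypothetical cache: one shared, backend-initialized manager per backend
# type, reused across partitioning and preprocessing instead of each stage
# constructing and destroying its own instance.
_current_managers = {}


def get_current_qnn_manager(backend_type, compiler_specs):
    manager = _current_managers.get(backend_type)
    if manager is None:
        option = generate_qnn_executorch_option(compiler_specs)
        manager = PyQnnManager.QnnManager(option)
        manager.InitBackend()  # contexts are created per-compile by callers
        _current_managers[backend_type] = manager
    return manager
```

A helper along these lines would explain why `preprocess` and `preprocess_multimethod` below wrap each compile in `InitContext`/`DestroyContext` while leaving the backend alive.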
34 changes: 18 additions & 16 deletions backends/qualcomm/qnn_preprocess.py
@@ -8,8 +8,6 @@
from collections import defaultdict
from typing import Dict, final, List

import executorch.backends.qualcomm.python.PyQnnManagerAdaptor as PyQnnManager

import torch # noqa: F401
from executorch.backends.qualcomm._passes.qnn_pass_manager import QnnPassManager
from executorch.backends.qualcomm.builders.node_visitor_manager import get_node_visitors
@@ -20,7 +18,9 @@
)
from executorch.backends.qualcomm.serialization.qc_schema_serialize import (
flatbuffer_to_option,
option_to_flatbuffer,
)
from executorch.backends.qualcomm.utils.qnn_manager_lifecycle import (
get_current_qnn_manager,
)
from executorch.exir.backend.backend_details import (
BackendDetails,
@@ -30,6 +30,7 @@
from torch.export.exported_program import ExportedProgram

DEFAULT_DEBUG_HANDLE = 65535
DEFAULT_GRAPH_NAME = "forward"

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
@@ -99,9 +100,11 @@ def preprocess(
compile_specs: List[CompileSpec],
) -> PreprocessResult:
option = generate_qnn_executorch_option(compile_specs)
qnn_manager = PyQnnManager.QnnManager(option)
qnn_manager.Init()
obj_options = flatbuffer_to_option(option)
qnn_manager = get_current_qnn_manager(
obj_options.backend_options.backend_type, compile_specs
)
qnn_manager.InitContext([DEFAULT_GRAPH_NAME])
py_op_wrapper_list = QnnBackend._build_op_wrappers(
edge_program,
qnn_manager.IsTensorDump(),
@@ -118,7 +121,7 @@
f"Record all QNN API calls from saver backend at: {obj_options.saver_output_dir}"
)
assert len(qnn_context_binary) != 0, "Failed to generate Qnn context binary."
qnn_manager.Destroy()
qnn_manager.DestroyContext()
# For now, debug_handle_map is not used by QNN ExecuTorch
return PreprocessResult(
processed_bytes=bytes(qnn_context_binary),
@@ -132,12 +135,9 @@
) -> PreprocessResult:
# TODO: refactor QnnManager to consume multiple compile_spec
# take first compile_specs here for the same partitions
graph_name = list(edge_programs.keys())
graph_names = list(edge_programs.keys())
compile_spec = list(compile_specs.values())[0][0]
# gather all graph names
option = flatbuffer_to_option(compile_spec[0].value)
option.graph_name = graph_name
compile_spec[0].value = option_to_flatbuffer(option)
# check if each graph has equal number of partitions
num_sub_graphs = set()
for edge_program in edge_programs.values():
@@ -149,15 +149,15 @@

all_processed_results = {key: [] for key in edge_programs.keys()}
num_sub_graphs = next(iter(num_sub_graphs))
qnn_manager = get_current_qnn_manager(
option.backend_options.backend_type, compile_spec
)
for i in range(num_sub_graphs):
# e.g. 2 methods (x, y) with 3 partitions
# > context_binary_0: [x.subgraph_0, y.subgraph_0]
# > context_binary_1: [x.subgraph_1, y.subgraph_1]
# > context_binary_2: [x.subgraph_2, y.subgraph_2]
qnn_manager = PyQnnManager.QnnManager(
generate_qnn_executorch_option(compile_spec)
)
qnn_manager.Init()
qnn_manager.InitContext(graph_names)
py_op_wrapper_list, ctx_binary_list = [], []
for j, programs in enumerate(edge_programs.values()):
logger.info(f"Processing Method({j}): ({i+1}/{num_sub_graphs})")
@@ -177,7 +179,9 @@
)

if len(py_op_wrapper_list) == len(edge_programs.values()):
qnn_context_binary = qnn_manager.Compile(graph_name, py_op_wrapper_list)
qnn_context_binary = qnn_manager.Compile(
graph_names, py_op_wrapper_list
)
if option.saver:
# TODO: Currently, only the first method is saved. Update this logic if saving multiple methods becomes necessary in the future.
exit(
@@ -186,7 +188,7 @@
assert (
len(qnn_context_binary) != 0
), "Failed to generate Qnn context binary."
qnn_manager.Destroy()
qnn_manager.DestroyContext()
# methods should share the same context binary for current partition
for key in edge_programs.keys():
all_processed_results[key].append(