26 changes: 22 additions & 4 deletions backends/qualcomm/CMakeLists.txt
@@ -150,6 +150,7 @@ add_library(qnn_executorch_backend SHARED)
add_library(qnn_executorch_header INTERFACE)
add_library(qnn_executorch_logging STATIC)
add_library(qnn_factory STATIC)
add_library(qnn_backend_unified_registry STATIC)
add_library(qnn_function_interface INTERFACE)
add_library(qnn_graph STATIC)
add_library(qnn_implementation STATIC)
@@ -213,13 +214,30 @@ target_link_libraries(
)

target_link_libraries(
qnn_dlc_manager PRIVATE qnn_factory qnn_backend qnn_device qnn_context
qnn_graph qnn_mem_manager
qnn_backend_unified_registry PRIVATE qnn_schema qnn_backend qnn_device
qnn_implementation
)

target_link_libraries(
qnn_manager PRIVATE qnn_factory wrappers qnn_schema utils shared_buffer
qnn_dlc_manager
qnn_dlc_manager
PRIVATE qnn_factory
qnn_backend_unified_registry
qnn_backend
qnn_device
qnn_context
qnn_graph
qnn_mem_manager
)

target_link_libraries(
qnn_manager
PRIVATE qnn_factory
qnn_backend_unified_registry
wrappers
qnn_schema
utils
shared_buffer
qnn_dlc_manager
)
target_link_libraries(
qnn_executorch_backend
32 changes: 29 additions & 3 deletions backends/qualcomm/aot/python/PyQnnManagerAdaptor.cpp
@@ -28,15 +28,39 @@ std::string GetQnnSdkBuildId(std::string library_path) {
if (err != QNN_SUCCESS || id == nullptr) {
throw std::runtime_error("Failed to get QNN backend build ID");
}
qnn_loaded_backend.TerminateAllBackends();
qnn_loaded_backend.Unload();
return std::string(id);
}

py::array_t<char> StripProtocol(const py::bytes& preprocessed_binary) {
py::buffer_info info(py::buffer(preprocessed_binary).request());

void* buf_ptr = nullptr;
size_t buf_size = 0;
// check if it's a qnn context binary
auto [status, signature, ctx_size, ctx_bin] =
QnnContextCustomProtocol().DeserializeContextCustomBuffer(info.ptr);

if (status == Error::Ok) {
buf_size = ctx_size;
buf_ptr = ctx_bin;
} else {
// the format should be DLC, return nothing here
return py::array_t<char>(0);
}

auto result = py::array_t<char>(buf_size);
auto result_buffer = result.request();
std::memcpy(result_buffer.ptr, buf_ptr, buf_size);
return result;
}

PYBIND11_MODULE(PyQnnManagerAdaptor, m) {
// TODO: Add related documents for configurations listed below
using namespace qnn_delegate;

m.def("GetQnnSdkBuildId", &GetQnnSdkBuildId);
m.def("StripProtocol", &StripProtocol);
py::class_<QnnExecuTorchContextBinary>(m, "QnnExecuTorchContextBinary")
.def(py::init<>());

@@ -49,6 +73,8 @@ PYBIND11_MODULE(PyQnnManagerAdaptor, m) {
.def(py::init<const py::bytes&>())
.def(py::init<const py::bytes&, const py::bytes&>())
.def("Init", &PyQnnManager::Init)
.def("InitBackend", &PyQnnManager::InitBackend)
.def("InitContext", &PyQnnManager::InitContext)
.def("IsNodeSupportedByBackend", &PyQnnManager::IsNodeSupportedByBackend)
.def(
"Compile",
@@ -57,6 +83,7 @@ PYBIND11_MODULE(PyQnnManagerAdaptor, m) {
std::vector<std::vector<std::shared_ptr<OpWrapper>>>&>(
&PyQnnManager::Compile))
.def("Destroy", &PyQnnManager::Destroy)
.def("DestroyContext", &PyQnnManager::DestroyContext)
.def("IsAvailable", &PyQnnManager::IsAvailable)
.def("IsTensorDump", &PyQnnManager::IsTensorDump)
.def("AllocateTensor", &PyQnnManager::AllocateTensor)
@@ -66,8 +93,7 @@
.def("GetSpillFillBufferSize", &PyQnnManager::GetSpillFillBufferSize)
.def(
"MakeBinaryInfo",
py::overload_cast<const py::bytes&>(&PyQnnManager::MakeBinaryInfo))
.def("StripProtocol", &PyQnnManager::StripProtocol);
py::overload_cast<const py::bytes&>(&PyQnnManager::MakeBinaryInfo));
}
} // namespace qnn
} // namespace backends
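Taken together, these changes split the old monolithic `Init`/`Destroy` pair into separate backend and context stages, and move `StripProtocol` from a `PyQnnManager` method to a module-level function. Below is a minimal sketch of how the revised flow might be driven from Python, assuming the module import path used elsewhere in this PR; `serialized_options`, `py_op_wrapper_list`, and `preprocessed_binary` are placeholders, not values this diff defines:

```python
# Sketch only: `serialized_options` stands in for the flatbuffer-serialized
# compile options, and the op-wrapper construction step is elided.
import executorch.backends.qualcomm.python.PyQnnManagerAdaptor as PyQnnManager

manager = PyQnnManager.QnnManager(serialized_options)

# Backend and context are now initialized in two explicit stages instead of
# a single Init() call.
manager.InitBackend()
manager.InitContext(["forward"])

# ... build op wrappers, then e.g.:
# context_binary = manager.Compile(["forward"], py_op_wrapper_list)

# Contexts can be torn down without destroying the backend, so the manager
# can be reused across partitions.
manager.DestroyContext()

# StripProtocol is now a free function on the module rather than a
# QnnManager method; per the code above, it returns an empty array when the
# payload is a DLC instead of a QNN context binary.
raw = PyQnnManager.StripProtocol(preprocessed_binary)
```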
47 changes: 22 additions & 25 deletions backends/qualcomm/aot/python/PyQnnManagerAdaptor.h
@@ -50,7 +50,24 @@ class PyQnnManager {
}

executorch::runtime::Error Init() {
return qnn_manager_->Init();
ET_CHECK_OR_RETURN_ERROR(
qnn_manager_->InitBackend() == Error::Ok,
Internal,
"Failed to initialize backend");
ET_CHECK_OR_RETURN_ERROR(
qnn_manager_->InitContext() == Error::Ok,
Internal,
"Failed to initialize context");
return Error::Ok;
}

executorch::runtime::Error InitBackend() {
return qnn_manager_->InitBackend();
}

executorch::runtime::Error InitContext(
const std::vector<std::string>& graph_names) {
return qnn_manager_->InitContext(std::optional{graph_names});
}

bool IsNodeSupportedByBackend(
@@ -90,6 +107,10 @@ class PyQnnManager {
return qnn_manager_->Destroy();
}

void DestroyContext() {
return qnn_manager_->DestroyContext();
}

bool IsAvailable() {
return qnn_manager_->IsAvailable();
}
@@ -148,37 +169,13 @@ class PyQnnManager {
return result;
}

py::array_t<char> StripProtocol(const py::bytes& preprocessed_binary) {
py::buffer_info info(py::buffer(preprocessed_binary).request());

void* buf_ptr = nullptr;
size_t buf_size = 0;
// check if it's a qnn context binary
auto [status, signature, ctx_size, ctx_bin] =
QnnContextCustomProtocol().DeserializeContextCustomBuffer(info.ptr);

if (status == Error::Ok) {
buf_size = ctx_size;
buf_ptr = ctx_bin;
} else {
// the format should be DLC, return nothing here
return py::array_t<char>(0);
}

auto result = py::array_t<char>(buf_size);
auto result_buffer = result.request();
std::memcpy(result_buffer.ptr, buf_ptr, buf_size);
return result;
}

private:
// Store the bytes object instead of a raw pointer so that this module will
// keep the bytes alive.
const py::bytes qnn_executorch_option_ptr_;
QnnExecuTorchContextBinary qnn_executorch_context_binary_;
std::shared_ptr<QnnManager> qnn_manager_;
QnnContextCustomProtocol custom_context_custom_buffer_;
flatbuffers::FlatBufferBuilder builder_;
};
} // namespace qnn
} // namespace backends
8 changes: 4 additions & 4 deletions backends/qualcomm/builders/README.md
@@ -18,8 +18,8 @@ Thank you for contributing to Qualcomm AI Engine Direct delegate for ExecuTorch.

## References
### Qualcomm AI Engine Direct
- [Operator Definitions](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/MasterOpDef.html)
- [Supported Operators in Backends](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/operations.html#backend-supplements)
- [Operator Definitions](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-10/MasterOpDef.html)
- [Supported Operators in Backends](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-10/operations.html#backend-supplements)

### PyTorch
- [torch.nn Operator Definitions](https://pytorch.org/docs/stable/nn.html)
@@ -124,9 +124,9 @@ It will provide more hint to the source PyTorch layer where the missing operator
};
} Qnn_Param_t;
```
The name value equals the parameter name described in [Operator Definitions](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/MasterOpDef.html); for the `LayerNorm` case these are `epsilon` and `axes`.<br/>
The name value equals the parameter name described in [Operator Definitions](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-10/MasterOpDef.html); for the `LayerNorm` case these are `epsilon` and `axes`.<br/>

If you find it hard to correlate the missing operator with the documentation, this [table](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/SupportedOps.html) might be helpful for searching. In some cases an exact match may not exist; consider seeking a mathematically equivalent approach or notifying the maintainers for further analysis.
If you find it hard to correlate the missing operator with the documentation, this [table](https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-10/SupportedOps.html) might be helpful for searching. In some cases an exact match may not exist; consider seeking a mathematically equivalent approach or notifying the maintainers for further analysis.
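For the `LayerNorm` case above, a quick way to confirm the PyTorch-side counterparts of those parameters; the `eps` to `epsilon` and `normalized_shape` to `axes` correspondence is inferred from the operator definitions, so treat it as a guide rather than a guarantee:

```python
import torch

# torch.nn.LayerNorm carries the values that map onto QNN's LayerNorm
# parameters: `eps` corresponds to `epsilon`, and `normalized_shape`
# determines which trailing dimensions are normalized (QNN's `axes`).
layer_norm = torch.nn.LayerNorm(normalized_shape=(128,), eps=1e-5)
print(layer_norm.eps)               # 1e-05 -> QNN `epsilon`
print(layer_norm.normalized_shape)  # (128,) -> basis for QNN `axes`
```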

- **PyTorch**:<br/>
We could also read the IO spec from [function declaration](https://github.com/pytorch/pytorch/blob/main/aten/src/ATen/native/layer_norm.cpp) mentioned in [PyTorch Documentation](#pytorch):
4 changes: 2 additions & 2 deletions backends/qualcomm/debugger/utils.py
@@ -348,8 +348,8 @@ def generate_optrace(
qnn_binary_file="forward_0.dlc",
):
"""
Generate QNN HTP Optrace profiling https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/htp_backend.html#qnn-htp-optrace-profiling
and QNN HTP Analysis Summary (QHAS) https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/htp_backend.html#qnn-htp-analysis-summary-qhas
Generate QNN HTP Optrace profiling https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-10/htp_backend.html#qnn-htp-optrace-profiling
and QNN HTP Analysis Summary (QHAS) https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-10/htp_backend.html#qnn-htp-analysis-summary-qhas
. You can utilize the QAIRT Visualizer (https://pypi.org/project/qairt-visualizer/) to visualize the results from the files above.
"""
graph_name, file_extension = os.path.splitext(qnn_binary_file)
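The docstring above points to the QAIRT Visualizer for viewing the generated optrace and QHAS files. A hedged sketch of that step, assuming the `view` entry point advertised on the package's PyPI page; the report file names are illustrative and depend on what `generate_optrace` actually emits:

```python
# Assumes `pip install qairt-visualizer`; the `view` API and the report
# paths below are taken on faith from the package's public description,
# not verified against this repository.
import qairt_visualizer

qairt_visualizer.view(reports=["forward_0_optrace.json", "forward_0_qhas.json"])
```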
16 changes: 7 additions & 9 deletions backends/qualcomm/partition/qnn_partitioner.py
@@ -8,7 +8,6 @@
from collections import defaultdict
from typing import Any, Callable, Dict, List, Optional, Tuple

import executorch.backends.qualcomm.python.PyQnnManagerAdaptor as PyQnnManager
import torch
from executorch.backends.qualcomm.builders import node_visitor_manager
from executorch.backends.qualcomm.builders.qnn_constants import OpContextLoader
@@ -21,6 +20,9 @@
QCOM_BYPASS_NODE,
)

from executorch.backends.qualcomm.utils.qnn_manager_lifecycle import (
get_current_qnn_manager,
)
from executorch.exir.backend.backend_details import CompileSpec
from executorch.exir.backend.canonical_partitioners.pattern_op_partitioner import (
generate_partitions_from_list_of_nodes,
@@ -55,7 +57,8 @@ def __init__(
skip_node_id_set: set = None,
skip_node_op_set: set = None,
):
python_options = flatbuffer_to_option(compiler_specs[0].value)
option = generate_qnn_executorch_option(compiler_specs)
python_options = flatbuffer_to_option(option)
self.node_visitors = node_visitor_manager.get_node_visitors(
edge_program,
op_package_infos=python_options.op_package_options.op_package_infos,
@@ -64,12 +67,10 @@
self.skip_node_op_set = skip_node_op_set
self.skip_node_id_set = skip_node_id_set
self.nodes_to_wrappers = defaultdict(dict)
self.qnn_manager = PyQnnManager.QnnManager(
generate_qnn_executorch_option(compiler_specs)
self.qnn_manager = get_current_qnn_manager(
python_options.backend_options.backend_type, compiler_specs
)

self.qnn_manager.Init()

def is_node_supported(self, _, node: torch.fx.Node) -> bool:
if node.op != "call_function" or node.target in not_supported_operator:
return False
@@ -118,9 +119,6 @@ def is_node_supported(self, _, node: torch.fx.Node) -> bool:
print(f"[QNN Partitioner Op Support]: {node.target.__name__} | {supported}")
return supported

def __del__(self):
self.qnn_manager.Destroy()


class QnnPartitioner(Partitioner):
"""
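`qnn_manager_lifecycle` is not shown in this diff, so the exact semantics of `get_current_qnn_manager` are an assumption. Below is a minimal sketch of the per-backend caching it implies, given that the partitioner no longer calls `Init()` itself and no longer destroys the manager in `__del__`; everything besides the function name is hypothetical, including the import paths:

```python
import executorch.backends.qualcomm.python.PyQnnManagerAdaptor as PyQnnManager
from executorch.backends.qualcomm.utils.utils import generate_qnn_executorch_option

# Hypothetical cache: one shared, backend-initialized manager per backend
# type, reused across partitioning and preprocessing instead of each stage
# constructing and destroying its own instance.
_current_managers = {}


def get_current_qnn_manager(backend_type, compiler_specs):
    manager = _current_managers.get(backend_type)
    if manager is None:
        option = generate_qnn_executorch_option(compiler_specs)
        manager = PyQnnManager.QnnManager(option)
        manager.InitBackend()  # contexts are created per-compile by callers
        _current_managers[backend_type] = manager
    return manager
```

A helper along these lines would explain why `preprocess` and `preprocess_multimethod` below wrap each compile in `InitContext`/`DestroyContext` while leaving the backend alive.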
34 changes: 18 additions & 16 deletions backends/qualcomm/qnn_preprocess.py
@@ -8,8 +8,6 @@
from collections import defaultdict
from typing import Dict, final, List

import executorch.backends.qualcomm.python.PyQnnManagerAdaptor as PyQnnManager

import torch # noqa: F401
from executorch.backends.qualcomm._passes.qnn_pass_manager import QnnPassManager
from executorch.backends.qualcomm.builders.node_visitor_manager import get_node_visitors
@@ -20,7 +18,9 @@
)
from executorch.backends.qualcomm.serialization.qc_schema_serialize import (
flatbuffer_to_option,
option_to_flatbuffer,
)
from executorch.backends.qualcomm.utils.qnn_manager_lifecycle import (
get_current_qnn_manager,
)
from executorch.exir.backend.backend_details import (
BackendDetails,
@@ -30,6 +30,7 @@
from torch.export.exported_program import ExportedProgram

DEFAULT_DEBUG_HANDLE = 65535
DEFAULT_GRAPH_NAME = "forward"

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
@@ -99,9 +100,11 @@ def preprocess(
compile_specs: List[CompileSpec],
) -> PreprocessResult:
option = generate_qnn_executorch_option(compile_specs)
qnn_manager = PyQnnManager.QnnManager(option)
qnn_manager.Init()
obj_options = flatbuffer_to_option(option)
qnn_manager = get_current_qnn_manager(
obj_options.backend_options.backend_type, compile_specs
)
qnn_manager.InitContext([DEFAULT_GRAPH_NAME])
py_op_wrapper_list = QnnBackend._build_op_wrappers(
edge_program,
qnn_manager.IsTensorDump(),
@@ -118,7 +121,7 @@
f"Record all QNN API calls from saver backend at: {obj_options.saver_output_dir}"
)
assert len(qnn_context_binary) != 0, "Failed to generate Qnn context binary."
qnn_manager.Destroy()
qnn_manager.DestroyContext()
# For now, debug_handle_map is not used by QNN ExecuTorch
return PreprocessResult(
processed_bytes=bytes(qnn_context_binary),
@@ -132,12 +135,9 @@
) -> PreprocessResult:
# TODO: refactor QnnManager to consume multiple compile_spec
# take first compile_specs here for the same partitions
graph_name = list(edge_programs.keys())
graph_names = list(edge_programs.keys())
compile_spec = list(compile_specs.values())[0][0]
# gather all graph names
option = flatbuffer_to_option(compile_spec[0].value)
option.graph_name = graph_name
compile_spec[0].value = option_to_flatbuffer(option)
# check if each graph has equal number of partitions
num_sub_graphs = set()
for edge_program in edge_programs.values():
@@ -149,15 +149,15 @@

all_processed_results = {key: [] for key in edge_programs.keys()}
num_sub_graphs = next(iter(num_sub_graphs))
qnn_manager = get_current_qnn_manager(
option.backend_options.backend_type, compile_spec
)
for i in range(num_sub_graphs):
# e.g. 2 methods (x, y) with 3 partitions
# > context_binary_0: [x.subgraph_0, y.subgraph_0]
# > context_binary_1: [x.subgraph_1, y.subgraph_1]
# > context_binary_2: [x.subgraph_2, y.subgraph_2]
qnn_manager = PyQnnManager.QnnManager(
generate_qnn_executorch_option(compile_spec)
)
qnn_manager.Init()
qnn_manager.InitContext(graph_names)
py_op_wrapper_list, ctx_binary_list = [], []
for j, programs in enumerate(edge_programs.values()):
logger.info(f"Processing Method({j}): ({i+1}/{num_sub_graphs})")
@@ -177,7 +179,9 @@
)

if len(py_op_wrapper_list) == len(edge_programs.values()):
qnn_context_binary = qnn_manager.Compile(graph_name, py_op_wrapper_list)
qnn_context_binary = qnn_manager.Compile(
graph_names, py_op_wrapper_list
)
if option.saver:
# TODO: Currently, only the first method is saved. Update this logic if saving multiple methods becomes necessary in the future.
exit(
@@ -186,7 +188,7 @@
assert (
len(qnn_context_binary) != 0
), "Failed to generate Qnn context binary."
qnn_manager.Destroy()
qnn_manager.DestroyContext()
# methods should share the same context binary for current partition
for key in edge_programs.keys():
all_processed_results[key].append(