
Commit 85f7a21

[QNN-EP] Add LoraV2 Support with offline QNN context binary
Description
- Add a new run option, lora_config, to feed in the information from the LoRA binary
- Parse and apply the LoRA binary in OnRunStart

Motivation and Context
- Support using a LoRA adapter binary together with a QNN context binary
1 parent fe7634e commit 85f7a21

File tree

4 files changed: +79, -0 lines changed

include/onnxruntime/core/session/onnxruntime_run_options_config_keys.h

Lines changed: 3 additions & 0 deletions
@@ -43,6 +43,9 @@ static const char* const kOrtRunOptionsConfigQnnPerfModePostRun = "qnn.htp_perf_
 // Set RPC control latency for QNN HTP backend
 static const char* const kOrtRunOptionsConfigQnnRpcControlLatency = "qnn.rpc_control_latency";
 
+// Set the QNN LoRA config file used to apply LoRA in a QNN context binary
+static const char* const kOrtRunOptionsConfigQnnLoraConfig = "qnn.lora_config";
+
 // Set graph annotation id for CUDA EP. Use with enable_cuda_graph=true.
 // The value should be an integer. If the value is not set, the default value is 0 and
 // ORT session only captures one cuda graph before another capture is requested.
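
For context, here is a minimal sketch of how an application might set this new run-option key through the ONNX Runtime C++ API. The session setup and the file path are illustrative assumptions, not part of this commit:

    #include <onnxruntime_cxx_api.h>

    // Assumes a session was already created with the QNN execution provider
    // and a model backed by an offline-generated QNN context binary.
    Ort::RunOptions run_options;
    // The value is the path to a LoRA config file (placeholder path shown).
    run_options.AddConfigEntry("qnn.lora_config", "/path/to/lora_config.txt");
    // Pass run_options to session.Run(...); OnRunStart then picks up the entry.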

onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc

Lines changed: 68 additions & 0 deletions
@@ -52,6 +52,74 @@ static const char* DlError() {
 #endif
 }
 
+Status readBinaryFromFile(std::string filePath, uint8_t* buffer, size_t bufferSize) {
+  ORT_RETURN_IF(nullptr == buffer, "Binary buffer is nullptr");
+  std::ifstream in(filePath, std::ifstream::binary);
+  ORT_RETURN_IF(!in, "Failed to open input file: ", filePath.c_str());
+  ORT_RETURN_IF(!in.read(reinterpret_cast<char*>(buffer), bufferSize), "Failed to read the contents of: ", filePath.c_str());
+  return Status::OK();
+}
+
+Status QnnBackendManager::ParseLoraConfig(std::string lora_config_path) {
+  LOGS_DEFAULT(INFO) << "Acquiring the QnnInterface " << lora_config_path;
+
+  QnnInterface_t* backend_interface_provider{nullptr};
+  auto rt = GetQnnInterfaceProvider<QnnInterfaceGetProvidersFn_t,
+                                    QnnInterface_t>(backend_path_.c_str(),
+                                                    "QnnInterface_getProviders",
+                                                    &backend_lib_handle_,
+                                                    {QNN_API_VERSION_MAJOR,
+                                                     QNN_API_VERSION_MINOR,
+                                                     QNN_API_VERSION_PATCH},
+                                                    &backend_interface_provider);
+  ORT_RETURN_IF_ERROR(rt);
+  qnn_interface_ = backend_interface_provider->QNN_INTERFACE_VER_NAME;
+
+  // The QNN LoRA config file is a single line with the graph name first,
+  // followed by the QNN LoRA context binary path, separated by a semicolon (;).
+  // Example: <graph_name>;<binary_path>
+  LOGS_DEFAULT(INFO) << "Loading Lora Config " << lora_config_path;
+  std::ifstream file(lora_config_path);
+  std::string line;
+
+  if (file.is_open()) {
+    if (std::getline(file, line)) {
+      std::istringstream ss(line);
+      std::string graph_name;
+      std::string lora_adapter_bin_path;
+
+      if (std::getline(ss, graph_name, ';') && std::getline(ss, lora_adapter_bin_path)) {
+        size_t bufferSize = std::filesystem::file_size(lora_adapter_bin_path.c_str());
+
+        ORT_RETURN_IF(0 == bufferSize, "Received path to an empty file. Nothing to deserialize.");
+        std::unique_ptr<uint8_t[]> buffer = std::make_unique<uint8_t[]>(bufferSize);
+        void* voidBufferPtr = static_cast<void*>(buffer.get());
+        QnnContext_Buffer_t contextBuffer{QNN_CONTEXT_BUFFER_VERSION_1,
+                                          {QNN_CONTEXTMEMTYPE_RAW, {voidBufferPtr, bufferSize}}};
+
+        auto status = readBinaryFromFile(lora_adapter_bin_path,
+                                         reinterpret_cast<uint8_t*>(buffer.get()),
+                                         bufferSize);
+        ORT_RETURN_IF(status != Status::OK(), "Failed to read binary data.");
+
+        // Retrieve the target graph and apply the updatable (LoRA) section of the binary.
+        Qnn_GraphHandle_t graph;
+        qnn_interface_.graphRetrieve(contexts_[0], graph_name.c_str(), &graph);
+
+        qnn_interface_.contextApplyBinarySection(
+            contexts_[0], graph, QNN_CONTEXT_SECTION_UPDATABLE, &contextBuffer, profile_backend_handle_, nullptr);
+      }
+    }
+    file.close();
+  } else {
+    LOGS_DEFAULT(ERROR) << "Unable to load Lora Config " << lora_config_path;
+  }
+
+  return Status::OK();
+}
+
 template <typename F, class T>
 Status QnnBackendManager::GetQnnInterfaceProvider(const char* lib_path,
                                                   const char* interface_provider_name,
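
Given the parsing logic above, the file passed via qnn.lora_config is expected to contain a single line of the form <graph_name>;<binary_path>. A hypothetical example (both values are placeholders, not taken from this commit):

    my_qnn_graph;/path/to/lora_adapter_context.bin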

onnxruntime/core/providers/qnn/builder/qnn_backend_manager.h

Lines changed: 2 additions & 0 deletions
@@ -140,6 +140,8 @@ class QnnBackendManager : public std::enable_shared_from_this<QnnBackendManager>
                           const Qnn_Tensor_t& qnn_tensor,
                           Qnn_MemHandle_t& mem_handle);
 
+  Status ParseLoraConfig(std::string lora_config);
+
  private:
   Status LoadBackend();

onnxruntime/core/providers/qnn/qnn_execution_provider.cc

Lines changed: 6 additions & 0 deletions
@@ -1202,6 +1202,12 @@ Status QNNExecutionProvider::OnRunStart(const onnxruntime::RunOptions& run_optio
     }
   }
 
+  std::string lora_config = "";
+  if (TryGetConfigEntry(config_options, kOrtRunOptionsConfigQnnLoraConfig, lora_config)) {
+    LOGS_DEFAULT(VERBOSE) << "lora_config: " << lora_config;
+    ORT_RETURN_IF_ERROR(qnn_backend_manager_->ParseLoraConfig(lora_config));
+  }
+
   return Status::OK();
 }
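
Native callers that build against the ONNX Runtime headers can use the constant added in the first file of this commit instead of the raw string. A sketch, assuming onnxruntime_run_options_config_keys.h is available on the include path and with a placeholder config path:

    #include "onnxruntime_run_options_config_keys.h"
    #include <onnxruntime_cxx_api.h>

    Ort::RunOptions run_options;
    // kOrtRunOptionsConfigQnnLoraConfig expands to "qnn.lora_config".
    run_options.AddConfigEntry(kOrtRunOptionsConfigQnnLoraConfig, "/path/to/lora_config.txt");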
