Skip to content

Commit aced002

Browse files
chuteng-quic authored and qti-chuteng committed
[QNN-EP] Add LoraV2 Support with offline QNN context binary
Description
- Add a new run option, lora_config, that passes in the information about the LoRA binary.
- Parse and apply the LoRA binary in OnRunStart.

Motivation and Context
- Support LoRA adapter binaries together with QNN context binaries.

Usage
1 parent 85f7a21 commit aced002

File tree

2 files changed

+42
-46
lines changed

2 files changed

+42
-46
lines changed

onnxruntime/core/providers/qnn/builder/qnn_backend_manager.cc

Lines changed: 41 additions & 45 deletions
Original file line number | Diff line number | Diff line change
@@ -52,32 +52,17 @@ static const char* DlError() {
5252
#endif
5353
}
5454

55-
Status readBinaryFromFile(std::string filePath, uint8_t* buffer, size_t bufferSize) {
55+
Status ReadBinaryFromFile(const std::string& file_path, uint8_t* buffer, size_t buffer_size) {
5656
ORT_RETURN_IF(nullptr == buffer, "Binary buffer is nullptr");
57-
std::ifstream in(filePath, std::ifstream::binary);
58-
ORT_RETURN_IF(!in, "Failed to open input file: ", filePath.c_str());
59-
ORT_RETURN_IF(!in.read(reinterpret_cast<char*>(buffer), bufferSize), "Failed to read the contents of: ", filePath.c_str());
57+
std::ifstream in(file_path, std::ifstream::binary);
58+
ORT_RETURN_IF(!in, "Failed to open input file: ", file_path.c_str());
59+
ORT_RETURN_IF(!in.read(reinterpret_cast<char*>(buffer), buffer_size), "Failed to read the contents of: ", file_path.c_str());
6060
return Status::OK();
6161
}
6262

63-
6463
Status QnnBackendManager::ParseLoraConfig(std::string lora_config_path) {
65-
6664
LOGS_DEFAULT(INFO) << "Acquiring the QnnInterface " << lora_config_path;
6765

68-
QnnInterface_t* backend_interface_provider{nullptr};
69-
auto rt = GetQnnInterfaceProvider<QnnInterfaceGetProvidersFn_t,
70-
QnnInterface_t>(backend_path_.c_str(),
71-
"QnnInterface_getProviders",
72-
&backend_lib_handle_,
73-
{QNN_API_VERSION_MAJOR,
74-
QNN_API_VERSION_MINOR,
75-
QNN_API_VERSION_PATCH},
76-
&backend_interface_provider);
77-
78-
ORT_RETURN_IF_ERROR(rt);
79-
qnn_interface_ = backend_interface_provider->QNN_INTERFACE_VER_NAME;
80-
8166
// QNN Lora Config file format should be a single line, with the graph name first,
8267
// followed by the qnn lora context binary path, separated by a semicolon (;)
8368
// Example: <graph_name>;<binary_path>
@@ -86,35 +71,46 @@ Status QnnBackendManager::ParseLoraConfig(std::string lora_config_path) {
8671
std::string line;
8772

8873
if (file.is_open()) {
89-
if (std::getline(file, line)) {
90-
std::istringstream ss(line);
91-
std::string graph_name;
92-
std::string lora_adapter_bin_path;
93-
94-
if (std::getline(ss, graph_name, ';') && std::getline(ss, lora_adapter_bin_path)) {
95-
size_t bufferSize = std::filesystem::file_size(lora_adapter_bin_path.c_str());
96-
97-
ORT_RETURN_IF(0 == bufferSize, "Received path to an empty file. Nothing to deserialize.");
98-
std::unique_ptr<uint8_t[]> buffer = std::make_unique<uint8_t[]>(bufferSize);
99-
void *voidBufferPtr = static_cast<void *>(buffer.get());
100-
QnnContext_Buffer_t contextBuffer{QNN_CONTEXT_BUFFER_VERSION_1,
101-
{QNN_CONTEXTMEMTYPE_RAW, {voidBufferPtr, bufferSize}}};
102-
103-
auto status = readBinaryFromFile(lora_adapter_bin_path,
104-
reinterpret_cast<uint8_t *>(buffer.get()),
105-
bufferSize);
106-
107-
ORT_RETURN_IF(status != Status::OK(), "Failed to read binary data.");
108-
Qnn_GraphHandle_t graph;
109-
qnn_interface_.graphRetrieve(contexts_[0], graph_name.c_str(), &graph);
110-
111-
qnn_interface_.contextApplyBinarySection(
112-
contexts_[0], graph, QNN_CONTEXT_SECTION_UPDATABLE, &contextBuffer, profile_backend_handle_, nullptr);
74+
if (std::getline(file, line)) {
75+
std::istringstream ss(line);
76+
std::string graph_name;
77+
std::string lora_adapter_bin_path;
78+
79+
if (std::getline(ss, graph_name, ';') && std::getline(ss, lora_adapter_bin_path)) {
80+
size_t buffer_size = std::filesystem::file_size(lora_adapter_bin_path.c_str());
81+
82+
ORT_RETURN_IF(0 == buffer_size, "Received path to an empty file. Nothing to deserialize.");
83+
std::unique_ptr<uint8_t[]> buffer = std::make_unique<uint8_t[]>(buffer_size);
84+
void* voidBufferPtr = static_cast<void*>(buffer.get());
85+
QnnContext_Buffer_t contextBuffer{QNN_CONTEXT_BUFFER_VERSION_1,
86+
{QNN_CONTEXTMEMTYPE_RAW, {{voidBufferPtr, buffer_size}}}};
87+
88+
auto status = ReadBinaryFromFile(lora_adapter_bin_path,
89+
reinterpret_cast<uint8_t*>(buffer.get()),
90+
buffer_size);
91+
92+
ORT_RETURN_IF(status != Status::OK(), "Failed to read binary data.");
93+
Qnn_GraphHandle_t graph;
94+
bool graph_retrieve_success = false;
95+
for (size_t cIdx = 0; cIdx < contexts_.size(); cIdx++) {
96+
auto graph_retrieve_rt = qnn_interface_.graphRetrieve(contexts_[cIdx], graph_name.c_str(), &graph);
97+
if (QNN_SUCCESS != graph_retrieve_rt) {
98+
continue;
11399
}
100+
101+
graph_retrieve_success = true;
102+
103+
auto context_apply_binary_section_rt = qnn_interface_.contextApplyBinarySection(
104+
contexts_[cIdx], graph, QNN_CONTEXT_SECTION_UPDATABLE, &contextBuffer, profile_backend_handle_, nullptr);
105+
ORT_RETURN_IF(QNN_SUCCESS != context_apply_binary_section_rt, "Failed to apply binary section.");
106+
break;
107+
}
108+
ORT_RETURN_IF_NOT(graph_retrieve_success, "Failed to retrieve graph: ", graph_name, " and apply binary section.");
114109
}
115-
file.close();
110+
}
111+
file.close();
116112
} else {
117-
LOGS_DEFAULT(ERROR) << "Unable to load Lora Config " << lora_config_path;
113+
LOGS_DEFAULT(ERROR) << "Unable to load Lora Config " << lora_config_path;
118114
}
119115

120116
return Status::OK();

onnxruntime/core/providers/qnn/qnn_execution_provider.cc

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1203,7 +1203,7 @@ Status QNNExecutionProvider::OnRunStart(const onnxruntime::RunOptions& run_optio
12031203
}
12041204

12051205
std::string lora_config = "";
1206-
if (TryGetConfigEntry(config_options, kOrtRunOptionsConfigQnnLoraConfig, lora_config)){
1206+
if (TryGetConfigEntry(config_options, kOrtRunOptionsConfigQnnLoraConfig, lora_config)) {
12071207
LOGS_DEFAULT(VERBOSE) << "lora_config: " << lora_config;
12081208
ORT_RETURN_IF_ERROR(qnn_backend_manager_->ParseLoraConfig(lora_config));
12091209
}

0 commit comments

Comments
 (0)