Commit a822d00
feat: run on win (#24)
* move qnn_instance function implementation into cpp
* wip
* wip
* move dl related function into separated file
* use cast op for gpu
* Revert "use cast op for gpu"
  This reverts commit 05df736.
* Reapply "use cast op for gpu"
  This reverts commit 2520e59.
* fix compiling error in win
* fix align_alloc in win
* fix compiling error
* add get sys free/total mem for win
* wip
* suppress warning in win
* add missing chrono header
* set the correct qnn lib name for windows
* add flag to control cpu backend
* wip
* wip
* Revert "Reapply "use cast op for gpu""
  This reverts commit f56519c.
* fix compiling error for linux build
* fix cdsprpc dynamic library name
* wip
* skip rpc load fail
* fix page_align_alloc
* suppress some warning in gcc
* wip
* reuse align to function
* more log
* add log and fix warning
* wip
* fix asan errors and memory leaks
* fix the get_io_tensors_from_graph
* improve comment
* print GGML_QNN_DEFAULT_LIB_SEARCH_PATH
* revert some unused changes
* move library search path setter into qnn module
* fix android library loading
* skip qnn_device_get_platform_info for npu emulator
1 parent 12c75f1 commit a822d00

File tree

15 files changed (+781 lines, -591 lines)

ggml/src/ggml-qnn/CMakeLists.txt

Lines changed: 15 additions & 3 deletions
@@ -4,12 +4,15 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Android")
     find_library(LOG_LIB log)
     set(QNN_LINK_LIBRARIES ${LOG_LIB})
     set(QNN_DEFAULT_LIB_SEARCH_PATH "/data/local/tmp/" CACHE STRING "customized library search path for QNN backend")
+elseif(CMAKE_SYSTEM_NAME STREQUAL "Windows" OR CMAKE_SYSTEM_NAME STREQUAL "Linux")
+    set(QNN_DEFAULT_LIB_SEARCH_PATH "" CACHE STRING "customized library search path for QNN backend")
 else()
-    message(FATAL_ERROR "QNN now only available on Android")
+    message(FATAL_ERROR "QNN now only available on Android, Windows and Linux")
 endif()
 
 if(NOT DEFINED GGML_QNN_SDK_PATH)
     # try read from environment variable
+    # TODO: create a function to search for the SDK path
     if(DEFINED ENV{QNN_SDK_PATH})
         set(GGML_QNN_SDK_PATH $ENV{QNN_SDK_PATH})
     else()
@@ -29,5 +32,14 @@ ggml_add_backend_library(ggml-qnn
 target_include_directories(ggml-qnn PRIVATE ${GGML_QNN_SDK_PATH}/include/QNN ${CMAKE_CURRENT_LIST_DIR})
 target_link_libraries(ggml-qnn PRIVATE ${QNN_LINK_LIBRARIES})
 
-string(REGEX REPLACE "/$" "" GGML_QNN_DEFAULT_LIB_SEARCH_PATH "${QNN_DEFAULT_LIB_SEARCH_PATH}")
-target_compile_definitions(ggml-qnn PRIVATE GGML_QNN_DEFAULT_LIB_SEARCH_PATH="${QNN_DEFAULT_LIB_SEARCH_PATH}/")
+if(NOT "${QNN_DEFAULT_LIB_SEARCH_PATH}" STREQUAL "")
+    string(REGEX REPLACE "/$" "" QNN_DEFAULT_LIB_SEARCH_PATH "${QNN_DEFAULT_LIB_SEARCH_PATH}")
+endif()
+
+message("GGML_QNN_DEFAULT_LIB_SEARCH_PATH: ${QNN_DEFAULT_LIB_SEARCH_PATH}")
+target_compile_definitions(ggml-qnn PRIVATE GGML_QNN_DEFAULT_LIB_SEARCH_PATH="${QNN_DEFAULT_LIB_SEARCH_PATH}")
+
+if(GGML_QNN_ENABLE_CPU_BACKEND)
+    message("GGML_QNN_ENABLE_CPU_BACKEND is enabled")
+    target_compile_definitions(ggml-qnn PRIVATE GGML_QNN_ENABLE_CPU_BACKEND)
+endif()
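
Note that the compile definition no longer bakes a trailing "/" into the macro (the old line appended one), so C++ code consuming it has to join paths itself. A minimal sketch of how such a macro might be consumed; default_lib_path is a hypothetical helper, not code from this commit:

    #include <string>

    #ifndef GGML_QNN_DEFAULT_LIB_SEARCH_PATH
    #define GGML_QNN_DEFAULT_LIB_SEARCH_PATH "" // empty: defer to the system loader
    #endif

    // hypothetical helper: prefix the configured search path, if any
    std::string default_lib_path(const std::string &lib_name) {
        std::string base = GGML_QNN_DEFAULT_LIB_SEARCH_PATH; // trailing '/' already stripped by CMake
        return base.empty() ? lib_name : base + "/" + lib_name;
    }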

ggml/src/ggml-qnn/backend-ops.cpp

Lines changed: 1 addition & 1 deletion
@@ -389,7 +389,7 @@ bool ggml_qnn_supports_tensor(ggml_backend_qnn_device_context *ctx, const ggml_t
         case GGML_TYPE_F16:
         case GGML_TYPE_Q8_0:
         case GGML_TYPE_Q4_0:
-            if (!(ctx->supported_types & (1 << tensor->type))) {
+            if (!(ctx->supported_types & (uint64_t(1) << tensor->type))) {
                 QNN_LOG_DEBUG("[%s]unsupported data type %s, supported_types: 0x%x", qnn::get_backend_name(ctx->device),
                               ggml_type_name(tensor->type), ctx->supported_types);
                 return false;
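
The widened shift matters because the literal 1 is a 32-bit int, while the promotion to uint64_t implies supported_types is a 64-bit mask: for type ids of 31 or more, 1 << tensor->type is undefined behavior and the capability check silently breaks. A standalone sketch of the failure mode; the type id 32 is illustrative:

    #include <cstdint>
    #include <cstdio>

    int main() {
        int type = 32;                        // illustrative type id
        // uint64_t bad = 1 << type;          // int shift: undefined for counts >= 32
        uint64_t mask = uint64_t(1) << type;  // 64-bit shift: well defined
        std::printf("mask = 0x%llx\n", (unsigned long long) mask);
        return 0;
    }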

ggml/src/ggml-qnn/buffer.hpp

Lines changed: 3 additions & 0 deletions
@@ -133,11 +133,14 @@ class qnn_mem_buffer : public qnn_buffer_interface {
         if (data) {
             memcpy(_buffer, data, size);
         }
+
+        QNN_LOG_DEBUG("alloc buffer: %p, size: %ld", _buffer, size);
     }
 
     explicit qnn_mem_buffer(size_t size) : qnn_mem_buffer(nullptr, size) {}
 
     ~qnn_mem_buffer() {
+        QNN_LOG_DEBUG("free buffer: %p, size: %ld", _buffer, _size);
         // the free will do nothing if the _buffer is nullptr
         qnn::align_free(_buffer);
     }
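
With both logs in place, every "alloc buffer" line should pair with a matching "free buffer" line, which helps audit the ASan and leak fixes mentioned in the commit message. A hypothetical usage sketch, assuming the class lives in namespace qnn like the rest of this backend:

    {
        qnn::qnn_mem_buffer buf(1024); // logs: alloc buffer: <ptr>, size: 1024
        // ... use the buffer ...
    }                                  // logs: free buffer: <ptr>, size: 1024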

ggml/src/ggml-qnn/dl_loader.hpp

Lines changed: 71 additions & 0 deletions
@@ -0,0 +1,71 @@
+#pragma once
+
+#ifdef __linux__
+#include <dlfcn.h>
+#include <fcntl.h>
+#elif defined(_WIN32)
+#define WIN32_LEAN_AND_MEAN
+#ifndef NOMINMAX
+#define NOMINMAX
+#endif
+#include <windows.h>
+#endif
+
+#include <string>
+
+namespace qnn {
+
+#ifdef __linux__
+typedef void *dl_handler_t;
+
+inline qnn::dl_handler_t dl_load(const std::string &lib_path) {
+    return dlopen(lib_path.c_str(), RTLD_NOW | RTLD_LOCAL);
+}
+
+inline void *dl_sym(qnn::dl_handler_t handle, const std::string &symbol) { return dlsym(handle, symbol.c_str()); }
+
+inline bool dl_unload(qnn::dl_handler_t handle) { return dlclose(handle) == 0; }
+
+inline const char *dl_error() { return dlerror(); }
+#elif defined(_WIN32)
+using dl_handler_t = HMODULE;
+
+inline qnn::dl_handler_t dl_load(const std::string &lib_path) {
+    // suppress error dialogs for missing DLLs
+    auto old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
+    SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
+
+    auto handle = LoadLibraryA(lib_path.c_str()); // TODO: use wstring version for unicode paths
+
+    SetErrorMode(old_mode);
+    return handle;
+}
+
+inline void *dl_sym(qnn::dl_handler_t handle, const std::string &symbol) {
+    auto old_mode = SetErrorMode(SEM_FAILCRITICALERRORS);
+    SetErrorMode(old_mode | SEM_FAILCRITICALERRORS);
+
+    void *p = (void *)GetProcAddress(handle, symbol.c_str());
+
+    SetErrorMode(old_mode);
+    return p;
+}
+
+inline bool dl_unload(qnn::dl_handler_t handle) {
+    FreeLibrary(handle);
+    return true;
+}
+
+inline const char *dl_error() {
+    // TODO: implement dl_error for Windows
+    return nullptr;
+}
+
+#endif
+
+template <typename Fn>
+Fn dl_sym_typed(qnn::dl_handler_t handle, const std::string &function_name) {
+    return reinterpret_cast<Fn>(dl_sym(handle, function_name));
+}
+
+} // namespace qnn
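
A hypothetical caller of these wrappers; the library name and the example_fn_t signature are illustrative only. Note that dl_error() currently returns nullptr on Windows, so callers should guard against a null message:

    #include <cstdio>

    #include "dl_loader.hpp"

    using example_fn_t = int (*)(int);

    int main() {
        auto handle = qnn::dl_load("libexample.so"); // e.g. "example.dll" on Windows
        if (!handle) {
            const char *err = qnn::dl_error();
            std::printf("load failed: %s\n", err ? err : "(no error string)");
            return 1;
        }

        if (auto fn = qnn::dl_sym_typed<example_fn_t>(handle, "example_fn")) {
            std::printf("example_fn(1) = %d\n", fn(1));
        }

        qnn::dl_unload(handle);
        return 0;
    }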

ggml/src/ggml-qnn/ggml-qnn.cpp

Lines changed: 19 additions & 53 deletions
@@ -1,23 +1,7 @@
 #include "ggml-qnn.h"
 
-#include <cassert>
-#include <chrono>
-#include <condition_variable>
-#include <fstream>
 #include <functional>
-#include <iostream>
-#include <list>
 #include <memory>
-#include <mutex>
-#include <queue>
-#include <random>
-#include <regex>
-#include <set>
-#include <sstream>
-#include <thread>
-#include <tuple>
-#include <unordered_set>
-#include <utility>
 #include <vector>
 
 #include "ggml-backend-impl.h"
@@ -44,6 +28,16 @@
 
 namespace {
 
+#ifdef _WIN32
+constexpr const char *kQnnCpuLibName = "QnnCpu.dll";
+constexpr const char *kQnnGpuLibName = "QnnGpu.dll";
+constexpr const char *kQnnNpuLibName = "QnnHtp.dll";
+#else
+constexpr const char *kQnnCpuLibName = "libQnnCpu.so";
+constexpr const char *kQnnGpuLibName = "libQnnGpu.so";
+constexpr const char *kQnnNpuLibName = "libQnnHtp.so";
+#endif
+
 struct qnn_device_caps {
     const char *name;
     const char *description;
@@ -59,23 +53,23 @@ constexpr const qnn_device_caps kDeviceCaps[] = {
         // https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/CpuOpDefSupplement.html#matmul
         "qnn-cpu",
         "Qualcomm Kryo CPU",
-        "libQnnCpu.so",
+        kQnnCpuLibName,
         GGML_BACKEND_DEVICE_TYPE_CPU,
         (1 << GGML_TYPE_I8) | (1 << GGML_TYPE_F32),
     },
     {
         // https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/GpuOpDefSupplement.html#matmul
         "qnn-gpu",
         "Qualcomm Adreno GPU",
-        "libQnnGpu.so",
+        kQnnGpuLibName,
         GGML_BACKEND_DEVICE_TYPE_GPU,
         (1 << GGML_TYPE_F32) | (1 << GGML_TYPE_F16),
     },
     {
         // https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/HtpOpDefSupplement.html#matmul
         "qnn-npu",
         "Qualcomm NPU",
-        "libQnnHtp.so",
+        kQnnNpuLibName,
         GGML_BACKEND_DEVICE_TYPE_ACCEL,
         (1 << GGML_TYPE_F32) | (1 << GGML_TYPE_F16) | (1 << GGML_TYPE_I16) | (1 << GGML_TYPE_I8),
     },
@@ -214,6 +208,8 @@ void ggml_backend_qnn_free(ggml_backend_t backend) {
         instance->qnn_finalize();
         instance.reset();
     }
+
+    delete backend;
 }
 
 bool ggml_backend_qnn_cpy_tensor_async(ggml_backend_t backend_src, ggml_backend_t backend_dst, const ggml_tensor *src,
@@ -332,42 +328,10 @@ ggml_backend_t ggml_backend_qnn_init_with_device_context(ggml_backend_dev_t dev,
     const auto device = dev_ctx->device;
     QNN_LOG_DEBUG("device %s", qnn::get_backend_name(device));
     QNN_LOG_DEBUG("extend_lib_search_path %s", extend_lib_search_path);
-    std::string path = extend_lib_search_path;
-
-    // TODO: Fix this for other platforms
-#if defined(__ANDROID__) || defined(ANDROID)
-    if (device == QNN_BACKEND_NPU) {
-        if (setenv("LD_LIBRARY_PATH",
-                   (path + ":/vendor/dsp/cdsp:/vendor/lib64:/vendor/dsp/"
-                           "dsp:/vendor/dsp/images")
-                       .c_str(),
-                   1) == 0) {
-            QNN_LOG_DEBUG("QNN NPU backend setenv successfully");
-        } else {
-            QNN_LOG_ERROR("QNN NPU backend setenv failure");
-        }
-        if (setenv("ADSP_LIBRARY_PATH",
-                   (path + ";/vendor/dsp/cdsp;/vendor/lib/rfsa/adsp;/system/lib/"
-                           "rfsa/adsp;/vendor/dsp/dsp;/vendor/dsp/images;/dsp")
-                       .c_str(),
-                   1) == 0) {
-            QNN_LOG_DEBUG("QNN NPU backend setenv successfully");
-        } else {
-            QNN_LOG_ERROR("QNN NPU backend setenv failure");
-        }
-    } else {
-        if (setenv("LD_LIBRARY_PATH", path.c_str(), 1) == 0) {
-            QNN_LOG_DEBUG("%s backend setenv successfully", qnn::get_backend_name(device));
-        } else {
-            QNN_LOG_ERROR("%s backend setenv failure", qnn::get_backend_name(device));
-        }
-    }
-#endif
-
-    auto instance = std::make_shared<qnn::qnn_instance>(path, dev_ctx->lib_name, "ggml");
+    auto instance = std::make_shared<qnn::qnn_instance>(extend_lib_search_path, dev_ctx->lib_name);
     auto result = instance->qnn_init(nullptr);
     if (result != 0) {
-        QNN_LOG_WARN("init qnn subsystem failed with qnn backend %s, pls check why", qnn::get_backend_name(device));
+        QNN_LOG_WARN("failed to init qnn backend %s", qnn::get_backend_name(device));
         return nullptr;
     }
     auto qnn_interface = instance->get_qnn_interface();
@@ -466,13 +430,15 @@ struct ggml_backend_qnn_reg_impl : ggml_backend_reg {
     QNN_LOG_DEBUG("qnn backend registry init");
     for (size_t i = 0; i < QNN_BACKEND_COUNT; i++) {
         const auto device_enum = (QNNBackend)(QNN_BACKEND_COUNT - 1 - i); // init from the last device, i.e. NPU
+#ifndef GGML_QNN_ENABLE_CPU_BACKEND
         if (device_enum == QNN_BACKEND_CPU) {
             /*
              * here we skip the initialization of CPU device,
              * cause it'll block unsupported ops fallback to ggml cpu backend
              */
             continue;
         }
+#endif
 
         device_contexts.emplace_back(std::make_unique<ggml_backend_qnn_device_context>(
             /* .device = */ device_enum, // init from the last device, i.e. NPU

ggml/src/ggml-qnn/graph.cpp

Lines changed: 57 additions & 16 deletions
@@ -1,7 +1,7 @@
 
 #include "graph.hpp"
 
-#include <set>
+#include <algorithm>
 #include <unordered_map>
 
 #include "ggml-impl.h"
@@ -106,13 +106,29 @@ bool bind_src_tensors(ggml_tensor *op, qnn::qnn_tensor_array_t &tensor_wrappers,
     return true;
 }
 
+/**
+ * @brief Extracts input and output tensors from a computational graph.
+ *
+ * This function identifies the input and output tensors of a computational graph by analyzing the connectivity between
+ * tensor nodes. It does this by iterating over each node in the graph, using a connectivity map that associates every
+ * tensor with its number of incoming connections (in_degree), outgoing connections (out_degree), and an insertion index
+ * that preserves order. The insertion index is used later to sort the tensors in their original discovery order.
+ *
+ * TODO: this algorithm is not perfect and may not work for all cases. It assumes that the tensors are
+ * connected in a way that allows for unambiguous categorization.
+ */
 int get_io_tensors_from_graph(const ggml_cgraph *cgraph, qnn::ggml_tensor_array_t &inputs,
                               qnn::ggml_tensor_array_t &outputs) {
-    using ggml_tensor_set_t = std::set<ggml_tensor *>;
+    struct _tensor_connectivity_info {
+        size_t in_degree = 0;
+        size_t out_degree = 0;
+        size_t insert_index = 0;
+    };
 
-    ggml_tensor_set_t input_set;
-    ggml_tensor_set_t output_set;
-    ggml_tensor_set_t visited_set;
+    using ggml_tensor_connectivity_map_t = std::unordered_map<ggml_tensor *, _tensor_connectivity_info>;
+
+    ggml_tensor_connectivity_map_t connectivity_map;
     int rank = 0;
     for (int i = 0; i < cgraph->n_nodes; i++) {
         ggml_tensor *dst = cgraph->nodes[i];
@@ -126,25 +142,50 @@ int get_io_tensors_from_graph(const ggml_cgraph *cgraph, qnn::ggml_tensor_array_
         }
 
         rank = std::max(rank, ggml_n_dims(dst));
-        input_set.erase(dst);
-        if (!visited_set.count(dst)) {
-            output_set.insert(dst);
-            visited_set.insert(dst);
+        if (connectivity_map.count(dst) == 0) {
+            connectivity_map[dst] = {
+                1, // in-degree, at least 1
+                0,
+                connectivity_map.size(),
+            };
+        } else {
+            ++(connectivity_map[dst].in_degree);
         }
 
         for (size_t i = 0; i < GGML_MAX_DIMS && dst->src[i]; ++i) {
             auto *src = dst->src[i];
             rank = std::max(rank, ggml_n_dims(src));
-            output_set.erase(src);
-            if (!visited_set.count(src)) {
-                input_set.insert(src);
-                visited_set.insert(src);
+
+            if (connectivity_map.count(src) == 0) {
+                connectivity_map[src] = {
+                    0,
+                    1, // out-degree, at least 1
+                    connectivity_map.size(),
+                };
+            } else {
+                ++(connectivity_map[src].out_degree);
             }
         }
     }
 
-    inputs.assign(input_set.begin(), input_set.end());
-    outputs.assign(output_set.begin(), output_set.end());
+    for (const auto &kv : connectivity_map) {
+        if (kv.second.in_degree == 0) {
+            inputs.push_back(kv.first);
+        }
+
+        if (kv.second.out_degree == 0) {
+            outputs.push_back(kv.first);
+        }
+    }
+
+    std::sort(inputs.begin(), inputs.end(), [&connectivity_map](ggml_tensor *lhs, ggml_tensor *rhs) {
+        return connectivity_map[lhs].insert_index < connectivity_map[rhs].insert_index;
+    });
+
+    std::sort(outputs.begin(), outputs.end(), [&connectivity_map](ggml_tensor *lhs, ggml_tensor *rhs) {
+        return connectivity_map[lhs].insert_index < connectivity_map[rhs].insert_index;
+    });
+
     return rank;
 }
 
@@ -187,7 +228,7 @@ qnn_graph::qnn_graph(const std::string &graph_name, QNNBackend device, std::shar
 
     QnnHtpGraph_CustomConfig_t vtcm_config;
     vtcm_config.option = QNN_HTP_GRAPH_CONFIG_OPTION_VTCM_SIZE;
-    vtcm_config.vtcmSizeInMB = vtcm_size_in_mb;
+    vtcm_config.vtcmSizeInMB = (uint32_t)vtcm_size_in_mb;
     QnnGraph_Config_t graph_vtcm_config;
     graph_vtcm_config.option = QNN_GRAPH_CONFIG_OPTION_CUSTOM;
    graph_vtcm_config.customConfig = &vtcm_config;
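
The connectivity-map rework in the first hunk classifies a tensor as a graph input when nothing inside the graph produces it (in_degree == 0) and as an output when nothing consumes it (out_degree == 0), then restores discovery order via insert_index. A toy standalone sketch of the same degree-counting idea, with string names standing in for ggml_tensor pointers:

    #include <cstdio>
    #include <string>
    #include <unordered_map>
    #include <utility>
    #include <vector>

    struct degrees {
        int in = 0;  // times produced inside the graph
        int out = 0; // times consumed inside the graph
    };

    int main() {
        // edges src -> dst for a tiny graph: a = mul(x, w); y = add(a, b)
        std::vector<std::pair<std::string, std::string>> edges = {
            {"x", "a"}, {"w", "a"}, {"a", "y"}, {"b", "y"},
        };

        std::unordered_map<std::string, degrees> deg;
        for (const auto &e : edges) {
            ++deg[e.first].out; // src flows out to a consumer
            ++deg[e.second].in; // dst is produced by an op
        }

        // prints x, w, b as inputs and y as the only output (order unspecified)
        for (const auto &kv : deg) {
            if (kv.second.in == 0) std::printf("input:  %s\n", kv.first.c_str());
            if (kv.second.out == 0) std::printf("output: %s\n", kv.first.c_str());
        }
        return 0;
    }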
