Skip to content

Commit 28565d1

Browse files
committed
Enhance ggml_backend_reg_layla to support Hexagon backend and update CMake configuration for Hexagon SDK integration
1 parent fe88096 commit 28565d1

File tree

5 files changed

+78
-41
lines changed

5 files changed

+78
-41
lines changed

ggml/include/ggml-backend.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,7 @@ extern "C" {
202202
//
203203
// Backend registry
204204
//
205-
GGML_API void ggml_backend_reg_layla(bool useVulkan, bool useOpenCL);
205+
GGML_API void ggml_backend_reg_layla(bool useVulkan, bool useOpenCL, bool useHexagon);
206206

207207
GGML_API void ggml_backend_device_register(ggml_backend_dev_t device);
208208

ggml/src/ggml-backend-reg.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,7 @@ struct ggml_backend_reg_entry {
161161

162162
static bool laylaUseVulkan = false;
163163
static bool laylaUseOpenCL = false;
164+
static bool laylaUseHexagon = false;
164165

165166
struct ggml_backend_registry {
166167
std::vector<ggml_backend_reg_entry> backends;
@@ -199,7 +200,9 @@ struct ggml_backend_registry {
199200
register_backend(ggml_backend_kompute_reg());
200201
#endif
201202
#ifdef GGML_USE_HEXAGON
202-
register_backend(ggml_backend_hexagon_reg());
203+
if(laylaUseHexagon) {
204+
register_backend(ggml_backend_hexagon_reg());
205+
}
203206
#endif
204207
#ifdef GGML_USE_CPU
205208
register_backend(ggml_backend_cpu_reg());
@@ -310,9 +313,10 @@ struct ggml_backend_registry {
310313
}
311314
};
312315

313-
void ggml_backend_reg_layla(bool useVulkan, bool useOpenCL) {
316+
void ggml_backend_reg_layla(bool useVulkan, bool useOpenCL, bool useHexagon) {
314317
laylaUseVulkan = useVulkan;
315318
laylaUseOpenCL = useOpenCL;
319+
laylaUseHexagon = useHexagon;
316320
}
317321

318322
static ggml_backend_registry & get_reg() {

ggml/src/ggml-hexagon/CMakeLists.txt

Lines changed: 31 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -33,42 +33,45 @@ endif()
3333
#v73 --- Snapdragon 8 Gen2
3434
#v75 --- Snapdragon 8 Gen3
3535
#v79 --- Snapdragon 8 Elite(aka Gen4)
36-
if(NOT DEFINED HTP_ARCH_VERSION)
37-
message(FATAL_ERROR "HTP_ARCH_VERSION not defined, valid htp arch: v68,v69,v73,v75,v79")
38-
endif()
36+
# we do not use HTP_ARCH_VERSION right now because we don't use raw cdsp calls
37+
#if(NOT DEFINED HTP_ARCH_VERSION)
38+
# message(FATAL_ERROR "HTP_ARCH_VERSION not defined, valid htp arch: v68,v69,v73,v75,v79")
39+
#endif()
3940

4041
#check whether user's specified htp arch is valid
41-
set(CHECK_HTP_ARCH "WRONG")
42-
foreach (feat v68 v69 v73 v75 v79)
43-
if (${feat} STREQUAL ${HTP_ARCH_VERSION})
44-
set(CHECK_HTP_ARCH "GOOD")
45-
endif()
46-
endforeach()
47-
if (${CHECK_HTP_ARCH} STREQUAL "WRONG")
48-
message(FATAL_ERROR "ggml-hexagon backend only support htp arch v68,v69,v73,v75,v79")
49-
endif()
42+
#set(CHECK_HTP_ARCH "WRONG")
43+
#foreach (feat v68 v69 v73 v75 v79)
44+
# if (${feat} STREQUAL ${HTP_ARCH_VERSION})
45+
# set(CHECK_HTP_ARCH "GOOD")
46+
# endif()
47+
#endforeach()
48+
#if (${CHECK_HTP_ARCH} STREQUAL "WRONG")
49+
# message(FATAL_ERROR "ggml-hexagon backend only support htp arch v68,v69,v73,v75,v79")
50+
#endif()
5051

5152
#check optimization flags
5253
set(OPT_FLAG " ")
53-
if (${HTP_ARCH_VERSION} STREQUAL "v75" OR ${HTP_ARCH_VERSION} STREQUAL "v79")
54+
#if (${HTP_ARCH_VERSION} STREQUAL "v75" OR ${HTP_ARCH_VERSION} STREQUAL "v79")
5455
#works fine on Snapdragon 8Gen3&8Elite with 1.5x - 3x performance gains with the default ggml backend
55-
set(OPT_FLAG " -O3 -march=armv8.7-a -mcpu=cortex-x1 -mtune=cortex-x1 -flto -D_GNU_SOURCE -fvectorize -ffp-model=fast -fno-finite-math-only")
56-
endif()
57-
message("OPT_FLAG:${OPT_FLAG}")
56+
# set(OPT_FLAG " -O3 -march=armv8.7-a -mcpu=cortex-x1 -mtune=cortex-x1 -flto -D_GNU_SOURCE -fvectorize -ffp-model=fast -fno-finite-math-only")
57+
#endif()
58+
#message("OPT_FLAG:${OPT_FLAG}")
5859

5960
if(CMAKE_SYSTEM_NAME STREQUAL "Android")
6061
find_library(LOG_LIB log)
6162

62-
add_library(cdsprpc
63-
SHARED
64-
IMPORTED)
65-
set_target_properties(cdsprpc
66-
PROPERTIES
67-
IMPORTED_LOCATION
68-
${HEXAGON_SDK_PATH}/ipc/fastrpc/remote/ship/android_aarch64/libcdsprpc.so)
63+
# we do not use libcdsprpc.so provided in the Hexagon SDK, we will look for the one installed by the user's phone vendor
64+
#add_library(cdsprpc
65+
# SHARED
66+
# IMPORTED)
67+
#set_target_properties(cdsprpc
68+
# PROPERTIES
69+
# IMPORTED_LOCATION
70+
# ${HEXAGON_SDK_PATH}/ipc/fastrpc/remote/ship/android_aarch64/libcdsprpc.so)
6971

70-
set(QNN_LINK_LIBRARIES ${LOG_LIB} cdsprpc)
71-
set(QNN_DEFAULT_LIB_SEARCH_PATH "/data/local/tmp/" CACHE STRING "customized library search path for QNN backend")
72+
#set(QNN_LINK_LIBRARIES ${LOG_LIB} cdsprpc)
73+
set(QNN_LINK_LIBRARIES ${LOG_LIB})
74+
#set(QNN_DEFAULT_LIB_SEARCH_PATH "/data/local/tmp/" CACHE STRING "customized library search path for QNN backend")
7275

7376
include_directories(${HEXAGON_SDK_PATH}/incs)
7477
include_directories(${HEXAGON_SDK_PATH}/incs/stddef)
@@ -129,5 +132,6 @@ function(ggml_hexagon_setup_cfg KNAME)
129132
)
130133
endfunction()
131134

132-
ggml_hexagon_build_kernel("cdsp")
133-
ggml_hexagon_setup_cfg("ggml-hexagon.cfg")
135+
# we do not build cdsp kernels directly in CMake
136+
#ggml_hexagon_build_kernel("cdsp")
137+
#ggml_hexagon_setup_cfg("ggml-hexagon.cfg")

ggml/src/ggml-hexagon/ggml-hexagon.cpp

Lines changed: 32 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -391,7 +391,7 @@ static struct hexagon_appcfg_t g_hexagon_appcfg = {
391391
#if defined(STANDARD_ANDROID_APP)
392392
.runtime_libpath = "/data/data/com.kantvai.kantvplayer/",
393393
#else
394-
.runtime_libpath = "/data/local/tmp/",
394+
.runtime_libpath = "/data/data/com.layla/files/app-data/qnn-inference/",
395395
#endif
396396
#elif defined(__linux__)
397397
.qnn_runtimelib_path = "/tmp/",
@@ -1829,24 +1829,31 @@ static void ggmlhexagon_set_runtime_path(size_t device, const std::string & path
18291829
if ((HEXAGON_BACKEND_QNNNPU == device) || (HWACCEL_CDSP == g_hexagon_appcfg.hwaccel_approach)) {
18301830
std::string lib_runtime_path = path + ":/vendor/dsp/cdsp:/vendor/lib64:/vendor/dsp/dsp:/vendor/dsp/images";
18311831
if (0 == setenv("LD_LIBRARY_PATH", lib_runtime_path.c_str(), 1)) {
1832-
GGMLHEXAGON_LOG_DEBUG("setenv LD_LIBRARY_PATH %s successfully", lib_runtime_path.c_str());
1832+
GGMLHEXAGON_LOG_INFO("setenv LD_LIBRARY_PATH %s successfully", lib_runtime_path.c_str());
18331833
} else {
18341834
GGMLHEXAGON_LOG_ERROR("setenv LD_LIBRARY_PATH %s failure", lib_runtime_path.c_str());
18351835
}
18361836

18371837
std::string adsp_runtime_path = path + ";/vendor/dsp/cdsp;/vendor/lib/rfsa/adsp;/system/lib/rfsa/adsp;/vendor/dsp/dsp;/vendor/dsp/images;/dsp";
18381838
if (0 == setenv("ADSP_LIBRARY_PATH", adsp_runtime_path.c_str(), 1)) {
1839-
GGMLHEXAGON_LOG_DEBUG("setenv ADSP_LIBRARY_PATH %s successfully", adsp_runtime_path.c_str());
1839+
GGMLHEXAGON_LOG_INFO("setenv ADSP_LIBRARY_PATH %s successfully", adsp_runtime_path.c_str());
18401840
} else {
18411841
GGMLHEXAGON_LOG_ERROR("setenv ADSP_LIBRARY_PATH %s failure", adsp_runtime_path.c_str());
18421842
}
1843+
1844+
std::string dsp_runtime_path = path;
1845+
if (0 == setenv("DSP_LIBRARY_PATH", dsp_runtime_path.c_str(), 1)) {
1846+
GGMLHEXAGON_LOG_INFO("setenv DSP_LIBRARY_PATH %s successfully", dsp_runtime_path.c_str());
1847+
} else {
1848+
GGMLHEXAGON_LOG_ERROR("setenv DSP_LIBRARY_PATH %s failure", dsp_runtime_path.c_str());
1849+
}
18431850
} else {
18441851
if (0 == setenv("LD_LIBRARY_PATH",
18451852
(path +
18461853
":/vendor/dsp/cdsp:/vendor/lib64:/vendor/dsp/dsp:/vendor/dsp/images").c_str(),
18471854
1)) {
18481855
GGMLHEXAGON_LOG_DEBUG("%s backend setenv successfully\n",
1849-
ggml_backend_hexagon_get_devname(device));
1856+
ggml_backend_hexagon_get_devname(device));
18501857
} else {
18511858
GGMLHEXAGON_LOG_ERROR("%s backend setenv failure\n",
18521859
ggml_backend_hexagon_get_devname(device));
@@ -3375,10 +3382,10 @@ int qnn_instance::qnn_init(const QnnSaver_Config_t ** saver_config) {
33753382

33763383
#if defined(__ANDROID__) || defined(__linux__)
33773384
std::filesystem::path full_path(std::string(g_hexagon_appcfg.runtime_libpath) + "libcdsprpc.so");
3378-
full_path /= std::filesystem::path("libcdsprpc.so").filename();
3385+
//full_path /= std::filesystem::path("libcdsprpc.so").filename();
33793386
_rpc_lib_handle = dlopen(full_path.string().c_str(), RTLD_NOW | RTLD_LOCAL);
33803387
if (nullptr == _rpc_lib_handle) {
3381-
GGMLHEXAGON_LOG_WARN("failed to load %s\n", full_path.c_str());
3388+
GGMLHEXAGON_LOG_WARN("failed to load %s from local file, trying to find in system libraries\n", full_path.c_str());
33823389
_rpc_lib_handle = dlopen("libcdsprpc.so", RTLD_NOW | RTLD_LOCAL);
33833390
}
33843391
#else
@@ -5185,6 +5192,8 @@ static int ggmlhexagon_request_status_notifications(int domain_id, void * contex
51855192
}
51865193

51875194
static int ggmlhexagon_init_rpcmempool(ggml_backend_hexagon_context * ctx) {
5195+
throw std::runtime_error("Not implemented. Directly initialising RPC memory pool is not supported right now.");
5196+
51885197
size_t candidate_size = 0;
51895198
uint8_t * rpc_buffer = nullptr;
51905199
size_t probe_slots[] = {1024, 1536, 2000, 2048};
@@ -5232,6 +5241,8 @@ static int ggmlhexagon_init_rpcmempool(ggml_backend_hexagon_context * ctx) {
52325241
}
52335242

52345243
static void ggmlhexagon_deinit_rpcmempool(ggml_backend_hexagon_context * ctx) {
5244+
throw std::runtime_error("Not implemented. Directly initialising RPC memory pool is not supported right now.");
5245+
52355246
if ((g_hexagon_appcfg.hwaccel_approach == HWACCEL_CDSP) && (1 == g_hexagon_appcfg.enable_rpc_ion_mempool)) {
52365247
if (ctx->rpc_mempool) {
52375248
//deregister rpc memory pool
@@ -6233,7 +6244,11 @@ static ggml_backend_buffer_type_t ggml_backend_hexagon_buffer_type(size_t device
62336244
//cover following special case:
62346245
// toggle the backend back and forth between cDSP and ggml in a standard Android APP or in
62356246
// the same running process
6236-
g_hexagon_appcfg.hexagon_backend = device_index;
6247+
6248+
// TODO: not sure why we need to update the global setting here in the original code
6249+
// it seems this code is reached when we allocate buffers for all devices (including the qnn-cpu device)
6250+
// so if it reaches this code, then it won't use the NPU anymore since the backend config will be updated to use the cpu device
6251+
// g_hexagon_appcfg.hexagon_backend = device_index;
62376252
}
62386253

62396254
static struct ggml_backend_buffer_type ggml_backend_hexagon_buffer_types[GGML_HEXAGON_MAX_DEVICES];
@@ -6284,6 +6299,10 @@ static const char * ggml_backend_hexagon_host_buffer_name(ggml_backend_buffer_t
62846299
}
62856300

62866301
static void ggml_backend_hexagon_host_buffer_free(ggml_backend_buffer_t buffer) {
6302+
// always use ggml memory management for now
6303+
ggml_aligned_free(buffer->context, 0);
6304+
return;
6305+
62876306
if (0 == g_hexagon_appcfg.enable_pinned_memory) {
62886307
ggml_aligned_free(buffer->context, 0);
62896308
} else {
@@ -6292,6 +6311,9 @@ static void ggml_backend_hexagon_host_buffer_free(ggml_backend_buffer_t buffer)
62926311
}
62936312

62946313
static void * ggml_hexagon_host_malloc(ggml_backend_buffer_type_t buft, size_t size) {
6314+
// we always use ggml malloc right now
6315+
return ggml_aligned_malloc(size);
6316+
62956317
if (0 == g_hexagon_appcfg.enable_pinned_memory) {
62966318
return ggml_aligned_malloc(size);
62976319
} else {
@@ -6664,6 +6686,9 @@ ggml_backend_t ggml_backend_hexagon_init(size_t device, const char * runtime_lib
66646686
ggmlhexagon_set_runtime_path(device, runtime_libpath);
66656687
}
66666688

6689+
// the condition above will never be true because our hardcoded runtime_libpath is always the same as the config, so we manually set the library paths here
6690+
ggmlhexagon_set_runtime_path(g_hexagon_appcfg.hexagon_backend, g_hexagon_appcfg.runtime_libpath);
6691+
66676692
if (nullptr != g_hexagon_mgr[device].backend) {
66686693
GGMLHEXAGON_LOG_DEBUG("backend %d(%s) already loaded", device,
66696694
ggml_backend_hexagon_get_devname(device));

ggml/src/ggml-hexagon/kernels/stub.c

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -291,10 +291,12 @@ __QAIC_SLIM_EXPORT const Interface __QAIC_SLIM(ggmlop_slim) = {8,&(methodArrays[
291291
extern "C" {
292292
#endif
293293
__QAIC_STUB_EXPORT int __QAIC_STUB(ggmlop_dsp_open)(const char* uri, remote_handle64* h) __QAIC_STUB_ATTRIBUTE {
294-
return __QAIC_REMOTE(remote_handle64_open)(uri, h);
294+
return -1; // don't support direct dsp calls yet
295+
//return __QAIC_REMOTE(remote_handle64_open)(uri, h);
295296
}
296297
__QAIC_STUB_EXPORT int __QAIC_STUB(ggmlop_dsp_close)(remote_handle64 h) __QAIC_STUB_ATTRIBUTE {
297-
return __QAIC_REMOTE(remote_handle64_close)(h);
298+
return -1; // don't support direct dsp calls yet
299+
//return __QAIC_REMOTE(remote_handle64_close)(h);
298300
}
299301
static __inline int _stub_method(remote_handle64 _handle, uint32_t _mid, uint32_t _in0[1], uint32_t _in1[1], uint32_t _in2[1], uint32_t _in3[1]) {
300302
remote_arg _pra[1] = {0};
@@ -306,7 +308,8 @@ static __inline int _stub_method(remote_handle64 _handle, uint32_t _mid, uint32_
306308
_COPY(_primIn, 4, _in1, 0, 4);
307309
_COPY(_primIn, 8, _in2, 0, 4);
308310
_COPY(_primIn, 12,_in3, 0, 4);
309-
_TRY_FARF(_nErr, __QAIC_REMOTE(remote_handle64_invoke)(_handle, REMOTE_SCALARS_MAKEX(0, _mid, 1, 0, 0, 0), _pra));
311+
// TODO: we don't support direct dsp calls yet
312+
//_TRY_FARF(_nErr, __QAIC_REMOTE(remote_handle64_invoke)(_handle, REMOTE_SCALARS_MAKEX(0, _mid, 1, 0, 0, 0), _pra));
310313
_CATCH_FARF(_nErr) {
311314
_QAIC_FARF(RUNTIME_ERROR, "ERROR 0x%x: handle=0x%"PRIx64", scalar=0x%x, method ID=%d: %s failed\n", _nErr , _handle, REMOTE_SCALARS_MAKEX(0, _mid, 1, 0, 0, 0), _mid, __func__);
312315
}
@@ -432,7 +435,8 @@ static __inline int _stub_method_1(remote_handle64 _handle, uint32_t _mid, uintp
432435
_TRY(_nErr, _stub_pack(_al, (_praIn + 0), _ppraIn, (_praROut + 0), _ppraROut, _praHIn, _ppraHIn, _praHROut, _ppraHROut, ((char*)_primIn + 224), ((char*)_primROut + 0), (uint32_t*)&(((uint32_t*)_rout2)[0]), (uint32_t*)&(((uint32_t*)_rout2)[1]), (uint32_t*)&(((uint32_t*)_rout2)[5]), (uint32_t*)&(((uint32_t*)_rout2)[9]), (uint32_t*)&(((uint32_t*)_rout2)[10]), (uint32_t*)&(((uint32_t*)_rout2)[26]), SLIM_IFPTR32((char**)&(((uint32_t*)_rout2)[27]), (char**)&(((uint64_t*)_rout2)[14])), SLIM_IFPTR32((uint32_t*)&(((uint32_t*)_rout2)[28]), (uint32_t*)&(((uint32_t*)_rout2)[30]))));
433436
_QAIC_ASSERT(_nErr, (_numInH[0] + 0) <= 15);
434437
_QAIC_ASSERT(_nErr, (_numROutH[0] + 0) <= 15);
435-
_TRY_FARF(_nErr, __QAIC_REMOTE(remote_handle64_invoke)(_handle, REMOTE_SCALARS_MAKEX(0, _mid, (_numIn[0] + 1), (_numROut[0] + 1), (_numInH[0] + 0), (_numROutH[0] + 0)), _pra));
438+
// TODO: we don't support direct dsp calls yet
439+
//_TRY_FARF(_nErr, __QAIC_REMOTE(remote_handle64_invoke)(_handle, REMOTE_SCALARS_MAKEX(0, _mid, (_numIn[0] + 1), (_numROut[0] + 1), (_numInH[0] + 0), (_numROutH[0] + 0)), _pra));
436440
_TRY(_nErr, _stub_unpack((_praROutPost + 0), _ppraROutPost, ((char*)_primROut + 0), (uint32_t*)&(((uint32_t*)_rout2)[0]), (uint32_t*)&(((uint32_t*)_rout2)[1]), (uint32_t*)&(((uint32_t*)_rout2)[5]), (uint32_t*)&(((uint32_t*)_rout2)[9]), (uint32_t*)&(((uint32_t*)_rout2)[10]), (uint32_t*)&(((uint32_t*)_rout2)[26]), SLIM_IFPTR32((char**)&(((uint32_t*)_rout2)[27]), (char**)&(((uint64_t*)_rout2)[14])), SLIM_IFPTR32((uint32_t*)&(((uint32_t*)_rout2)[28]), (uint32_t*)&(((uint32_t*)_rout2)[30]))));
437441
_QAIC_CATCH(_nErr) {}
438442
_CATCH_FARF(_nErr) {

0 commit comments

Comments
 (0)