Skip to content

Commit b7d40f1

Browse files
author
zhouwg
committed
ggml-qnn: add Qualcomm mobile SoC native backend for GGML
1 parent 27e8a23 commit b7d40f1

File tree

8 files changed

+4251
-0
lines changed

8 files changed

+4251
-0
lines changed

build-run-android.sh

Lines changed: 202 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,202 @@
1+
#!/bin/bash

# Stop at the first command that exits with a non-zero status.
set -e

# ---- host / device configuration ------------------------------------------
# Directory the script is started from; the NDK path below is derived from it.
PWD=$(pwd)
ANDROID_PLATFORM=android-34
ANDROID_NDK="${PWD}/android-ndk-r26c"
# Directory on the Android device that receives the pushed files.
REMOTE_PATH=/data/local/tmp/
# Model file (path on the device) handed to llama-cli.
GGUF_MODEL_NAME=/sdcard/deepseek-r1-distill-qwen-1.5b-q4_0.gguf

# ---- Qualcomm QNN SDK ------------------------------------------------------
# The QNN SDK could be found at:
#   https://www.qualcomm.com/developer/software/qualcomm-ai-engine-direct-sdk
#   https://developer.qualcomm.com/software/hexagon-dsp-sdk/tools
QNN_SDK_URL=https://www.qualcomm.com/developer/software/qualcomm-ai-engine-direct-sdk
QNN_SDK_PATH=/opt/qcom/aistack/qairt/2.31.0.250130/
16+
17+
# Print the NDK and QNN SDK locations the script is going to use.
dump_vars()
{
    local line
    for line in "ANDROID_NDK: ${ANDROID_NDK}" "QNN_SDK_PATH: ${QNN_SDK_PATH}"; do
        echo -e "${line}"
    done
}
22+
23+
24+
# Print the current working directory followed by an empty line.
show_pwd()
{
    echo -e "current working path:$(pwd)\n"
}
28+
29+
30+
# Abort the script (exit status 1) unless QNN_SDK_PATH names an existing
# directory.
#
# Reads:  QNN_SDK_PATH, QNN_SDK_URL
function check_qnn_sdk()
{
    # The expansion must be quoted: with an empty QNN_SDK_PATH the unquoted
    # form collapses to `[ ! -d ]`, which is false, so the check would pass.
    if [ ! -d "${QNN_SDK_PATH}" ]; then
        echo -e "QNN_SDK_PATH ${QNN_SDK_PATH} not exist, pls check or download it from ${QNN_SDK_URL}...\n"
        exit 1
    fi
}
37+
38+
39+
# Make sure the Android NDK r26c is available under ${ANDROID_NDK}.
#
# The NDK counts as present when its CMake toolchain file exists. Otherwise
# the release archive is downloaded into the current directory (unless an
# archive with that name is already there) and unpacked with unzip.
# Exits with status 1 when the download or the extraction fails.
#
# Reads:  ANDROID_NDK
function check_and_download_ndk()
{
    local ndk_zip=android-ndk-r26c-linux.zip
    local ndk_url=https://dl.google.com/android/repository/android-ndk-r26c-linux.zip

    # A regular file inside the NDK directory implies the directory exists,
    # so this single test covers both of the original checks.
    if [ -f "${ANDROID_NDK}/build/cmake/android.toolchain.cmake" ]; then
        printf "android ndk already exist:%s \n\n" "${ANDROID_NDK}"
        return 0
    fi

    if [ ! -f "${ndk_zip}" ]; then
        # Download to a temporary name first: `wget -O` creates the output
        # file even when the transfer fails, and a truncated archive would be
        # mistaken for a finished download on the next run.
        if ! wget --no-config --quiet --show-progress -O "${ndk_zip}.part" "${ndk_url}"; then
            rm -f "${ndk_zip}.part"
            printf "failed to download android ndk from %s \n" "${ndk_url}"
            exit 1
        fi
        mv "${ndk_zip}.part" "${ndk_zip}"
    fi

    # Test the command directly: under `set -e` a separate `$?` check after a
    # failing unzip would never be reached.
    if ! unzip "${ndk_zip}"; then
        printf "failed to unzip android ndk to %s \n" "${ANDROID_NDK}"
        exit 1
    fi

    printf "android ndk saved to %s \n\n" "${ANDROID_NDK}"
}
69+
70+
71+
# Configure and build the project for Android arm64-v8a with the QNN backend
# enabled. The build tree is ./out/android.
#
# Reads:  ANDROID_NDK, ANDROID_PLATFORM, QNN_SDK_PATH
function build_arm64
{
    # -S/-B are the documented spellings of the source/build directory
    # options (-H is an undocumented legacy alias).
    # ANDROID_PLATFORM comes from the script configuration; `latest` (the
    # previously hard-coded value) is used only when it is unset or empty.
    cmake -S . -B ./out/android \
        -DGGML_USE_QNN=ON \
        -DCMAKE_TOOLCHAIN_FILE="${ANDROID_NDK}/build/cmake/android.toolchain.cmake" \
        -DANDROID_ABI=arm64-v8a \
        -DANDROID_PLATFORM="${ANDROID_PLATFORM:-latest}" \
        -DCMAKE_C_FLAGS=-march=armv8.7-a \
        -DGGML_QNN=ON \
        -DGGML_QNN_SDK_PATH="${QNN_SDK_PATH}"

    cd out/android
    make -j16
    show_pwd

    # Return to the directory the function was entered from.
    cd -
}
80+
81+
82+
# Delete the ./out build directory if it exists.
remove_temp_dir()
{
    # Nothing to clean up when there is no build directory.
    [ -d out ] || return 0

    echo "remove out directory in $(pwd)"
    rm -rf out
}
89+
90+
91+
# Push the QNN runtime libraries to the device unless libQnnCpu.so is already
# present in ${REMOTE_PATH}.
#
# Reads:  REMOTE_PATH
# Calls:  update_qnn_libs
function check_qnn_libs()
{
    # reuse the cached qnn libs on Android phone
    #
    # The probe is used directly as the `if` condition: the script runs with
    # `set -e`, so a failing stand-alone `adb shell ls` would terminate the
    # script before a following `$?` test could select the upload branch.
    if adb shell ls "${REMOTE_PATH}/libQnnCpu.so"; then
        printf "QNN libs already exist on Android phone\n"
    else
        update_qnn_libs
    fi
}
101+
102+
103+
# Push the QNN runtime libraries from the SDK to ${REMOTE_PATH} on the device:
# first the aarch64-android libraries, then the Hexagon v75 skel library.
#
# Reads:  QNN_SDK_PATH, REMOTE_PATH
update_qnn_libs()
{
    local lib
    for lib in \
        libQnnSystem.so \
        libQnnCpu.so \
        libQnnGpu.so \
        libQnnHtp.so \
        libQnnHtpNetRunExtensions.so \
        libQnnHtpPrepare.so \
        libQnnHtpV75Stub.so
    do
        adb push ${QNN_SDK_PATH}/lib/aarch64-android/${lib} ${REMOTE_PATH}/
    done

    # The skel library lives in the hexagon-v75 tree of the SDK.
    adb push ${QNN_SDK_PATH}/lib/hexagon-v75/unsigned/libQnnHtpV75Skel.so ${REMOTE_PATH}/
}
115+
116+
117+
# Run the complete build pipeline, one step after the other.
build_ggml_qnn()
{
    local step
    for step in \
        show_pwd \
        check_and_download_ndk \
        check_qnn_sdk \
        dump_vars \
        remove_temp_dir \
        build_arm64
    do
        "${step}"
    done
}
126+
127+
128+
# Deploy llama-cli (and the built shared libraries, when libggml-qnn.so
# exists) to the device and run it there with a fixed prompt.
#
# Reads:  REMOTE_PATH, GGUF_MODEL_NAME
run_llamacli()
{
    local host_bin=./out/android/bin
    local remote_cmd

    check_qnn_libs

    # Shared libraries are pushed only when the QNN backend library was built.
    if [ -f ${host_bin}/libggml-qnn.so ]; then
        adb push ${host_bin}/*.so ${REMOTE_PATH}/
    fi
    adb push ${host_bin}/llama-cli ${REMOTE_PATH}/
    adb shell chmod +x ${REMOTE_PATH}/llama-cli

    # Command line executed on the device; LD_LIBRARY_PATH points at the
    # directory the libraries were pushed to.
    remote_cmd="cd ${REMOTE_PATH} \
               && export LD_LIBRARY_PATH=${REMOTE_PATH} \
               && ${REMOTE_PATH}/llama-cli -mg 2 -m ${GGUF_MODEL_NAME} -p \"introduce the movie Once Upon a Time in America briefly.\n\""
    adb shell "${remote_cmd}"
}
143+
144+
# Deploy test-backend-ops (and the built shared libraries, when
# libggml-qnn.so exists) to the device and run `test-backend-ops test` there.
#
# Reads:  REMOTE_PATH
function run_test-backend-ops()
{
    local host_bin=./out/android/bin
    local remote_cmd

    check_qnn_libs

    # Shared libraries are pushed only when the QNN backend library was built.
    if [ -f ${host_bin}/libggml-qnn.so ]; then
        adb push ${host_bin}/*.so ${REMOTE_PATH}/
    fi
    adb push ${host_bin}/test-backend-ops ${REMOTE_PATH}/
    adb shell chmod +x ${REMOTE_PATH}/test-backend-ops

    # Command line executed on the device.
    remote_cmd="cd ${REMOTE_PATH} \
               && export LD_LIBRARY_PATH=${REMOTE_PATH} \
               && ${REMOTE_PATH}/test-backend-ops test"
    adb shell "${remote_cmd}"
}
159+
160+
161+
# Print the list of supported sub-commands, followed by four empty lines.
show_usage()
{
    local cmd

    printf '%s\n' "Usage:"
    for cmd in build updateqnnlib run_llamacli run_testop; do
        printf ' %s %s\n' "$0" "${cmd}"
    done
    printf '\n\n\n\n'
}
170+
171+
172+
# ---- entry point ------------------------------------------------------------
# Exactly one argument is expected:
#   build | updateqnnlib | run_llamacli | run_testop   run the action, exit 0
#   anything else (including -h / help)                print usage, exit 1
show_pwd

if [ $# -ne 1 ]; then
    show_usage
    exit 1
fi

# Map the sub-command to its handler. Unknown sub-commands are rejected here
# instead of falling through and exiting 0 without doing anything.
case "$1" in
    build)        action=build_ggml_qnn ;;
    run_llamacli) action=run_llamacli ;;
    run_testop)   action=run_test-backend-ops ;;
    updateqnnlib) action=update_qnn_libs ;;
    *)
        show_usage
        exit 1
        ;;
esac

# The SDK is required only when an action is going to run, so it is checked
# after argument handling; usage output no longer depends on it.
check_qnn_sdk

"${action}"
exit 0

ggml/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,7 @@ option(GGML_OPENCL "ggml: use OpenCL"
187187
option(GGML_OPENCL_PROFILING "ggml: use OpenCL profiling (increases overhead)" OFF)
188188
option(GGML_OPENCL_EMBED_KERNELS "ggml: embed kernels" ON)
189189
option(GGML_OPENCL_USE_ADRENO_KERNELS "ggml: use optimized kernels for Adreno" ON)
190+
# Off by default: ggml/src/ggml-qnn/CMakeLists.txt stops the configure step
# with a FATAL_ERROR on non-Android targets, so the backend has to be
# requested explicitly with -DGGML_QNN=ON.
option(GGML_QNN "ggml: use QNN" OFF)
190191

191192
# toolchain for vulkan-shaders-gen
192193
set (GGML_VULKAN_SHADERS_GEN_TOOLCHAIN "" CACHE FILEPATH "ggml: toolchain file for vulkan-shaders-gen")
@@ -250,6 +251,7 @@ set(GGML_PUBLIC_HEADERS
250251
include/ggml-rpc.h
251252
include/ggml-sycl.h
252253
include/ggml-vulkan.h
254+
include/ggml-qnn.h
253255
include/gguf.h)
254256

255257
set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")

ggml/include/ggml-qnn.h

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
/*
2+
* Copyright (c) 2023-2024 The ggml authors
3+
*
4+
* Permission is hereby granted, free of charge, to any person obtaining a copy
5+
* of this software and associated documentation files (the "Software"), to
6+
* deal in the Software without restriction, including without limitation the
7+
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
8+
* sell copies of the Software, and to permit persons to whom the Software is
9+
* furnished to do so, subject to the following conditions:
10+
*
11+
* The above copyright notice and this permission notice shall be included in
12+
* all copies or substantial portions of the Software.
13+
*
14+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19+
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20+
* IN THE SOFTWARE.
21+
*/
22+
#pragma once
23+
24+
#include "ggml.h"
25+
#include "ggml-backend.h"
26+
27+
#ifdef __cplusplus
28+
extern "C" {
29+
#endif
30+
31+
#define GGML_QNN_MAX_DEVICES 3
32+
#define GGML_QNN_BACKEND_NAME "qnn"
33+
34+
/* Device identifiers of the QNN backend; the values are consecutive from 0. */
enum QNNBackend {
    QNN_BACKEND_CPU  = 0,
    QNN_BACKEND_GPU  = 1,
    QNN_BACKEND_NPU  = 2,
    /* "fake" QNN backend for compare performance between QNN backend and cpu backend */
    QNN_BACKEND_GGML = 3,
};
40+
41+
GGML_BACKEND_API ggml_backend_t ggml_backend_qnn_init(size_t dev_num, const char * qnn_lib_path);
42+
43+
GGML_BACKEND_API bool ggml_backend_is_qnn(ggml_backend_t backend);
44+
45+
GGML_BACKEND_API void ggml_backend_qnn_set_n_threads(ggml_backend_t backend, int thread_counts);
46+
47+
GGML_BACKEND_API int ggml_backend_qnn_get_device_count(void);
48+
49+
GGML_BACKEND_API ggml_backend_reg_t ggml_backend_qnn_reg(void);
50+
51+
inline const char * ggml_backend_qnn_get_devname(size_t dev_num) {
52+
switch (dev_num) {
53+
case QNN_BACKEND_CPU:
54+
return "QNN-CPU";
55+
case QNN_BACKEND_GPU:
56+
return "QNN-GPU";
57+
case QNN_BACKEND_NPU:
58+
return "QNN-NPU";
59+
case QNN_BACKEND_GGML:
60+
return "ggml"; //"fake" QNN backend, used for compare performance between QNN backend and original GGML
61+
default:
62+
return "unknown";
63+
}
64+
}
65+
66+
#ifdef __cplusplus
67+
}
68+
#endif

ggml/src/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,7 @@ ggml_add_backend(RPC)
329329
ggml_add_backend(SYCL)
330330
ggml_add_backend(Vulkan)
331331
ggml_add_backend(OpenCL)
332+
ggml_add_backend(QNN)
332333

333334
foreach (target ggml-base ggml)
334335
target_include_directories(${target} PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include> $<INSTALL_INTERFACE:include>)

ggml/src/ggml-backend-reg.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,10 @@
6666
#include "ggml-kompute.h"
6767
#endif
6868

69+
#ifdef GGML_USE_QNN
70+
#include "ggml-qnn.h"
71+
#endif
72+
6973
// disable C++17 deprecation warning for std::codecvt_utf8
7074
#if defined(__clang__)
7175
# pragma clang diagnostic push
@@ -180,6 +184,9 @@ struct ggml_backend_registry {
180184
#ifdef GGML_USE_KOMPUTE
181185
register_backend(ggml_backend_kompute_reg());
182186
#endif
187+
#ifdef GGML_USE_QNN
188+
register_backend(ggml_backend_qnn_reg());
189+
#endif
183190
#ifdef GGML_USE_CPU
184191
register_backend(ggml_backend_cpu_reg());
185192
#endif
@@ -573,6 +580,7 @@ void ggml_backend_load_all_from_path(const char * dir_path) {
573580
ggml_backend_load_best("vulkan", silent, dir_path);
574581
ggml_backend_load_best("opencl", silent, dir_path);
575582
ggml_backend_load_best("musa", silent, dir_path);
583+
ggml_backend_load_best("qnn", silent, dir_path);
576584
ggml_backend_load_best("cpu", silent, dir_path);
577585
// check the environment variable GGML_BACKEND_PATH to load an out-of-tree backend
578586
const char * backend_path = std::getenv("GGML_BACKEND_PATH");

ggml/src/ggml-qnn/CMakeLists.txt

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# Build rules for the ggml QNN backend library (target: ggml-qnn).
#
# Inputs:
#   GGML_QNN_SDK_PATH            root of the QNN SDK; falls back to the
#                                QNN_SDK_PATH environment variable
#   QNN_DEFAULT_LIB_SEARCH_PATH  cache entry, library search path compiled
#                                into the backend
message(STATUS "Using QNN backend")

# Only Android targets are supported.
if(NOT CMAKE_SYSTEM_NAME STREQUAL "Android")
    message(FATAL_ERROR "QNN now only available on Android")
endif()

# Fail with a clear message instead of passing LOG_LIB-NOTFOUND to the linker.
find_library(LOG_LIB log)
if(NOT LOG_LIB)
    message(FATAL_ERROR "Android log library (log) not found")
endif()
set(QNN_LINK_LIBRARIES ${LOG_LIB})
set(QNN_DEFAULT_LIB_SEARCH_PATH "/data/local/tmp/" CACHE STRING "customized library search path for QNN backend")

if(NOT DEFINED GGML_QNN_SDK_PATH)
    # try read from environment variable
    if(DEFINED ENV{QNN_SDK_PATH})
        set(GGML_QNN_SDK_PATH "$ENV{QNN_SDK_PATH}")
    else()
        message(FATAL_ERROR "GGML_QNN_SDK_PATH not defined")
    endif()
endif()

message(STATUS "QNN_SDK_PATH: ${GGML_QNN_SDK_PATH}")

# CONFIGURE_DEPENDS re-runs the glob at build time so that added or removed
# source files are picked up without a manual re-configure.
file(GLOB QNN_SOURCES CONFIGURE_DEPENDS "${CMAKE_CURRENT_LIST_DIR}/*.cpp")
ggml_add_backend_library(ggml-qnn
    ${QNN_SOURCES}
)

# Request -O3 for Release builds on the target itself rather than by editing
# CMAKE_CXX_FLAGS_RELEASE for the whole directory.
target_compile_options(ggml-qnn PRIVATE
    "$<$<AND:$<CONFIG:Release>,$<COMPILE_LANGUAGE:CXX>>:-O3>"
)

target_include_directories(ggml-qnn PRIVATE
    "${GGML_QNN_SDK_PATH}/include/QNN"
    "${CMAKE_CURRENT_LIST_DIR}"
)
target_link_libraries(ggml-qnn PRIVATE ${QNN_LINK_LIBRARIES})

# Normalize the search path to exactly one trailing slash. The stripped value
# must be the one used in the definition; using the raw cache value would
# turn the default "/data/local/tmp/" into "/data/local/tmp//".
string(REGEX REPLACE "/+$" "" GGML_QNN_DEFAULT_LIB_SEARCH_PATH "${QNN_DEFAULT_LIB_SEARCH_PATH}")
target_compile_definitions(ggml-qnn PRIVATE
    GGML_QNN_DEFAULT_LIB_SEARCH_PATH="${GGML_QNN_DEFAULT_LIB_SEARCH_PATH}/"
)

0 commit comments

Comments
 (0)