Skip to content

Commit 36c3ff6

Browse files
author
zhouwg
committed
ggml-hexagon: upgrade Android NDK to android-ndk-r28
1 parent 67c7d06 commit 36c3ff6

File tree

4 files changed

+75
-29
lines changed

4 files changed

+75
-29
lines changed

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
99

1010
if (${HTP_ARCH_VERSION} STREQUAL "v75" OR ${HTP_ARCH_VERSION} STREQUAL "v79")
1111
# these flags work fine on Snapdragon 8 Gen3 and 8 Elite, giving 1.5x - 3x performance gains when using the default ggml backend
12-
set(OPT_FLAG " -O3 -march=armv8.7-a -mcpu=cortex-x1 -mtune=cortex-x1 -fvectorize -ffp-model=fast -fno-finite-math-only")
12+
set(OPT_FLAG " -O3 -march=armv8.7-a -mcpu=cortex-x1 -mtune=cortex-x1 -flto -D_GNU_SOURCE -fvectorize -ffp-model=fast -fno-finite-math-only")
1313
message("OPT_FLAG:${OPT_FLAG}")
1414
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DGGML_USE_HEXAGON ${DEBUG_FLAG} ${OPT_FLAG}")
1515
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DGGML_USE_HEXAGON ${DEBUG_FLAG} ${OPT_FLAG}")

ggml/src/ggml-hexagon/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ endif()
4949
set(OPT_FLAG " ")
5050
if (${HTP_ARCH_VERSION} STREQUAL "v75" OR ${HTP_ARCH_VERSION} STREQUAL "v79")
5151
# these flags work fine on Snapdragon 8 Gen3 and 8 Elite, giving 1.5x - 3x performance gains when using the default ggml backend
52-
set(OPT_FLAG " -O3 -march=armv8.7-a -mcpu=cortex-x1 -mtune=cortex-x1 -fvectorize -fno-finite-math-only -ffp-model=fast ")
52+
set(OPT_FLAG " -O3 -march=armv8.7-a -mcpu=cortex-x1 -mtune=cortex-x1 -flto -D_GNU_SOURCE -fvectorize -fno-finite-math-only -ffp-model=fast ")
5353
endif()
5454
message("OPT_FLAG:${OPT_FLAG}")
5555

ggml/src/ggml-hexagon/ggml-hexagon.cpp

Lines changed: 49 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -482,7 +482,21 @@ static struct ggml_backend_hexagon_context g_hexagon_mgr[GGML_HEXAGON_MAX_DEVICE
482482
.backend = nullptr,
483483
.raw_interface = {},
484484
.raw_system_interface = {},
485-
.socinfo = {}},
485+
.socinfo = {},
486+
.qnn_singlenode_graph_map = {},
487+
.work_data = nullptr,
488+
.tasks = {},
489+
.work_size = 0,
490+
.desired_size = 0,
491+
.n_threads = 8,
492+
.rpc_mempool_capacity = 0,
493+
.rpc_mempool_len = 0,
494+
.rpc_mempool_usage = 0,
495+
.rpc_mempool = nullptr,
496+
.rpc_mempool_handle = 0,
497+
.ggmlop_handle = 0,
498+
.domain_id = HEXAGON_CDSP,
499+
},
486500

487501
{ .device = 1,
488502
.name = "qnn-gpu",
@@ -496,7 +510,21 @@ static struct ggml_backend_hexagon_context g_hexagon_mgr[GGML_HEXAGON_MAX_DEVICE
496510
.backend = nullptr,
497511
.raw_interface = {},
498512
.raw_system_interface = {},
499-
.socinfo = {}},
513+
.socinfo = {},
514+
.qnn_singlenode_graph_map = {},
515+
.work_data = nullptr,
516+
.tasks = {},
517+
.work_size = 0,
518+
.desired_size = 0,
519+
.n_threads = 8,
520+
.rpc_mempool_capacity = 0,
521+
.rpc_mempool_len = 0,
522+
.rpc_mempool_usage = 0,
523+
.rpc_mempool = nullptr,
524+
.rpc_mempool_handle = 0,
525+
.ggmlop_handle = 0,
526+
.domain_id = HEXAGON_CDSP,
527+
},
500528

501529
{ .device = 2,
502530
.name = "qnn-npu",
@@ -510,7 +538,21 @@ static struct ggml_backend_hexagon_context g_hexagon_mgr[GGML_HEXAGON_MAX_DEVICE
510538
.backend = nullptr,
511539
.raw_interface = {},
512540
.raw_system_interface = {},
513-
.socinfo = {}},
541+
.socinfo = {},
542+
.qnn_singlenode_graph_map = {},
543+
.work_data = nullptr,
544+
.tasks = {},
545+
.work_size = 0,
546+
.desired_size = 0,
547+
.n_threads = 8,
548+
.rpc_mempool_capacity = 0,
549+
.rpc_mempool_len = 0,
550+
.rpc_mempool_usage = 0,
551+
.rpc_mempool = nullptr,
552+
.rpc_mempool_handle = 0,
553+
.ggmlop_handle = 0,
554+
.domain_id = HEXAGON_CDSP,
555+
},
514556
};
515557

516558
static domain hexagon_supported_domains[] = {
@@ -3857,7 +3899,9 @@ static Qnn_Tensor_t * ggmlqnn_create_general_tensor(qnn_instance * instance, Qnn
38573899
.dataFormat = QNN_TENSOR_DATA_FORMAT_FLAT_BUFFER,
38583900
.dataType = qnn_data_type,
38593901
.quantizeParams = {.encodingDefinition = QNN_DEFINITION_UNDEFINED,
3860-
.quantizationEncoding = QNN_QUANTIZATION_ENCODING_UNDEFINED},
3902+
.quantizationEncoding = QNN_QUANTIZATION_ENCODING_UNDEFINED,
3903+
.scaleOffsetEncoding = {.scale = 0.0000000000000000f, .offset = 0}
3904+
},
38613905
.rank = rank,
38623906
.dimensions = tensor_dims,
38633907
.memType = QNN_TENSORMEMTYPE_RAW,
@@ -4559,12 +4603,6 @@ static void ggmlqnn_compute_rms_norm(ggml_backend_hexagon_context * ctx, ggml_te
45594603
GGML_UNUSED(dst);
45604604
}
45614605

4562-
static void ggmlqnn_compute_diag_mask(ggml_backend_hexagon_context * ctx, ggml_tensor * dst, float value) {
4563-
GGML_UNUSED(ctx);
4564-
GGML_UNUSED(dst);
4565-
GGML_UNUSED(value);
4566-
}
4567-
45684606
static void ggmlqnn_compute_im2col(ggml_backend_hexagon_context * ctx, ggml_tensor * dst) {
45694607
GGML_UNUSED(ctx);
45704608
GGML_UNUSED(dst);
@@ -5203,9 +5241,8 @@ static void ggmlhexagon_deinit_cdsp(ggml_backend_hexagon_context * ctx) {
52035241
hexagon_error = ggmlop_dsp_close(ctx->ggmlop_handle);
52045242
if (AEE_SUCCESS != hexagon_error) {
52055243
GGMLHEXAGON_LOG_WARN("error 0x%x: failed to close ggmlop dsp handle", hexagon_error);
5206-
} else {
5207-
ctx->ggmlop_handle = 0;
52085244
}
5245+
ctx->ggmlop_handle = 0;
52095246
}
52105247

52115248
ggmlhexagon_deinit_rpcmempool(ctx);
@@ -5722,9 +5759,6 @@ static bool ggmlhexagon_compute_forward(ggml_backend_t backend, struct ggml_tens
57225759
case GGML_OP_PERMUTE:
57235760
case GGML_OP_TRANSPOSE:
57245761
break;
5725-
case GGML_OP_DIAG_MASK_INF:
5726-
ggmlqnn_compute_diag_mask(ctx, dst, -INFINITY);
5727-
break;
57285762
case GGML_OP_SOFT_MAX:
57295763
ggmlqnn_compute_softmax(ctx, dst);
57305764
break;

scripts/build-run-android.sh

Lines changed: 24 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,39 +1,51 @@
11
#!/bin/bash
2-
# build llama.cpp + ggml-hexagon for Snapdragon mobile SoC equipped Android phone on Linux
3-
2+
# build llama.cpp + ggml-hexagon for Qualcomm Snapdragon mobile SoC equipped Android phone on Linux
3+
#
4+
# This script downloads the Android NDK and the Qualcomm QNN SDK automatically.
5+
# The Hexagon SDK must be obtained with a Qualcomm Developer Account and cannot be downloaded automatically by this script.
6+
#
47
set -e
58

69
PWD=`pwd`
7-
ANDROID_PLATFORM=android-34
8-
ANDROID_NDK=${PWD}/android-ndk-r26c
10+
11+
#path on the Android phone from which the binaries are run
912
REMOTE_PATH=/data/local/tmp/
13+
#LLM model file on Android phone
1014
GGUF_MODEL_NAME=/sdcard/gemma-3-4b-it-Q8_0.gguf
1115
GGUF_MODEL_NAME=/sdcard/qwen1_5-1_8b-chat-q4_0.gguf
1216

13-
#QNN SDK could be found at:
17+
#Android NDK can be found at:
18+
#https://developer.android.com/ndk/downloads
19+
ANDROID_PLATFORM=android-34
20+
ANDROID_NDK_VERSION=r28
21+
ANDROID_NDK_NAME=android-ndk-${ANDROID_NDK_VERSION}
22+
ANDROID_NDK_FULLNAME=${ANDROID_NDK_NAME}-linux.zip
23+
ANDROID_NDK=${PWD}/${ANDROID_NDK_NAME}
24+
25+
#QNN SDK can be found at:
1426
#https://www.qualcomm.com/developer/software/qualcomm-ai-engine-direct-sdk
15-
#https://developer.qualcomm.com/software/hexagon-dsp-sdk/tools
1627
QNN_SDK_URL=https://www.qualcomm.com/developer/software/qualcomm-ai-engine-direct-sdk
1728
QNN_SDK_INSTALL_PATH=/opt/qcom/aistack/qairt/
1829
QNN_SDK_VERSION=2.32.0.250228
1930
QNN_SDK_VERSION=2.33.0.250327
2031
QNN_SDK_PATH=${QNN_SDK_INSTALL_PATH}/${QNN_SDK_VERSION}
2132

22-
#5.5.3.0 should be also ok
33+
#Hexagon SDK can be found at:
34+
#https://developer.qualcomm.com/software/hexagon-dsp-sdk/tools
2335
HEXAGON_SDK_PATH=/opt/qcom/Hexagon_SDK/6.2.0.1
2436
#available htp arch version:
2537
#v68 --- Snapdragon 888
2638
#v69 --- Snapdragon 8 Gen1
2739
#v73 --- Snapdragon 8 Gen2
2840
#v75 --- Snapdragon 8 Gen3
2941
#v79 --- Snapdragon 8 Elite(aka Gen4)
42+
#8Gen3
3043
HTP_ARCH_VERSION=v75
3144
HTP_ARCH_VERSION_a=V75
32-
45+
#8Elite
3346
HTP_ARCH_VERSION=v79
3447
HTP_ARCH_VERSION_a=V79
3548

36-
#running_params=" -mg 2 -ngl 99 "
3749
#running_params=" -mg 2 -ngl 99 -t 8 -fa 1 "
3850
running_params=" -mg 2 -ngl 99 -t 8 "
3951

@@ -109,11 +121,11 @@ function check_and_download_ndk()
109121

110122
if [ ${is_android_ndk_exist} -eq 0 ]; then
111123

112-
if [ ! -f android-ndk-r26c-linux.zip ]; then
113-
wget --no-config --quiet --show-progress -O android-ndk-r26c-linux.zip https://dl.google.com/android/repository/android-ndk-r26c-linux.zip
124+
if [ ! -f ${ANDROID_NDK_FULLNAME} ]; then
125+
wget --no-config --quiet --show-progress -O ${ANDROID_NDK_FULLNAME} https://dl.google.com/android/repository/${ANDROID_NDK_FULLNAME}
114126
fi
115127

116-
unzip android-ndk-r26c-linux.zip
128+
unzip ${ANDROID_NDK_FULLNAME}
117129

118130
if [ $? -ne 0 ]; then
119131
printf "failed to download android ndk to %s \n" "${ANDROID_NDK}"

0 commit comments

Comments
 (0)