Skip to content

Commit 3c72168

Browse files
committed
project: release libggmldsp-skel.so v0.98
1 parent 8525353 commit 3c72168

File tree

13 files changed

+249
-138
lines changed

13 files changed

+249
-138
lines changed

ggml/include/ggml-hexagon.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ GGML_BACKEND_API void ggml_backend_hexagon_set_cfg(int new_hexagon
4444

4545
GGML_BACKEND_API int ggml_backend_hexagon_get_mulmat_algotype(void);
4646

47+
GGML_BACKEND_API void ggml_backend_hexagon_set_mulmat_algotype(int new_mulmat_algotype);
48+
4749
#ifdef __cplusplus
4850
}
4951
#endif

ggml/src/ggml-hexagon/CMakeLists.txt

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,13 +41,17 @@ endif()
4141
set(CHECK_HTP_ARCH "WRONG")
4242
#ref: https://github.com/quic/ai-hub-apps/tree/main/tutorials/llm_on_genie
4343
#foreach (feat v68 v69 v73 v75 v79)
44-
foreach (feat v73 v75 v79)
44+
#foreach (feat v73 v75 v79)
45+
#to simplify the workflow, only v75 and v79 are supported, i.e. only 8Gen3 and 8Elite
46+
foreach (feat v75 v79)
4547
if (${feat} STREQUAL ${HTP_ARCH_VERSION})
4648
set(CHECK_HTP_ARCH "GOOD")
4749
endif()
4850
endforeach()
4951
if (${CHECK_HTP_ARCH} STREQUAL "WRONG")
50-
message(FATAL_ERROR "ggml-hexagon backend only support htp arch v68,v69,v73,v75,v79")
52+
#message(FATAL_ERROR "ggml-hexagon backend only support htp arch v68,v69,v73,v75,v79")
53+
#to simplify the workflow, only v75 and v79 are supported, i.e. only 8Gen3 and 8Elite
54+
message(FATAL_ERROR "ggml-hexagon backend only support htp arch v75,v79")
5155
endif()
5256

5357
#check optimization flags

ggml/src/ggml-hexagon/ggml-hexagon.cpp

Lines changed: 90 additions & 68 deletions
Large diffs are not rendered by default.

ggml/src/ggml-hexagon/kernels/mulmat.c

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,6 @@ static void ggml_compute_forward_mul_mat_one_chunk(const ggml_tensor *src0, cons
145145
}
146146
}
147147

148-
//TODO: only support fp32 mulmat on cDSP
149148
static int ggmlop_dsp_mulmat_singlethread(remote_handle64 h, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
150149
GGMLHEXAGON_LOG_DEBUG("enter %s", __func__ );
151150
ggmlhexagon_dump_tensor(src0, 0);
@@ -274,7 +273,6 @@ static int ggmlop_dsp_mulmat_singlethread(remote_handle64 h, const ggml_tensor *
274273
return 0;
275274
}
276275

277-
//TODO:multithreading mulmat
278276
static int ggmlop_dsp_mulmat_multithread(remote_handle64 h, const struct dsptensor * src0, const struct dsptensor * src1, dsptensor * dst) {
279277
GGMLHEXAGON_LOG_DEBUG("enter %s", __func__ );
280278
GGMLHEXAGON_LOG_DEBUG("leave %s", __func__ );
-973 KB
Binary file not shown.
-973 KB
Binary file not shown.
973 KB
Binary file not shown.
973 KB
Binary file not shown.

scripts/build-run-android.sh

Lines changed: 105 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
1-
#!/bin/bash
1+
#!/usr/bin/env bash
22
#
33
# Copyright (c) 2024-2025 The KanTV authors
44
#
55
# 1. build llama.cpp + ggml-hexagon backend on Linux for Android phone equipped with Qualcomm Snapdragon mobile SoC
66
# this script will setup local dev envs automatically
77
#
8-
# 2. verify prebuilt libggmldsp-skel.so on Android phone equipped with Qualcomm Snapdragon mobile SoC
8+
# 2. verify prebuilt libggmldsp-skel.so on Android phone equipped with Qualcomm Snapdragon mobile SoC(8Elite is recommended)
99
#
1010
# 3. compare performance of QNN-CPU,QNN-GPU,QNN-NPU,Hexagon-cDSP,ggml on Android phone equipped with Qualcomm Snapdragon mobile SoC
1111
#
@@ -19,7 +19,7 @@ PROJECT_ROOT_PATH=${PROJECT_HOME_PATH}
1919
HOST_CPU_COUNTS=`cat /proc/cpuinfo | grep "processor" | wc | awk '{print int($1)}'`
2020

2121
#running path on Android phone
22-
REMOTE_PATH=/data/local/tmp/
22+
REMOTE_PATH=/data/local/tmp
2323

2424
#Android NDK can be found at:
2525
#https://developer.android.com/ndk/downloads
@@ -47,13 +47,13 @@ HEXAGON_SDK_PATH=/opt/qcom/Hexagon_SDK/6.2.0.1
4747
#customized/tailored Hexagon SDK derived from the official Hexagon SDK to simplify the workflow
4848
HEXAGON_SDK_PATH=${PROJECT_ROOT_PATH}/prebuilts/Hexagon_SDK/6.2.0.1
4949

50-
#running_params=" -mg 2 -ngl 99 -t 8 -fa 1 "
51-
#running_params=" -mg 2 -ngl 99 -t 8 "
50+
#running_params=" -ngl 99 -t 8 -n 256 --no-warmup -fa 1 "
5251
running_params=" -ngl 99 -t 8 -n 256 --no-warmup "
5352

5453
#available prebuilt libs can be found at prebuilts/ggml-dsp
55-
#GGMLDSP_RELEASE_DATE=20250531
54+
GGMLDSP_RELEASE_DATE=20250531
5655
GGMLDSP_RELEASE_DATE=20250609
56+
GGMLDSP_RELEASE_DATE=20250625
5757

5858

5959
######## part-2: contents in this part can be modified ########
@@ -94,6 +94,12 @@ GGUF_MODEL_NAME=/sdcard/qwen1_5-1_8b-chat-q4_0.gguf
9494
#HTP_ARCH_VERSION_a=V79
9595

9696
#modify the following two lines to adapt to test phone
97+
#to simplify the workflow, only v75 and v79 are supported, i.e. only 8Gen3 and 8Elite
98+
#v79/8Elite is strongly recommended because:
99+
#1. sometimes the same DSP code can run well as expected on a Snapdragon 8Elite based phone
100+
#   but can't work as expected on other Snapdragon based phones (e.g. 8Gen3).
101+
#2. the DSP clock rate on 8Gen3 is lower than the DSP clock rate on 8Elite.
102+
#3. 8Elite supports LP-DDR5x memory up to 5300 MHz; 8Gen3 supports LP-DDR5x memory up to 4800 MHz.
97103
HTP_ARCH_VERSION=v79
98104
HTP_ARCH_VERSION_a=V79
99105

@@ -331,32 +337,6 @@ function build_ggml_hexagon_debug()
331337
}
332338

333339

334-
#added on 05/31/2025, for purpose of non-tech factor
335-
function prepare_ggmlhexagon()
336-
{
337-
adb push ./scripts/ggml-hexagon-for-binary-lib.cfg ${REMOTE_PATH}/ggml-hexagon.cfg
338-
echo "adb push ${PROJECT_ROOT_PATH}/prebuilts/ggml-dsp/${GGMLDSP_RELEASE_DATE}/libggmlop-skel${HTP_ARCH_VERSION}.so ${REMOTE_PATH}/libggmlop-skel.so"
339-
case "$HTP_ARCH_VERSION" in
340-
v69)
341-
adb push ${PROJECT_ROOT_PATH}/prebuilts/ggml-dsp/${GGMLDSP_RELEASE_DATE}/libggmlop-skel${HTP_ARCH_VERSION}.so ${REMOTE_PATH}/libggmlop-skel.so
342-
;;
343-
v73)
344-
adb push ${PROJECT_ROOT_PATH}/prebuilts/ggml-dsp/${GGMLDSP_RELEASE_DATE}/libggmlop-skel${HTP_ARCH_VERSION}.so ${REMOTE_PATH}/libggmlop-skel.so
345-
;;
346-
v75)
347-
adb push ${PROJECT_ROOT_PATH}/prebuilts/ggml-dsp/${GGMLDSP_RELEASE_DATE}/libggmlop-skel${HTP_ARCH_VERSION}.so ${REMOTE_PATH}/libggmlop-skel.so
348-
;;
349-
v79)
350-
adb push ${PROJECT_ROOT_PATH}/prebuilts/ggml-dsp/${GGMLDSP_RELEASE_DATE}/libggmlop-skel${HTP_ARCH_VERSION}.so ${REMOTE_PATH}/libggmlop-skel.so
351-
;;
352-
*)
353-
show_usage
354-
exit 1
355-
;;
356-
esac
357-
}
358-
359-
360340
function prepare_ggmldsp()
361341
{
362342
adb push ./scripts/ggml-hexagon-for-binary-lib.cfg ${REMOTE_PATH}/ggml-hexagon.cfg
@@ -432,7 +412,7 @@ function check_prebuilt_models()
432412

433413
check_and_download_model qwen1_5-1_8b-chat-q4_0.gguf https://huggingface.co/Qwen/Qwen1.5-1.8B-Chat-GGUF/resolve/main/qwen1_5-1_8b-chat-q4_0.gguf
434414
#check_and_download_model MiniCPM4-0.5B-F32.gguf https://huggingface.co/zhouwg/kantv/resolve/main/MiniCPM4-0.5B-F32.gguf
435-
check_and_download_model t5-277M-F32.gguf https://huggingface.co/zhouwg/kantv/resolve/main/t5-277M-F32.gguf
415+
#check_and_download_model t5-277M-F32.gguf https://huggingface.co/zhouwg/kantv/resolve/main/t5-277M-F32.gguf
436416

437417
set -e
438418
}
@@ -458,10 +438,6 @@ function prepare_run_on_phone()
458438
#for troubleshooting issues in upstream llama.cpp project
459439
adb shell ls -l ${REMOTE_PATH}/libggml-*.so
460440

461-
#for verify prebuilt binary library(built on 05/31/2025) on Hexagon cDSP
462-
#not used since 06/2025 and would be removed in the future
463-
#prepare_ggmlhexagon
464-
465441
#for verify prebuilt binary library(after 06/2025) on Hexagon cDSP
466442
#comment this line when build library on Hexagon cDSP from the reference/self-develop source codes in this project
467443
prepare_ggmldsp
@@ -476,10 +452,10 @@ function run_llamacli()
476452
{
477453
prepare_run_on_phone llama-cli
478454

479-
echo "${REMOTE_PATH}/llama-cli ${running_params} -mg $qnnbackend -no-cnv -m ${TEST_MODEL_NAME} -p \"${PROMPT_STRING}\""
455+
echo "${REMOTE_PATH}/llama-cli ${running_params} -mg ${hexagon_backend} -no-cnv -m ${TEST_MODEL_NAME} -p \"${PROMPT_STRING}\""
480456
adb shell "cd ${REMOTE_PATH} \
481457
&& export LD_LIBRARY_PATH=${REMOTE_PATH} \
482-
&& ${REMOTE_PATH}/llama-cli ${running_params} -mg $qnnbackend -no-cnv -m ${TEST_MODEL_NAME} -p \"${PROMPT_STRING}\""
458+
&& ${REMOTE_PATH}/llama-cli ${running_params} -mg ${hexagon_backend} -no-cnv -m ${TEST_MODEL_NAME} -p \"${PROMPT_STRING}\""
483459

484460
}
485461

@@ -490,12 +466,12 @@ function run_llamabench()
490466

491467
echo "adb shell \"cd ${REMOTE_PATH} \
492468
&& export LD_LIBRARY_PATH=${REMOTE_PATH} \
493-
&& ${REMOTE_PATH}/llama-bench ${running_params} -mg $qnnbackend -m ${GGUF_MODEL_NAME}\""
494-
echo "${REMOTE_PATH}/llama-bench ${running_params} -mg $qnnbackend -m ${GGUF_MODEL_NAME}"
469+
&& ${REMOTE_PATH}/llama-bench ${running_params} -mg ${hexagon_backend} -m ${GGUF_MODEL_NAME}\""
470+
echo "${REMOTE_PATH}/llama-bench ${running_params} -mg ${hexagon_backend} -m ${GGUF_MODEL_NAME}"
495471

496472
adb shell "cd ${REMOTE_PATH} \
497473
&& export LD_LIBRARY_PATH=${REMOTE_PATH} \
498-
&& ${REMOTE_PATH}/llama-bench ${running_params} -mg $qnnbackend -m ${GGUF_MODEL_NAME}"
474+
&& ${REMOTE_PATH}/llama-bench ${running_params} -mg ${hexagon_backend} -m ${GGUF_MODEL_NAME}"
499475

500476
}
501477

@@ -504,10 +480,10 @@ function run_threadsafety()
504480
{
505481
prepare_run_on_phone test-thread-safety
506482

507-
echo "${REMOTE_PATH}/test-thread-safety -np 2 -mg $qnnbackend -m ${GGUF_MODEL_NAME} -p \"hello,world\" -n 256 -ngl 99 "
483+
echo "${REMOTE_PATH}/test-thread-safety -np 2 -mg ${hexagon_backend} -m ${GGUF_MODEL_NAME} -p \"hello,world\" -n 256 -ngl 99 "
508484
adb shell "cd ${REMOTE_PATH} \
509485
&& export LD_LIBRARY_PATH=${REMOTE_PATH} \
510-
&& ${REMOTE_PATH}/test-thread-safety -np 1 -mg $qnnbackend -m ${GGUF_MODEL_NAME} -p \"hello,world\" -n 256 -ngl 99 "
486+
&& ${REMOTE_PATH}/test-thread-safety -np 1 -mg ${hexagon_backend} -m ${GGUF_MODEL_NAME} -p \"hello,world\" -n 256 -ngl 99 "
511487

512488
}
513489

@@ -524,18 +500,41 @@ function run_test-ops()
524500
}
525501

526502

503+
function check_hexagon_backend
504+
{
505+
if [[ ${hexagon_backend} != 0 ]] && [[ ${hexagon_backend} != 1 ]] && [[ ${hexagon_backend} != 2 ]] && [[ ${hexagon_backend} != 3 ]] && [[ ${hexagon_backend} != 4 ]] ; then
506+
printf "invalid hexagon backend\n"
507+
printf "valid hexagon backend: 0(QNN_CPU), 1(QNN_GPU), 2(QNN_NPU), 3(cDSP), 4(ggml)\n"
508+
exit 1
509+
fi
510+
}
511+
512+
513+
function check_mulmat_algotype
514+
{
515+
printf "mulmat_algotype ${mulmat_algotype} \n"
516+
if [[ ${mulmat_algotype} != 0 ]] && [[ ${mulmat_algotype} != 1 ]] && [[ ${mulmat_algotype} != 2 ]] && [[ ${mulmat_algotype} != 3 ]] && [[ ${mulmat_algotype} != 4 ]] && [[ ${mulmat_algotype} != 5 ]] && [[ ${mulmat_algotype} != 6 ]] && [[ ${mulmat_algotype} != 32 ]] && [[ ${mulmat_algotype} != 33 ]]; then
517+
printf "invalid mulmat algotype\n"
518+
printf "valid mulmat algotype: 0, 1, 2, 3, 4, 5, 6, 32, 33 \n"
519+
exit 1
520+
fi
521+
}
522+
523+
527524
function run_test-op()
528525
{
529526
prepare_run_on_phone test-backend-ops
530527

528+
check_mulmat_algotype
529+
531530
echo "adb shell cd ${REMOTE_PATH} \
532531
&& export LD_LIBRARY_PATH=${REMOTE_PATH} \
533-
&& ${REMOTE_PATH}/test-backend-ops test -o $opname "
532+
&& ${REMOTE_PATH}/test-backend-ops test -o $opname -a ${mulmat_algotype}"
534533

535534
echo "\n"
536535
adb shell "cd ${REMOTE_PATH} \
537536
&& export LD_LIBRARY_PATH=${REMOTE_PATH} \
538-
&& ${REMOTE_PATH}/test-backend-ops test -o $opname "
537+
&& ${REMOTE_PATH}/test-backend-ops test -o $opname -a ${mulmat_algotype}"
539538

540539
}
541540

@@ -544,9 +543,12 @@ function run_benchmark()
544543
{
545544
prepare_run_on_phone ggmlhexagon-benchmark
546545

546+
check_mulmat_algotype
547+
548+
echo "${REMOTE_PATH}/ggmlhexagon-benchmark -t ${opname} -b ${hexagon_backend} -m ${row} -n ${col} -a ${mulmat_algotype}"
547549
adb shell "cd ${REMOTE_PATH} \
548550
&& export LD_LIBRARY_PATH=${REMOTE_PATH} \
549-
&& ${REMOTE_PATH}/ggmlhexagon-benchmark -t $opname -b $qnnbackend -m $row -n $col"
551+
&& ${REMOTE_PATH}/ggmlhexagon-benchmark -t ${opname} -b ${hexagon_backend} -m ${row} -n ${col} -a ${mulmat_algotype}"
550552

551553
}
552554

@@ -648,6 +650,10 @@ function show_usage()
648650
echo " $0 run_threadsafety 0(QNN_CPU)/1(QNN_GPU)/2(QNN_NPU)/3(cdsp)/4(ggml)"
649651
echo " $0 run_benchmark ADD/MUL_MAT 0(QNN_CPU)/1(QNN_GPU)/2(QNN_NPU)/3(cdsp)/4(ggml)"
650652
echo " $0 run_benchmark ADD/MUL_MAT 0(QNN_CPU)/1(QNN_GPU)/2(QNN_NPU)/3(cdsp)/4(ggml) 256/512/1024/2048/4096 256/512/1024/2048/4096"
653+
#verify performance of mulmat on cDSP
654+
echo " $0 run_benchmark MUL_MAT 3(cdsp) mulmat_algotype(0,1,2,3,4,5,6,32,33) (verify performance of mulmat on cDSP)"
655+
#verify accuracy of mulmat on cDSP
656+
echo " $0 run_testop MUL_MAT mulmat_algotype(0,1,2,3,4,5,6,32,33) (verify accuracy of mulmat on cDSP)"
651657

652658
echo -e "\n\n\n"
653659
}
@@ -698,38 +704,75 @@ elif [ $# == 2 ]; then
698704

699705
if [ "$1" == "run_testop" ]; then
700706
opname=$2
707+
mulmat_algotype=0
701708
run_test-op
702709
exit 0
703710
elif [ "$1" == "run_llamacli" ]; then
704-
qnnbackend=$2
711+
hexagon_backend=$2
712+
check_hexagon_backend
705713
run_llamacli
706714
exit 0
707715
elif [ "$1" == "run_llamabench" ]; then
708-
qnnbackend=$2
716+
hexagon_backend=$2
717+
check_hexagon_backend
709718
run_llamabench
710719
exit 0
711720
elif [ "$1" == "run_threadsafety" ]; then
712-
qnnbackend=$2
721+
hexagon_backend=$2
722+
check_hexagon_backend
713723
run_threadsafety
714724
exit 0
715725
else
716726
show_usage
717727
exit 1
718728
fi
719729
elif [ $# == 3 ]; then
720-
opname=$2
721-
qnnbackend=$3
722-
row=4096
723-
col=4096
724-
run_benchmark
725-
exit 0
730+
if [ "$1" == "run_benchmark" ]; then
731+
opname=$2
732+
hexagon_backend=$3
733+
row=4096
734+
col=4096
735+
mulmat_algotype=0
736+
check_hexagon_backend
737+
run_benchmark
738+
exit 0
739+
elif [ "$1" == "run_testop" ]; then
740+
opname=$2
741+
mulmat_algotype=$3
742+
run_test-op
743+
exit 0
744+
else
745+
show_usage
746+
exit 1
747+
fi
748+
elif [ $# == 4 ]; then
749+
if [ "$1" == "run_benchmark" ]; then
750+
opname=MUL_MAT
751+
#cDSP
752+
hexagon_backend=3
753+
row=4096
754+
col=4096
755+
mulmat_algotype=$4
756+
run_benchmark
757+
exit 0
758+
else
759+
show_usage
760+
exit 1
761+
fi
726762
elif [ $# == 5 ]; then
727-
opname=$2
728-
qnnbackend=$3
729-
row=$4
730-
col=$5
731-
run_benchmark
732-
exit 0
763+
if [ "$1" == "run_benchmark" ]; then
764+
opname=$2
765+
hexagon_backend=$3
766+
row=$4
767+
col=$5
768+
mulmat_algotype=0
769+
check_hexagon_backend
770+
run_benchmark
771+
exit 0
772+
else
773+
show_usage
774+
exit 1
775+
fi
733776
else
734777
show_usage
735778
exit 1

scripts/ggml-hexagon-for-binary-lib.cfg

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
[general]
22
#version of ggml-hexagon.cpp on ARM-AP side
3-
version = "1.12"
3+
version = "1.13"
44
#version of ggml-dsp.c on cDSP side
5-
ggmldsp_version = "0.97"
5+
ggmldsp_version = "0.98"
66

77
#0: HEXAGON_BACKEND_QNNCPU
88
#1: HEXAGON_BACKEND_QNNGPU

0 commit comments

Comments
 (0)