1- #! /bin/bash
1+ #! /usr/ bin/env bash
22#
33# Copyright (c) 2024-2025 The KanTV authors
44#
55# 1. build llama.cpp + ggml-hexagon backend on Linux for Android phone equipped with Qualcomm Snapdragon mobile SoC
66# this script will set up local dev envs automatically
77#
8- # 2. verify prebuilt libggmldsp-skel.so on Android phone equipped with Qualcomm Snapdragon mobile SoC
8+ # 2. verify prebuilt libggmldsp-skel.so on Android phone equipped with Qualcomm Snapdragon mobile SoC (8Elite is recommended)
99#
1010# 3. compare performance of QNN-CPU,QNN-GPU,QNN-NPU,Hexagon-cDSP,ggml on Android phone equipped with Qualcomm Snapdragon mobile SoC
1111#
@@ -19,7 +19,7 @@ PROJECT_ROOT_PATH=${PROJECT_HOME_PATH}
1919HOST_CPU_COUNTS=` cat /proc/cpuinfo | grep " processor" | wc | awk ' {print int($1)}' `
2020
2121# running path on Android phone
22- REMOTE_PATH=/data/local/tmp/
22+ REMOTE_PATH=/data/local/tmp
2323
2424# Android NDK can be found at:
2525# https://developer.android.com/ndk/downloads
@@ -47,13 +47,13 @@ HEXAGON_SDK_PATH=/opt/qcom/Hexagon_SDK/6.2.0.1
4747# customized/tailored Hexagon SDK, derived from the official Hexagon SDK to simplify the workflow
4848HEXAGON_SDK_PATH=${PROJECT_ROOT_PATH} /prebuilts/Hexagon_SDK/6.2.0.1
4949
50- # running_params=" -mg 2 -ngl 99 -t 8 -fa 1 "
51- # running_params=" -mg 2 -ngl 99 -t 8 "
50+ # running_params=" -ngl 99 -t 8 -n 256 --no-warmup -fa 1 "
5251running_params=" -ngl 99 -t 8 -n 256 --no-warmup "
5352
5453# available prebuilt libs can be found at prebuilts/ggml-dsp
55- # GGMLDSP_RELEASE_DATE=20250531
54+ GGMLDSP_RELEASE_DATE=20250531
5655GGMLDSP_RELEASE_DATE=20250609
56+ GGMLDSP_RELEASE_DATE=20250625
5757
5858
5959# ####### part-2: contents in this part can be modified ########
@@ -94,6 +94,12 @@ GGUF_MODEL_NAME=/sdcard/qwen1_5-1_8b-chat-q4_0.gguf
9494# HTP_ARCH_VERSION_a=V79
9595
9696# modify the following two lines to adapt to test phone
97+ # to simplify the workflow, only v75 and v79 are supported, i.e. only 8Gen3 and 8Elite
98+ # v79/8Elite is strongly recommended because:
99+ # 1. sometimes the same DSP code runs as expected on a Snapdragon 8Elite based phone
100+ # but does not work as expected on other Snapdragon based phones (e.g. 8Gen3).
101+ # 2. the DSP clock rate on 8Gen3 is lower than the DSP clock rate on 8Elite.
102+ # 3. 8Elite supports LP-DDR5x memory up to 5300 MHz; 8Gen3 supports LP-DDR5x memory up to 4800 MHz.
97103HTP_ARCH_VERSION=v79
98104HTP_ARCH_VERSION_a=V79
99105
@@ -331,32 +337,6 @@ function build_ggml_hexagon_debug()
331337}
332338
333339
334- # added on 05/31/2025, for purpose of non-tech factor
335- function prepare_ggmlhexagon()
336- {
337- adb push ./scripts/ggml-hexagon-for-binary-lib.cfg ${REMOTE_PATH} /ggml-hexagon.cfg
338- echo " adb push ${PROJECT_ROOT_PATH} /prebuilts/ggml-dsp/${GGMLDSP_RELEASE_DATE} /libggmlop-skel${HTP_ARCH_VERSION} .so ${REMOTE_PATH} /libggmlop-skel.so"
339- case " $HTP_ARCH_VERSION " in
340- v69)
341- adb push ${PROJECT_ROOT_PATH} /prebuilts/ggml-dsp/${GGMLDSP_RELEASE_DATE} /libggmlop-skel${HTP_ARCH_VERSION} .so ${REMOTE_PATH} /libggmlop-skel.so
342- ;;
343- v73)
344- adb push ${PROJECT_ROOT_PATH} /prebuilts/ggml-dsp/${GGMLDSP_RELEASE_DATE} /libggmlop-skel${HTP_ARCH_VERSION} .so ${REMOTE_PATH} /libggmlop-skel.so
345- ;;
346- v75)
347- adb push ${PROJECT_ROOT_PATH} /prebuilts/ggml-dsp/${GGMLDSP_RELEASE_DATE} /libggmlop-skel${HTP_ARCH_VERSION} .so ${REMOTE_PATH} /libggmlop-skel.so
348- ;;
349- v79)
350- adb push ${PROJECT_ROOT_PATH} /prebuilts/ggml-dsp/${GGMLDSP_RELEASE_DATE} /libggmlop-skel${HTP_ARCH_VERSION} .so ${REMOTE_PATH} /libggmlop-skel.so
351- ;;
352- * )
353- show_usage
354- exit 1
355- ;;
356- esac
357- }
358-
359-
360340function prepare_ggmldsp()
361341{
362342 adb push ./scripts/ggml-hexagon-for-binary-lib.cfg ${REMOTE_PATH} /ggml-hexagon.cfg
@@ -432,7 +412,7 @@ function check_prebuilt_models()
432412
433413 check_and_download_model qwen1_5-1_8b-chat-q4_0.gguf https://huggingface.co/Qwen/Qwen1.5-1.8B-Chat-GGUF/resolve/main/qwen1_5-1_8b-chat-q4_0.gguf
434414 # check_and_download_model MiniCPM4-0.5B-F32.gguf https://huggingface.co/zhouwg/kantv/resolve/main/MiniCPM4-0.5B-F32.gguf
435- check_and_download_model t5-277M-F32.gguf https://huggingface.co/zhouwg/kantv/resolve/main/t5-277M-F32.gguf
415+ # check_and_download_model t5-277M-F32.gguf https://huggingface.co/zhouwg/kantv/resolve/main/t5-277M-F32.gguf
436416
437417 set -e
438418}
@@ -458,10 +438,6 @@ function prepare_run_on_phone()
458438 # for troubleshooting issues in upstream llama.cpp project
459439 adb shell ls -l ${REMOTE_PATH} /libggml-* .so
460440
461- # for verify prebuilt binary library(built on 05/31/2025) on Hexagon cDSP
462- # not used since 06/2025 and would be removed in the future
463- # prepare_ggmlhexagon
464-
465441 # to verify the prebuilt binary library (after 06/2025) on Hexagon cDSP
466442 # comment out this line when building the library for Hexagon cDSP from the reference/self-developed source code in this project
467443 prepare_ggmldsp
@@ -476,10 +452,10 @@ function run_llamacli()
476452{
477453 prepare_run_on_phone llama-cli
478454
479- echo " ${REMOTE_PATH} /llama-cli ${running_params} -mg $qnnbackend -no-cnv -m ${TEST_MODEL_NAME} -p \" ${PROMPT_STRING} \" "
455+ echo " ${REMOTE_PATH} /llama-cli ${running_params} -mg ${hexagon_backend} -no-cnv -m ${TEST_MODEL_NAME} -p \" ${PROMPT_STRING} \" "
480456 adb shell " cd ${REMOTE_PATH} \
481457 && export LD_LIBRARY_PATH=${REMOTE_PATH} \
482- && ${REMOTE_PATH} /llama-cli ${running_params} -mg $qnnbackend -no-cnv -m ${TEST_MODEL_NAME} -p \" ${PROMPT_STRING} \" "
458+ && ${REMOTE_PATH} /llama-cli ${running_params} -mg ${hexagon_backend} -no-cnv -m ${TEST_MODEL_NAME} -p \" ${PROMPT_STRING} \" "
483459
484460}
485461
@@ -490,12 +466,12 @@ function run_llamabench()
490466
491467 echo " adb shell \" cd ${REMOTE_PATH} \
492468 && export LD_LIBRARY_PATH=${REMOTE_PATH} \
493- && ${REMOTE_PATH} /llama-bench ${running_params} -mg $qnnbackend -m ${GGUF_MODEL_NAME} \" "
494- echo " ${REMOTE_PATH} /llama-bench ${running_params} -mg $qnnbackend -m ${GGUF_MODEL_NAME} "
469+ && ${REMOTE_PATH} /llama-bench ${running_params} -mg ${hexagon_backend} -m ${GGUF_MODEL_NAME} \" "
470+ echo " ${REMOTE_PATH} /llama-bench ${running_params} -mg ${hexagon_backend} -m ${GGUF_MODEL_NAME} "
495471
496472 adb shell " cd ${REMOTE_PATH} \
497473 && export LD_LIBRARY_PATH=${REMOTE_PATH} \
498- && ${REMOTE_PATH} /llama-bench ${running_params} -mg $qnnbackend -m ${GGUF_MODEL_NAME} "
474+ && ${REMOTE_PATH} /llama-bench ${running_params} -mg ${hexagon_backend} -m ${GGUF_MODEL_NAME} "
499475
500476}
501477
@@ -504,10 +480,10 @@ function run_threadsafety()
504480{
505481 prepare_run_on_phone test-thread-safety
506482
507- echo " ${REMOTE_PATH} /test-thread-safety -np 2 -mg $qnnbackend -m ${GGUF_MODEL_NAME} -p \" hello,world\" -n 256 -ngl 99 "
483+ echo " ${REMOTE_PATH} /test-thread-safety -np 2 -mg ${hexagon_backend} -m ${GGUF_MODEL_NAME} -p \" hello,world\" -n 256 -ngl 99 "
508484 adb shell " cd ${REMOTE_PATH} \
509485 && export LD_LIBRARY_PATH=${REMOTE_PATH} \
510- && ${REMOTE_PATH} /test-thread-safety -np 1 -mg $qnnbackend -m ${GGUF_MODEL_NAME} -p \" hello,world\" -n 256 -ngl 99 "
486+ && ${REMOTE_PATH} /test-thread-safety -np 1 -mg ${hexagon_backend} -m ${GGUF_MODEL_NAME} -p \" hello,world\" -n 256 -ngl 99 "
511487
512488}
513489
@@ -524,18 +500,41 @@ function run_test-ops()
524500}
525501
526502
503+ function check_hexagon_backend
504+ {
505+ if [[ ${hexagon_backend} != 0 ]] && [[ ${hexagon_backend} != 1 ]] && [[ ${hexagon_backend} != 2 ]] && [[ ${hexagon_backend} != 3 ]] && [[ ${hexagon_backend} != 4 ]] ; then
506+ printf " invalid hexagon backend\n"
507+ printf " valid hexagon backend: 0(QNN_CPU), 1(QNN_GPU), 2(QNN_NPU), 3(cDSP), 4(ggml)\n"
508+ exit 1
509+ fi
510+ }
511+
512+
513+ function check_mulmat_algotype
514+ {
515+ printf " mulmat_algotype ${mulmat_algotype} \n"
516+ if [[ ${mulmat_algotype} != 0 ]] && [[ ${mulmat_algotype} != 1 ]] && [[ ${mulmat_algotype} != 2 ]] && [[ ${mulmat_algotype} != 3 ]] && [[ ${mulmat_algotype} != 4 ]] && [[ ${mulmat_algotype} != 5 ]] && [[ ${mulmat_algotype} != 6 ]] && [[ ${mulmat_algotype} != 32 ]] && [[ ${mulmat_algotype} != 33 ]]; then
517+ printf " invalid mulmat algotype\n"
518+ printf " valid mulmat algotype: 0, 1, 2, 3, 4, 5, 6, 32, 33 \n"
519+ exit 1
520+ fi
521+ }
522+
523+
527524function run_test-op()
528525{
529526 prepare_run_on_phone test-backend-ops
530527
528+ check_mulmat_algotype
529+
531530 echo " adb shell cd ${REMOTE_PATH} \
532531 && export LD_LIBRARY_PATH=${REMOTE_PATH} \
533- && ${REMOTE_PATH} /test-backend-ops test -o $opname "
532+ && ${REMOTE_PATH} /test-backend-ops test -o $opname -a ${mulmat_algotype} "
534533
535534 echo " \n"
536535 adb shell " cd ${REMOTE_PATH} \
537536 && export LD_LIBRARY_PATH=${REMOTE_PATH} \
538- && ${REMOTE_PATH} /test-backend-ops test -o $opname "
537+ && ${REMOTE_PATH} /test-backend-ops test -o $opname -a ${mulmat_algotype} "
539538
540539}
541540
@@ -544,9 +543,12 @@ function run_benchmark()
544543{
545544 prepare_run_on_phone ggmlhexagon-benchmark
546545
546+ check_mulmat_algotype
547+
548+ echo " ${REMOTE_PATH} /ggmlhexagon-benchmark -t ${opname} -b ${hexagon_backend} -m ${row} -n ${col} -a ${mulmat_algotype} "
547549 adb shell " cd ${REMOTE_PATH} \
548550 && export LD_LIBRARY_PATH=${REMOTE_PATH} \
549- && ${REMOTE_PATH} /ggmlhexagon-benchmark -t $opname -b $qnnbackend -m $row -n $col "
551+ && ${REMOTE_PATH} /ggmlhexagon-benchmark -t ${ opname} -b ${hexagon_backend} -m ${ row} -n ${ col} -a ${mulmat_algotype} "
550552
551553}
552554
@@ -648,6 +650,10 @@ function show_usage()
648650 echo " $0 run_threadsafety 0(QNN_CPU)/1(QNN_GPU)/2(QNN_NPU)/3(cdsp)/4(ggml)"
649651 echo " $0 run_benchmark ADD/MUL_MAT 0(QNN_CPU)/1(QNN_GPU)/2(QNN_NPU)/3(cdsp)/4(ggml)"
650652 echo " $0 run_benchmark ADD/MUL_MAT 0(QNN_CPU)/1(QNN_GPU)/2(QNN_NPU)/3(cdsp)/4(ggml) 256/512/1024/2048/4096 256/512/1024/2048/4096"
653+ # verify performance of mulmat on cDSP
654+ echo " $0 run_benchmark MUL_MAT 3(cdsp) mulmat_algotype(0,1,2,3,4,5,6,32,33) (verify performance of mulmat on cDSP)"
655+ # verify accuracy of mulmat on cDSP
656+ echo " $0 run_testop MUL_MAT mulmat_algotype(0,1,2,3,4,5,6,32,33) (verify accuracy of mulmat on cDSP)"
651657
652658 echo -e " \n\n\n"
653659}
@@ -698,38 +704,75 @@ elif [ $# == 2 ]; then
698704
699705 if [ " $1 " == " run_testop" ]; then
700706 opname=$2
707+ mulmat_algotype=0
701708 run_test-op
702709 exit 0
703710 elif [ " $1 " == " run_llamacli" ]; then
704- qnnbackend=$2
711+ hexagon_backend=$2
712+ check_hexagon_backend
705713 run_llamacli
706714 exit 0
707715 elif [ " $1 " == " run_llamabench" ]; then
708- qnnbackend=$2
716+ hexagon_backend=$2
717+ check_hexagon_backend
709718 run_llamabench
710719 exit 0
711720 elif [ " $1 " == " run_threadsafety" ]; then
712- qnnbackend=$2
721+ hexagon_backend=$2
722+ check_hexagon_backend
713723 run_threadsafety
714724 exit 0
715725 else
716726 show_usage
717727 exit 1
718728 fi
719729elif [ $# == 3 ]; then
720- opname=$2
721- qnnbackend=$3
722- row=4096
723- col=4096
724- run_benchmark
725- exit 0
730+ if [ " $1 " == " run_benchmark" ]; then
731+ opname=$2
732+ hexagon_backend=$3
733+ row=4096
734+ col=4096
735+ mulmat_algotype=0
736+ check_hexagon_backend
737+ run_benchmark
738+ exit 0
739+ elif [ " $1 " == " run_testop" ]; then
740+ opname=$2
741+ mulmat_algotype=$3
742+ run_test-op
743+ exit 0
744+ else
745+ show_usage
746+ exit 1
747+ fi
748+ elif [ $# == 4 ]; then
749+ if [ " $1 " == " run_benchmark" ]; then
750+ opname=MUL_MAT
751+ # cDSP
752+ hexagon_backend=3
753+ row=4096
754+ col=4096
755+ mulmat_algotype=$4
756+ run_benchmark
757+ exit 0
758+ else
759+ show_usage
760+ exit 1
761+ fi
726762elif [ $# == 5 ]; then
727- opname=$2
728- qnnbackend=$3
729- row=$4
730- col=$5
731- run_benchmark
732- exit 0
763+ if [ " $1 " == " run_benchmark" ]; then
764+ opname=$2
765+ hexagon_backend=$3
766+ row=$4
767+ col=$5
768+ mulmat_algotype=0
769+ check_hexagon_backend
770+ run_benchmark
771+ exit 0
772+ else
773+ show_usage
774+ exit 1
775+ fi
733776else
734777 show_usage
735778 exit 1
0 commit comments