Skip to content

Commit 07687ba

Browse files
committed
project: add prebuilt LLM model t5-277M-F32.gguf for comparing inference performance between QNN-CPU, QNN-GPU, QNN-NPU, cDSP, ggml
1 parent 7ce85d3 commit 07687ba

File tree

1 file changed

+33
-17
lines changed

1 file changed

+33
-17
lines changed

scripts/build-run-android.sh

Lines changed: 33 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -11,20 +11,15 @@
1111
#
1212
set -e
1313

14+
######## part-1: don't modify contents in this part ########
15+
1416
PWD=`pwd`
1517
PROJECT_HOME_PATH=`pwd`
1618
PROJECT_ROOT_PATH=${PROJECT_HOME_PATH}
1719

1820
#running path on Android phone
1921
REMOTE_PATH=/data/local/tmp/
2022

21-
#for llama-cli, 20.4 MiB in models/t5-very-small-random-F32.gguf
22-
TEST_MODEL_NAME=/sdcard/t5-very-small-random-F32.gguf
23-
24-
#for llama-bench, 1.12 GiB, will be downloadded automatically via this script from
25-
#https://huggingface.co/Qwen/Qwen1.5-1.8B-Chat-GGUF/blob/main/qwen1_5-1_8b-chat-q4_0.gguf
26-
GGUF_MODEL_NAME=/sdcard/qwen1_5-1_8b-chat-q4_0.gguf
27-
2823
#Android NDK can be found at:
2924
#https://developer.android.com/ndk/downloads
3025
ANDROID_PLATFORM=android-34
@@ -51,6 +46,28 @@ HEXAGON_SDK_PATH=/opt/qcom/Hexagon_SDK/6.2.0.1
5146
#customized/tailored Hexagon SDK derived from the official Hexagon SDK to simplify the workflow
5247
HEXAGON_SDK_PATH=${PROJECT_ROOT_PATH}/prebuilts/Hexagon_SDK/6.2.0.1
5348

49+
#running_params=" -mg 2 -ngl 99 -t 8 -fa 1 "
50+
#running_params=" -mg 2 -ngl 99 -t 8 "
51+
running_params=" -ngl 99 -t 8 -n 256 --no-warmup "
52+
53+
#available prebuilt libs can be found at prebuilts/ggml-dsp
54+
#GGMLDSP_RELEASE_DATE=20250531
55+
GGMLDSP_RELEASE_DATE=20250609
56+
57+
58+
######## part-2: contents in this part can be modified ########
59+
60+
PROMPT_STRING="every day of your life, it is important to take the time to “smell the roses” — to appreciate the experiences that lead to happiness. This is part of being truly happy.Happiness is a state of mind. It starts with accepting where you are, knowing where you are going and planning to enjoy every moment along the way. You know how to be happy, and feel that you have enough time or money or love or whatever you need to achieve your goals. And just feeling that you have enough of everything means that you do indeed have enough.You have to choose to be happy, and focus upon being happy, in order to be happy. If you instead focus upon knowing that you will be happy if you achieve something, you will never be happy, as you have not learned to “smell the roses”. The irony is that when you are happy, you are inevitably more productive, and far more likely to achieve what everything-seekers are seeking.you will never be happy, as you have not learned to “smell the roses”. The irony is that when you are happy, you are inevitably more productive, and far more likely to achieve what everything-seekers are seeking."
61+
PROMPT_STRING="introduce the movie Once Upon a Time in America briefly.\n"
62+
63+
#for llama-cli, 20.4 MiB in models/t5-very-small-random-F32.gguf
64+
TEST_MODEL_NAME=/sdcard/t5-very-small-random-F32.gguf
65+
#for llama-cli, 1.1 GiB, will be downloaded automatically via this script
66+
TEST_MODEL_NAME=/sdcard/t5-277M-F32.gguf
67+
68+
#for llama-bench, 1.12 GiB, will be downloaded automatically via this script
69+
GGUF_MODEL_NAME=/sdcard/qwen1_5-1_8b-chat-q4_0.gguf
70+
5471
#available htp arch version:
5572
#v68 --- Snapdragon 888
5673
#v69 --- Snapdragon 8 Gen1
@@ -74,20 +91,12 @@ HEXAGON_SDK_PATH=${PROJECT_ROOT_PATH}/prebuilts/Hexagon_SDK/6.2.0.1
7491
#HTP_ARCH_VERSION=v79
7592
#HTP_ARCH_VERSION_a=V79
7693

77-
#default HTP_ARCH
7894
#modify the following two lines to adapt to test phone
7995
HTP_ARCH_VERSION=v79
8096
HTP_ARCH_VERSION_a=V79
8197

82-
#available prebuilt libs can be found at prebuilts/ggml-dsp
83-
#modify the following line to select the appropriate libggmldsp-skel.so
84-
#GGMLDSP_RELEASE_DATE=20250531
85-
GGMLDSP_RELEASE_DATE=20250609
8698

87-
#running_params=" -mg 2 -ngl 99 -t 8 -fa 1 "
88-
#running_params=" -mg 2 -ngl 99 -t 8 "
89-
90-
running_params=" -ngl 99 -t 8 -n 256"
99+
######## part-3: don't modify contents in this part ########
91100

92101
function dump_vars()
93102
{
@@ -372,6 +381,10 @@ function check_prebuilt_models()
372381
#https://huggingface.co/zhouwg/kantv/blob/main/MiniCPM4-0.5B-F32.gguf, size 1.74 GiB
373382
#original model: https://huggingface.co/openbmb/MiniCPM4-0.5B
374383

384+
#customized LLM models for comparing inference performance of QNN-CPU, QNN-GPU, QNN-NPU, cDSP and the default ggml backend
385+
#during development stage
386+
#https://huggingface.co/zhouwg/kantv/blob/main/t5-277M-F32.gguf, size 1.1 GiB
387+
375388
set +e
376389

377390
adb shell ls /sdcard/t5-very-small-random-F32.gguf
@@ -384,6 +397,7 @@ function check_prebuilt_models()
384397

385398
check_and_download_model qwen1_5-1_8b-chat-q4_0.gguf https://huggingface.co/Qwen/Qwen1.5-1.8B-Chat-GGUF/resolve/main/qwen1_5-1_8b-chat-q4_0.gguf
386399
#check_and_download_model MiniCPM4-0.5B-F32.gguf https://huggingface.co/zhouwg/kantv/resolve/main/MiniCPM4-0.5B-F32.gguf
400+
check_and_download_model t5-277M-F32.gguf https://huggingface.co/zhouwg/kantv/resolve/main/t5-277M-F32.gguf
387401

388402
set -e
389403
}
@@ -429,7 +443,7 @@ function run_llamacli()
429443
echo "${REMOTE_PATH}/llama-cli ${running_params} -mg $qnnbackend -no-cnv -m ${TEST_MODEL_NAME} -p \"introduce the movie Once Upon a Time in America briefly.\n\""
430444
adb shell "cd ${REMOTE_PATH} \
431445
&& export LD_LIBRARY_PATH=${REMOTE_PATH} \
432-
&& ${REMOTE_PATH}/llama-cli ${running_params} -mg $qnnbackend -no-cnv -m ${TEST_MODEL_NAME} -p \"introduce the movie Once Upon a Time in America briefly.\n\""
446+
&& ${REMOTE_PATH}/llama-cli ${running_params} -mg $qnnbackend -no-cnv -m ${TEST_MODEL_NAME} -p \"${PROMPT_STRING}\""
433447

434448
}
435449

@@ -586,6 +600,8 @@ function show_usage()
586600
}
587601

588602

603+
######## part-4: entry point ########
604+
589605
show_pwd
590606

591607
check_and_download_ndk

0 commit comments

Comments
 (0)