Skip to content

Commit 0c53100

Browse files
committed
refine scripts to avoid confusion
1 parent 6962ac6 commit 0c53100

File tree

3 files changed

+100
-5
lines changed

3 files changed

+100
-5
lines changed

scripts/build-run-android.sh

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,7 @@ function build_ggml_hexagon_debug()
260260
#added on 05/31/2025, for purpose of non-tech factor
261261
function prepare_ggmlhexagon()
262262
{
263-
adb push ./scripts/ggml-hexagon.cfg ${REMOTE_PATH}/
263+
adb push ./scripts/ggml-hexagon-for-binary-lib.cfg ${REMOTE_PATH}/ggml-hexagon.cfg
264264
echo "adb push ${PROJECT_ROOT_PATH}/prebuilts/ggml-dsp/libggmlop-skel${HTP_ARCH_VERSION}.so ${REMOTE_PATH}/libggmlop-skel.so"
265265
case "$HTP_ARCH_VERSION" in
266266
v69)
@@ -297,8 +297,12 @@ function prepare_run_on_phone()
297297
fi
298298
adb push ./out/android/bin/${program} ${REMOTE_PATH}/
299299

300+
#for verifying the prebuilt binary library on Hexagon cDSP
300301
prepare_ggmlhexagon
301302

303+
#for building the library on Hexagon cDSP from the reference source code in this project
304+
#adb push ./scripts/ggml-hexagon.cfg ${REMOTE_PATH}/ggml-hexagon.cfg
305+
302306
adb shell chmod +x ${REMOTE_PATH}/${program}
303307
}
304308

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
[general]
2+
#version of ggml-hexagon.cpp on ARM-AP side
3+
version = "1.08"
4+
#version of ggml-dsp.c on cDSP side
5+
ggmldsp_version = "0.96"
6+
7+
#0: HEXAGON_BACKEND_QNNCPU
8+
#1: HEXAGON_BACKEND_QNNGPU
9+
#2: HEXAGON_BACKEND_QNNNPU
10+
#3: HEXAGON_BACKEND_CDSP
11+
#4: default ggml backend
12+
hexagon_backend = 3
13+
# 0: hwaccel approach through HWACCEL_QNN: offload ggml op to QNN
14+
# 1: hwaccel approach through HWACCEL_QNN_SINGLEGRAPH: mapping entire ggml cgraph to a single QNN graph
15+
# 2: hwaccel approach through HWACCEL_CDSP:offload ggml op to cDSP directly
16+
hwaccel_approach = 2
17+
#
18+
#attention:
19+
# a. HWACCEL_QNN_SINGLEGRAPH not supported at the moment;
20+
# b. following combinations are valid:
21+
# 1: hwaccel_approach = 2 AND hexagon_backend = 3(HWACCEL_CDSP, this is the default setting)
22+
# 2: hwaccel_approach = 0 AND hexagon_backend = 2(QNNNPU)
23+
# 3: hwaccel_approach = 0 AND hexagon_backend = 1(QNNGPU)
24+
# 4: hwaccel_approach = 0 AND hexagon_backend = 0(QNNCPU)
25+
# 5: hwaccel_approach = 2 AND hexagon_backend = 4(fall back to the default ggml backend)
26+
# 6: hwaccel_approach = 0 AND hexagon_backend = 4(fall back to the default ggml backend)
27+
#
28+
#generally speaking,
29+
# a. we only need to focus on b-1(HWACCEL_CDSP) and b-2(QNNNPU).
30+
# b. we can compare Hexagon NPU performance between HWACCEL_CDSP/QNNNPU/the default ggml backend accordingly
31+
32+
33+
#enable/disable offload quantized type mulmat
34+
#quantized type mulmat works fine through QNNNPU at the moment
35+
#quantized type mulmat doesn't work properly through HWACCEL_CDSP at the moment
36+
#this item makes mulmat performance comparison easier
37+
enable_q_mulmat = 1
38+
39+
40+
# enable/disable print tensors info in op function
41+
print_tensors_info = 0
42+
# enable/disable dump op info in handle_op
43+
dump_op_info = 0
44+
45+
46+
# enable/disable perf of op function
47+
# this is the default setting
48+
enable_perf = 1
49+
50+
51+
# enable/disable profiler feature to visually compare NPU performance between HWACCEL_CDSP and QNNNPU
52+
# this is the default setting
53+
enable_profiler = 0
54+
#threshold duration of NPU performance profiler, in seconds
55+
profiler_duration = 5
56+
#threshold counts of NPU performance profiler
57+
profiler_counts = 200
58+
#attention:
59+
# NPU performance might be slower when enable_profiler = 1 because of file I/O in this feature;
60+
# ensure enable_perf = 1 when set enable_profiler = 1;
61+
62+
63+
#enable/disable pinned-memory feature
64+
enable_pinned_memory = 0
65+
66+
#hwaccel approach through QNN(offload ggml op to QNN-NPU)
67+
[qnn]
68+
# enable/disable QNN SDK's internal log, this will be very helpful for troubleshooting in HWACCEL_QNN approach
69+
print_qnn_internal_log = 0
70+
71+
hvx_threads = 8
72+
vtcm_size_in_mb = 8
73+
enable_dlbc = 1
74+
precision_mode = "fp16"
75+
76+
77+
#hwaccel approach through cDSP(offload ggml op to Hexagon cDSP directly)
78+
[cdsp]
79+
#enable/disable rpc ion memory pool
80+
enable_rpc_ion_mempool = 1
81+
82+
#enable/disable offload all quantized type mulmat to cDSP
83+
enable_all_q_mulmat = 1
84+
#attention:
85+
#ensure enable_q_mulmat = 1 when set enable_all_q_mulmat = 1
86+
87+
#enable/disable multi-threading on cDSP side
88+
# 0 disable multi-threading on cDSP side
89+
# 1 disable multi-threading on cDSP side
90+
# 2-8 thread_counts on cDSP side
91+
thread_counts = 8

scripts/ggml-hexagon.cfg

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
#version of ggml-hexagon.cpp on ARM-AP side
33
version = "1.08"
44
#version of ggml-dsp.c on cDSP side
5-
ggmldsp_version = "0.96"
5+
ggmldsp_version = "0.63"
66

77
#0: HEXAGON_BACKEND_QNNCPU
88
#1: HEXAGON_BACKEND_QNNGPU
@@ -34,7 +34,7 @@ hwaccel_approach = 2
3434
#quantized type mulmat works fine through QNNNPU at the moment
3535
#quantized type mulmat doesn't work properly through HWACCEL_CDSP at the moment
3636
#this item makes mulmat performance comparison easier
37-
enable_q_mulmat = 1
37+
enable_q_mulmat = 0
3838

3939

4040
# enable/disable print tensors info in op function
@@ -80,12 +80,12 @@ precision_mode = "fp16"
8080
enable_rpc_ion_mempool = 1
8181

8282
#enable/disable offload all quantized type mulmat to cDSP
83-
enable_all_q_mulmat = 1
83+
enable_all_q_mulmat = 0
8484
#attention:
8585
#ensure enable_q_mulmat = 1 when set enable_all_q_mulmat = 1
8686

8787
#enable/disable multi-threading on cDSP side
8888
# 0 disable multi-threading on cDSP side
8989
# 1 disable multi-threading on cDSP side
9090
# 2-8 thread_counts on cDSP side
91-
thread_counts = 8
91+
thread_counts = 1

0 commit comments

Comments
 (0)