refine scripts to avoid confusion

jeffzhou2000 · jeffzhou2000 · commit 0c5310029b67 · 2025-06-01T12:37:03.000+08:00
diff --git a/scripts/build-run-android.sh b/scripts/build-run-android.sh
@@ -260,7 +260,7 @@ function build_ggml_hexagon_debug()
 #added on 05/31/2025, for purpose of non-tech factor
 function prepare_ggmlhexagon()
 {
-    adb push ./scripts/ggml-hexagon.cfg ${REMOTE_PATH}/
+    adb push ./scripts/ggml-hexagon-for-binary-lib.cfg ${REMOTE_PATH}/ggml-hexagon.cfg
     echo "adb push ${PROJECT_ROOT_PATH}/prebuilts/ggml-dsp/libggmlop-skel${HTP_ARCH_VERSION}.so ${REMOTE_PATH}/libggmlop-skel.so"
 case "$HTP_ARCH_VERSION" in
     v69)
@@ -297,8 +297,12 @@ function prepare_run_on_phone()
     fi
     adb push ./out/android/bin/${program} ${REMOTE_PATH}/
 
+    #for verifying the prebuilt binary library on Hexagon cDSP
     prepare_ggmlhexagon
 
+    #for building the library on Hexagon cDSP from the reference source code in this project
+    #adb push ./scripts/ggml-hexagon.cfg ${REMOTE_PATH}/ggml-hexagon.cfg
+
     adb shell chmod +x ${REMOTE_PATH}/${program}
 }
 
diff --git a/scripts/ggml-hexagon-for-binary-lib.cfg b/scripts/ggml-hexagon-for-binary-lib.cfg
@@ -0,0 +1,91 @@
+[general]
+#version of ggml-hexagon.cpp on ARM-AP side
+version = "1.08"
+#version of ggml-dsp.c on cDSP side
+ggmldsp_version = "0.96"
+
+#0: HEXAGON_BACKEND_QNNCPU
+#1: HEXAGON_BACKEND_QNNGPU
+#2: HEXAGON_BACKEND_QNNNPU
+#3: HEXAGON_BACKEND_CDSP
+#4: default ggml backend
+hexagon_backend  = 3
+# 0: hwaccel approach through HWACCEL_QNN: offload ggml op to QNN
+# 1: hwaccel approach through HWACCEL_QNN_SINGLEGRAPH: mapping entire ggml cgraph to a single QNN graph
+# 2: hwaccel approach through HWACCEL_CDSP: offload ggml op to cDSP directly
+hwaccel_approach = 2
+#
+#attention:
+#          a. HWACCEL_QNN_SINGLEGRAPH not supported at the moment;
+#          b. following combinations are valid:
+#             1: hwaccel_approach = 2 AND hexagon_backend = 3(HWACCEL_CDSP, this is the default setting)
+#             2: hwaccel_approach = 0 AND hexagon_backend = 2(QNNNPU)
+#             3: hwaccel_approach = 0 AND hexagon_backend = 1(QNNGPU)
+#             4: hwaccel_approach = 0 AND hexagon_backend = 0(QNNCPU)
+#             5: hwaccel_approach = 2 AND hexagon_backend = 4(fall back to the default ggml backend)
+#             6: hwaccel_approach = 0 AND hexagon_backend = 4(fall back to the default ggml backend)
+#
+#generally speaking,
+#          a. we only need to focus on b-1(HWACCEL_CDSP) and b-2(QNNNPU).
+#          b. we can compare Hexagon NPU performance between HWACCEL_CDSP/QNNNPU/the default ggml backend accordingly
+
+
+#enable/disable offload quantized type mulmat
+#quantized type mulmat works fine through QNNNPU at the moment
+#quantized type mulmat doesn't work well through HWACCEL_CDSP at the moment
+#this item makes mulmat performance comparison easier
+enable_q_mulmat = 1
+
+
+# enable/disable print tensors info in op function
+print_tensors_info = 0
+# enable/disable dump op info in handle_op
+dump_op_info = 0
+
+
+# enable/disable perf of op function
+# this is the default setting
+enable_perf = 1
+
+
+# enable/disable profiler feature to visually compare NPU performance between HWACCEL_CDSP and QNNNPU
+# this is the default setting
+enable_profiler = 0
+#threshold duration of NPU performance profiler, in seconds
+profiler_duration = 5
+#threshold counts of NPU performance profiler
+profiler_counts = 200
+#attention:
+#          NPU performance might be slower when enable_profiler = 1 because of file I/O in this feature;
+#          ensure enable_perf = 1 when set enable_profiler = 1;
+
+
+#enable/disable pinned-memory feature
+enable_pinned_memory = 0
+
+#hwaccel approach through QNN(offload ggml op to QNN-NPU)
+[qnn]
+# enable/disable QNN SDK's internal log; this will be very helpful for troubleshooting in the HWACCEL_QNN approach
+print_qnn_internal_log = 0
+
+hvx_threads = 8
+vtcm_size_in_mb = 8
+enable_dlbc = 1
+precision_mode = "fp16"
+
+
+#hwaccel approach through cDSP(offload ggml op to Hexagon cDSP directly)
+[cdsp]
+#enable/disable rpc ion memory pool
+enable_rpc_ion_mempool = 1
+
+#enable/disable offload all quantized type mulmat to cDSP
+enable_all_q_mulmat = 1
+#attention:
+#ensure enable_q_mulmat = 1 when set enable_all_q_mulmat = 1
+
+#enable/disable multi-threading on cDSP side
+# 0    disable multi-threading on cDSP side
+# 1    disable multi-threading on cDSP side
+# 2-8  thread_counts on cDSP side
+thread_counts = 8
diff --git a/scripts/ggml-hexagon.cfg b/scripts/ggml-hexagon.cfg
@@ -2,7 +2,7 @@
 #version of ggml-hexagon.cpp on ARM-AP side
 version = "1.08"
 #version of ggml-dsp.c on cDSP side
-ggmldsp_version = "0.96"
+ggmldsp_version = "0.63"
 
 #0: HEXAGON_BACKEND_QNNCPU
 #1: HEXAGON_BACKEND_QNNGPU
@@ -34,7 +34,7 @@ hwaccel_approach = 2
 #quatized type mulmat works fine through QNNNPU at the moment
 #quatized type mulmat doesn't works fine through HWACCEL_CDSP at the moment
 #this item will make mulmat performance comprision easily
-enable_q_mulmat = 1
+enable_q_mulmat = 0
 
 
 # enable/disable print tensors info in op function
@@ -80,12 +80,12 @@ precision_mode = "fp16"
 enable_rpc_ion_mempool = 1
 
 #enable/disable offload all quantized type mulmat to cDSP
-enable_all_q_mulmat = 1
+enable_all_q_mulmat = 0
 #attention:
 #ensure enable_q_mulmat = 1 when set enable_all_q_mulmat = 1
 
 #enable/disable multi-threading on cDSP side
 # 0    disable multi-threading on cDSP side
 # 1    disable multi-threading on cDSP side
 # 2-8  thread_counts on cDSP side
-thread_counts = 8
+thread_counts = 1