Skip to content

Commit 361b9bb

Browse files
author
zhouwg
committed
ggml-hexagon: unify NDEBUG usage in ggml-hexagon.cpp and ggml-dsp.c
1 parent 3a38081 commit 361b9bb

File tree

5 files changed

+79
-13
lines changed

5 files changed

+79
-13
lines changed

ggml/src/ggml-hexagon/CMakeLists.txt

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,15 @@ message("QNN_SDK_PATH : ${QNN_SDK_PATH}")
1717
message("HEXAGON_SDK_PATH: ${HEXAGON_SDK_PATH}")
1818
message("HTP_ARCH_VERSION: ${HTP_ARCH_VERSION}")
1919

20+
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
21+
set(DEBUG_FLAG "-Wall")
22+
message("Debug mode:${DEBUG_FLAG}")
23+
else()
24+
set(DEBUG_FLAG "-DNDEBUG -Wall")
25+
message("Release mode:${DEBUG_FLAG}")
26+
endif()
27+
28+
2029
#v68 --- Snapdragon 888
2130
#v69 --- Snapdragon 8 Gen1
2231
#v73 --- Snapdragon 8 Gen2
@@ -32,6 +41,7 @@ set(HEXAGON_CC "${HEXAGON_SDK_PATH}/tools/HEXAGON_Tools/8.8.06/Tool
3241
set(HEXAGON_CXX "${HEXAGON_SDK_PATH}/tools/HEXAGON_Tools/8.8.06/Tools/bin/hexagon-clang")
3342
set(HEXAGON_TARGET libggmlop_skel${HTP_ARCH_VERSION}.so)
3443
set(HEXAGON_KERNELS_PATH "${CMAKE_CURRENT_LIST_DIR}/kernels")
44+
set(HEXAGON_COMPUTE "compute${HTP_ARCH_VERSION}")
3545

3646
if(CMAKE_SYSTEM_NAME STREQUAL "Android")
3747
find_library(LOG_LIB log)
@@ -64,7 +74,7 @@ else()
6474
message(FATAL_ERROR "QNN now only available on Android and Windows(Windows on ARM)")
6575
endif()
6676

67-
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DGGML_USE_QNN")
77+
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DGGML_USE_HEXAGON ${DEBUG_FLAG}")
6878
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3")
6979

7080
file(GLOB QNN_SOURCES "${CMAKE_CURRENT_LIST_DIR}/*.cpp" "${CMAKE_CURRENT_LIST_DIR}/kernels/ggmlop_ap_skel.c")
@@ -83,8 +93,8 @@ function(ggml_hexagon_build_kernel KNAME)
8393
TARGET ${PROJECT_NAME}
8494
POST_BUILD
8595
COMMAND echo "current working path:`pwd`\n"
86-
COMMAND ${HEXAGON_CC} -o ${HEXAGON_KERNELS_PATH}/ggml-dsp.o -c ${HEXAGON_KERNELS_PATH}/ggml-dsp.c -m${HTP_ARCH_VERSION} -c -Ofast -Wall -Wstrict-prototypes -fno-zero-initialized-in-bss -fdata-sections -fpic -D__V_DYNAMIC__ -mhvx -mhvx-length=128B -I${HEXAGON_SDK_PATH}/incs -I${HEXAGON_SDK_PATH}/libs/qprintf/inc -I${HEXAGON_SDK_PATH}/incs/stddef -I${HEXAGON_SDK_PATH}/ipc/fastrpc/incs -I${HEXAGON_SDK_PATH}/ipc/fastrpc/rpcmem/inc -I${HEXAGON_SDK_PATH}/utils/examples -I${HEXAGON_SDK_PATH}/ipc/fastrpc/rtld/ship/inc -I${HEXAGON_SDK_PATH}/libs/atomic/inc -I${HEXAGON_SDK_PATH}/utils/sim_utils/inc
87-
COMMAND ${HEXAGON_CC} -o ${HEXAGON_KERNELS_PATH}/ggmlop_cdsp_skel.o -c ${HEXAGON_KERNELS_PATH}/ggmlop_cdsp_skel.c -m${HTP_ARCH_VERSION} -c -Ofast -Wall -Wstrict-prototypes -fno-zero-initialized-in-bss -fdata-sections -fpic -D__V_DYNAMIC__ -mhvx -mhvx-length=128B -I${HEXAGON_SDK_PATH}/incs -I${HEXAGON_SDK_PATH}/libs/qprintf/inc -I${HEXAGON_SDK_PATH}/incs/stddef -I${HEXAGON_SDK_PATH}/ipc/fastrpc/incs -I${HEXAGON_SDK_PATH}/ipc/fastrpc/rpcmem/inc -I${HEXAGON_SDK_PATH}/utils/examples -I${HEXAGON_SDK_PATH}/ipc/fastrpc/rtld/ship/inc -I${HEXAGON_SDK_PATH}/libs/atomic/inc -I${HEXAGON_SDK_PATH}/utils/sim_utils/inc
96+
COMMAND ${HEXAGON_CC} -o ${HEXAGON_KERNELS_PATH}/ggml-dsp.o -c ${HEXAGON_KERNELS_PATH}/ggml-dsp.c -m${HTP_ARCH_VERSION} -c -Ofast -Wall -Wstrict-prototypes -fno-zero-initialized-in-bss -fdata-sections -fpic ${DEBUG_FLAG} -D__V_DYNAMIC__ -mhvx -mhvx-length=128B -fno-finite-math-only -I${HEXAGON_SDK_PATH}/incs -I${HEXAGON_SDK_PATH}/libs/qprintf/inc -I${HEXAGON_SDK_PATH}/incs/stddef -I${HEXAGON_SDK_PATH}/ipc/fastrpc/incs -I${HEXAGON_SDK_PATH}/ipc/fastrpc/rpcmem/inc -I${HEXAGON_SDK_PATH}/utils/examples -I${HEXAGON_SDK_PATH}/ipc/fastrpc/rtld/ship/inc -I${HEXAGON_SDK_PATH}/libs/atomic/inc -I${HEXAGON_SDK_PATH}/utils/sim_utils/inc -I${HEXAGON_SDK_PATH}/rtos/qurt/${HEXAGON_COMPUTE}/include/posix -I${HEXAGON_SDK_PATH}/rtos/qurt/${HEXAGON_COMPUTE}/include/qurt/
97+
COMMAND ${HEXAGON_CC} -o ${HEXAGON_KERNELS_PATH}/ggmlop_cdsp_skel.o -c ${HEXAGON_KERNELS_PATH}/ggmlop_cdsp_skel.c -m${HTP_ARCH_VERSION} -c -Ofast -Wall -Wstrict-prototypes -fno-zero-initialized-in-bss -fdata-sections -fpic -D__V_DYNAMIC__ -mhvx -mhvx-length=128B -fno-finite-math-only -I${HEXAGON_SDK_PATH}/incs -I${HEXAGON_SDK_PATH}/libs/qprintf/inc -I${HEXAGON_SDK_PATH}/incs/stddef -I${HEXAGON_SDK_PATH}/ipc/fastrpc/incs -I${HEXAGON_SDK_PATH}/ipc/fastrpc/rpcmem/inc -I${HEXAGON_SDK_PATH}/utils/examples -I${HEXAGON_SDK_PATH}/ipc/fastrpc/rtld/ship/inc -I${HEXAGON_SDK_PATH}/libs/atomic/inc -I${HEXAGON_SDK_PATH}/utils/sim_utils/inc
8898
COMMAND ${HEXAGON_CC} -m${HTP_ARCH_VERSION} -Wl,--defsym=ISDB_TRUSTED_FLAG=2 -Wl,--defsym=ISDB_SECURE_FLAG=2 -Wl,--no-threads -fpic -shared -Wl,-Bsymbolic -Wl,--wrap=malloc -Wl,--wrap=calloc -Wl,--wrap=free -Wl,--wrap=realloc -Wl,--wrap=memalign -lc -Wl,-soname=${HEXAGON_TARGET} -o ${HEXAGON_KERNELS_PATH}/${HEXAGON_TARGET} -Wl,--start-group ${HEXAGON_KERNELS_PATH}/ggmlop_cdsp_skel.o ${HEXAGON_KERNELS_PATH}/ggml-dsp.o -Wl,--end-group
8999
COMMAND ls -l ${HEXAGON_KERNELS_PATH}/${HEXAGON_TARGET}
90100
COMMENT "build hexagon-kernel"

ggml/src/ggml-hexagon/ggml-hexagon.cpp

Lines changed: 33 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@
129129
class qnn_instance;
130130
struct ggml_backend_hexagon_context;
131131

132-
#if 0//def NDEBUG
132+
#ifdef NDEBUG
133133
#define GGMLHEXAGON_DEBUG 0
134134
#else
135135
#define GGMLHEXAGON_DEBUG 1
@@ -141,6 +141,7 @@ struct ggml_backend_hexagon_context;
141141
#define GGMLHEXAGON_LOG_ERROR(...) ggmlhexagon_log_internal(GGML_LOG_LEVEL_ERROR, __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)
142142
#define GGMLHEXAGON_LOG_WARN(...) ggmlhexagon_log_internal(GGML_LOG_LEVEL_WARN , __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)
143143
#define GGMLHEXAGON_LOG_INFO(...) ggmlhexagon_log_internal(GGML_LOG_LEVEL_INFO , __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)
144+
#define GGMLHEXAGON_LOG_VERBOSE(...) ggmlhexagon_log_internal(GGML_LOG_LEVEL_CONT , __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)
144145

145146
#if GGMLHEXAGON_DEBUG
146147
#define GGMLHEXAGON_LOG_DEBUG(...) ggmlhexagon_log_internal(GGML_LOG_LEVEL_DEBUG, __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)
@@ -154,6 +155,10 @@ struct ggml_backend_hexagon_context;
154155
#define SIZE_IN_MB (1 << 20)
155156
#define STATUS_CONTEXT 0x12345678
156157

158+
#if !defined (_WINDOWS)
159+
#pragma weak remote_system_request
160+
#endif
161+
157162
#define CHECK_QNN_API(error, result) \
158163
do { \
159164
error = (result); \
@@ -316,6 +321,7 @@ struct hexagon_appcfg_t {
316321
int hexagon_backend; // 0: HEXAGON_BACKEND_QNNCPU 1: HEXAGON_BACKEND_QNNGPU 2: HEXAGON_BACKEND_QNNNPU / HEXAGON_BACKEND_CDSP
317322
int enable_rpc_ion_mempool; // enable/disable rpc ion memory pool
318323
int enable_rpc_dma_mempool; // enable/disable rpc dma memory pool
324+
int enable_all_q_mulmat; // enable/disable offload all quantized type mulmat to cDSP
319325
const char * cfgfilename;
320326
const char * runtime_libpath;
321327
char ggml_hexagon_version[GGMLHEXAGON_TMPBUF_LEN];
@@ -335,6 +341,7 @@ static struct hexagon_appcfg_t g_hexagon_appcfg = {
335341
.hexagon_backend = HEXAGON_BACKEND_CDSP,
336342
.enable_rpc_ion_mempool = 0,
337343
.enable_rpc_dma_mempool = 0,
344+
.enable_all_q_mulmat = 0,
338345
.cfgfilename = "ggml-hexagon.cfg",
339346
#if defined(__ANDROID__)
340347
//Android command line program
@@ -344,7 +351,7 @@ static struct hexagon_appcfg_t g_hexagon_appcfg = {
344351
#elif defined(_WIN32)
345352
.qnn_runtimelib_path = "C:\\",
346353
#endif
347-
.ggml_hexagon_version = {"1.00"},
354+
.ggml_hexagon_version = {"1.01"},
348355
};
349356

350357
//file:///opt/qcom/aistack/qairt/2.31.0.250130/docs/QNN/general/overview.html#tbl-supported-snapdragon-devices
@@ -891,7 +898,7 @@ class hexagon_perf {
891898
return;
892899
_end_time = ggml_time_us();
893900
_duration = (_end_time - _begin_time);
894-
GGMLHEXAGON_LOG_DEBUG("duration of %s : %lld microseconds\n", _perf_name.c_str(), _duration);
901+
GGMLHEXAGON_LOG_VERBOSE("duration of %s : %lld microseconds\n", _perf_name.c_str(), _duration);
895902
}
896903

897904
private:
@@ -1454,6 +1461,7 @@ static void ggmlhexagon_load_cfg() {
14541461
qnncfg_instance.get_stringvalue("qnn", "precision_mode", precision_mode, "fp32");
14551462
qnncfg_instance.get_intvalue("cdsp", "enable_rpc_ion_mempool", g_hexagon_appcfg.enable_rpc_ion_mempool, 1);
14561463
qnncfg_instance.get_intvalue("cdsp", "enable_rpc_dma_mempool", g_hexagon_appcfg.enable_rpc_dma_mempool, 0);
1464+
qnncfg_instance.get_intvalue("cdsp", "enable_all_q_mulmat", g_hexagon_appcfg.enable_all_q_mulmat, 0);
14571465
GGMLHEXAGON_LOG_INFO("internal ggml_hexagon_version=%s", g_hexagon_appcfg.ggml_hexagon_version);
14581466
GGMLHEXAGON_LOG_INFO("external ggml_hexagon_version=%s", ggml_hexagon_version.c_str());
14591467
GGMLHEXAGON_LOG_INFO("hwaccel_approach=%d(%s)", g_hexagon_appcfg.hwaccel_approach,
@@ -1504,6 +1512,13 @@ static bool ggmlhexagon_check_valid_appcfg() {
15041512
GGMLHEXAGON_LOG_INFO("rpc dma mempool not supported");
15051513
is_valid_appcfg = false;
15061514
}
1515+
1516+
if (1 == g_hexagon_appcfg.enable_all_q_mulmat) {
1517+
if (0 == g_hexagon_appcfg.enable_q_mulmat) {
1518+
GGMLHEXAGON_LOG_INFO("ensure set enable_q_mulmat to 1 firstly when set enable_all_q_mulmat to 1");
1519+
is_valid_appcfg = false;
1520+
}
1521+
}
15071522
}
15081523

15091524
if (!is_valid_appcfg) {
@@ -2743,6 +2758,10 @@ static void ggmlqnn_sdk_logcallback(const char * fmt,
27432758
vsnprintf(reinterpret_cast<char *const>(s_ggmlqnn_sdk_logbuf), GGMLHEXAGON_LOGBUF_LEN, fmt, argp);
27442759
GGMLHEXAGON_LOG_DEBUG("%8.1fms [%-7s] %s\n", ms, log_level_desc, s_ggmlqnn_sdk_logbuf);
27452760
}
2761+
#if !GGMLHEXAGON_DEBUG
2762+
GGML_UNUSED(log_level_desc);
2763+
GGML_UNUSED(ms);
2764+
#endif
27462765
}
27472766

27482767
int qnn_instance::qnn_init(const QnnSaver_Config_t ** saver_config) {
@@ -5075,6 +5094,7 @@ static bool ggmlhexagon_can_handle_op_through_cdsp(ggml_backend_dev_t dev, const
50755094

50765095
const struct ggml_tensor * src0 = op_tensor->src[0];
50775096
const struct ggml_tensor * src1 = op_tensor->src[1];
5097+
const int src0_rank = ggml_n_dims(src0);
50785098
switch (op_tensor->op) {
50795099
case GGML_OP_ADD:
50805100
{
@@ -5086,7 +5106,15 @@ static bool ggmlhexagon_can_handle_op_through_cdsp(ggml_backend_dev_t dev, const
50865106
case GGML_OP_MUL_MAT:
50875107
{
50885108
ggmlhexagon_dump_op_info(op_tensor);
5109+
//FIXME:remove this filter in the future
5110+
if (2 != src0_rank) {
5111+
return false;
5112+
}
50895113
if (1 == g_hexagon_appcfg.enable_q_mulmat) {
5114+
if (1 == g_hexagon_appcfg.enable_all_q_mulmat) {
5115+
return (src0->type == GGML_TYPE_F32 || ggml_is_quantized(src0->type)) && (src1->type == GGML_TYPE_F32);
5116+
}
5117+
50905118
return (src0->type == GGML_TYPE_F32
50915119
|| src0->type == GGML_TYPE_Q4_0 || src0->type == GGML_TYPE_Q8_0
50925120
|| src0->type == GGML_TYPE_Q6_K || src0->type == GGML_TYPE_Q8_K
@@ -5126,9 +5154,9 @@ static bool ggmlhexagon_can_handle_op_through_qnn(ggml_backend_dev_t dev, const
51265154

51275155
struct ggml_tensor * src0 = op_tensor->src[0];
51285156
struct ggml_tensor * src1 = op_tensor->src[1];
5129-
const int64_t ne00 = src0->ne[0];;
5157+
const int64_t ne00 = src0->ne[0];;
51305158
const int src0_rank = ggml_n_dims(src0);
5131-
int src1_rank = 0;
5159+
int src1_rank = 0;
51325160
if (nullptr != src1) {
51335161
src1_rank = ggml_n_dims(src1);
51345162
}

ggml/src/ggml-hexagon/kernels/ggml-dsp.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ extern "C" {
5151
#define GGML_FP16_TO_FP32(x) ggml_lookup_fp16_to_fp32(x)
5252

5353
//NPU performance will be slower when GGMLHEXAGON_DEBUG is enabled
54-
#if 1//def NDEBUG
54+
#ifdef NDEBUG
5555
#define GGMLHEXAGON_DEBUG 0
5656
#else
5757
#define GGMLHEXAGON_DEBUG 1

scripts/build-run-android.sh

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ QNN_SDK_INSTALL_PATH=/opt/qcom/aistack/qairt/
1818
QNN_SDK_VERSION=2.32.0.250228
1919
QNN_SDK_PATH=${QNN_SDK_INSTALL_PATH}/${QNN_SDK_VERSION}
2020

21-
#5.5.3.0 should be also ok because someone told me can't find 6.2.0.1 on 04/05/2025
21+
#5.5.3.0 should also be OK
2222
HEXAGON_SDK_PATH=/opt/qcom/Hexagon_SDK/6.2.0.1
2323
#available htp arch version:
2424
#v68 --- Snapdragon 888
@@ -132,6 +132,16 @@ function build_arm64
132132
cd -
133133
}
134134

135+
function build_arm64_debug
136+
{
137+
cmake -H. -B./out/android -DCMAKE_BUILD_TYPE=Debug -DGGML_OPENMP=OFF -DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK}/build/cmake/android.toolchain.cmake -DANDROID_ABI=arm64-v8a -DANDROID_PLATFORM=latest -DCMAKE_C_FLAGS=-march=armv8.7-a -DGGML_HEXAGON=ON -DQNN_SDK_PATH=${QNN_SDK_PATH} -DHEXAGON_SDK_PATH=${HEXAGON_SDK_PATH} -DHTP_ARCH_VERSION=${HTP_ARCH_VERSION}
138+
cd out/android
139+
make -j16
140+
show_pwd
141+
142+
cd -
143+
}
144+
135145

136146
function remove_temp_dir()
137147
{
@@ -177,7 +187,7 @@ function update_qnn_cfg()
177187
}
178188

179189

180-
function build_ggml_qnn()
190+
function build_ggml_hexagon()
181191
{
182192
show_pwd
183193
check_and_download_ndk
@@ -188,6 +198,17 @@ function build_ggml_qnn()
188198
build_arm64
189199
}
190200

201+
function build_ggml_hexagon_debug()
202+
{
203+
show_pwd
204+
check_and_download_ndk
205+
check_and_download_qnn_sdk
206+
check_hexagon_sdk
207+
dump_vars
208+
remove_temp_dir
209+
build_arm64_debug
210+
}
211+
191212

192213
function prepare_run_on_phone()
193214
{
@@ -341,6 +362,7 @@ function show_usage()
341362
echo " $0 help"
342363
echo " $0 print_oplist"
343364
echo " $0 build"
365+
echo " $0 build_debug (enable debug log for developers on ARM-AP side and cDSP side)"
344366
echo " $0 updateqnnlib"
345367
echo " $0 run_testops"
346368
echo " $0 run_testop [ADD/MUL_MAT]"
@@ -371,7 +393,10 @@ elif [ $# == 1 ]; then
371393
print_oplist
372394
exit 1
373395
elif [ "$1" == "build" ]; then
374-
build_ggml_qnn
396+
build_ggml_hexagon
397+
exit 0
398+
elif [ "$1" == "build_debug" ]; then
399+
build_ggml_hexagon_debug
375400
exit 0
376401
elif [ "$1" == "run_testops" ]; then
377402
run_test-ops

scripts/ggml-hexagon.cfg

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[general]
22
#version of ggml-hexagon.cpp on ARM-AP side
3-
version = "1.00"
3+
version = "1.01"
44
#version of ggml-dsp.c on cDSP side
55
ggmldsp_version = "0.60"
66

@@ -44,3 +44,6 @@ precision_mode = "fp16"
4444
enable_rpc_ion_mempool = 0
4545
#enable/disable rpc dma memory pool
4646
enable_rpc_dma_mempool = 0
47+
#enable/disable offloading mulmat of all quantized types to cDSP
48+
#requires enable_q_mulmat to already be set to 1
49+
enable_all_q_mulmat = 0

0 commit comments

Comments
 (0)