129129class qnn_instance ;
130130struct ggml_backend_hexagon_context ;
131131
132- #if 0 //def NDEBUG
132+ #ifdef NDEBUG
133133#define GGMLHEXAGON_DEBUG 0
134134#else
135135#define GGMLHEXAGON_DEBUG 1
@@ -141,6 +141,7 @@ struct ggml_backend_hexagon_context;
141141#define GGMLHEXAGON_LOG_ERROR (...) ggmlhexagon_log_internal(GGML_LOG_LEVEL_ERROR, __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)
142142#define GGMLHEXAGON_LOG_WARN (...) ggmlhexagon_log_internal(GGML_LOG_LEVEL_WARN , __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)
143143#define GGMLHEXAGON_LOG_INFO (...) ggmlhexagon_log_internal(GGML_LOG_LEVEL_INFO , __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)
144+ #define GGMLHEXAGON_LOG_VERBOSE (...) ggmlhexagon_log_internal(GGML_LOG_LEVEL_CONT , __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)
144145
145146#if GGMLHEXAGON_DEBUG
146147#define GGMLHEXAGON_LOG_DEBUG (...) ggmlhexagon_log_internal(GGML_LOG_LEVEL_DEBUG, __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)
@@ -154,6 +155,10 @@ struct ggml_backend_hexagon_context;
154155#define SIZE_IN_MB (1 << 20 )
155156#define STATUS_CONTEXT 0x12345678
156157
158+ #if !defined (_WINDOWS)
159+ #pragma weak remote_system_request
160+ #endif
161+
157162#define CHECK_QNN_API (error, result ) \
158163 do { \
159164 error = (result); \
@@ -316,9 +321,11 @@ struct hexagon_appcfg_t {
316321 int hexagon_backend; // 0: HEXAGON_BACKEND_QNNCPU 1: HEXAGON_BACKEND_QNNGPU 2: HEXAGON_BACKEND_QNNNPU / HEXAGON_BACKEND_CDSP
317322 int enable_rpc_ion_mempool; // enable/disable rpc ion memory pool
318323 int enable_rpc_dma_mempool; // enable/disable rpc dma memory pool
324+ int enable_all_q_mulmat; // enable/disable offload all quantized type mulmat to cDSP
319325 const char * cfgfilename;
320326 const char * runtime_libpath;
321327 char ggml_hexagon_version[GGMLHEXAGON_TMPBUF_LEN];
328+ char ggml_dsp_version[GGMLHEXAGON_TMPBUF_LEN];
322329};
323330
324331static struct hexagon_appcfg_t g_hexagon_appcfg = {
@@ -335,6 +342,7 @@ static struct hexagon_appcfg_t g_hexagon_appcfg = {
335342 .hexagon_backend = HEXAGON_BACKEND_CDSP,
336343 .enable_rpc_ion_mempool = 0 ,
337344 .enable_rpc_dma_mempool = 0 ,
345+ .enable_all_q_mulmat = 0 ,
338346 .cfgfilename = " ggml-hexagon.cfg" ,
339347#if defined(__ANDROID__)
340348// Android command line program
@@ -344,7 +352,8 @@ static struct hexagon_appcfg_t g_hexagon_appcfg = {
344352#elif defined(_WIN32)
345353 .qnn_runtimelib_path = " C:\\ " ,
346354#endif
347- .ggml_hexagon_version = {" 1.00" },
355+ .ggml_hexagon_version = {" 1.01" },
356+ .ggml_dsp_version = {" 0.60" },
348357};
349358
350359// file:///opt/qcom/aistack/qairt/2.31.0.250130/docs/QNN/general/overview.html#tbl-supported-snapdragon-devices
@@ -857,6 +866,7 @@ static void ggmlhexagon_print_running_timestamp(ggml_backend_hexagon_context * c
857866 memset (timestamp, 0 , GGMLHEXAGON_TMPBUF_LEN);
858867
859868 GGMLHEXAGON_LOG_INFO (" ggml_hexagon_version: %s" , g_hexagon_appcfg.ggml_hexagon_version );
869+ GGMLHEXAGON_LOG_INFO (" ggml_dsp_version: %s" , g_hexagon_appcfg.ggml_dsp_version );
860870 GGMLHEXAGON_LOG_INFO (" hwaccel approach: %d(%s)" , g_hexagon_appcfg.hwaccel_approach ,
861871 ggmlhexagon_get_hwaccel_approach_name (g_hexagon_appcfg.hwaccel_approach ));
862872 GGMLHEXAGON_LOG_INFO (" hexagon_backend: %d(%s)" , g_hexagon_appcfg.hexagon_backend ,
@@ -891,7 +901,7 @@ class hexagon_perf {
891901 return ;
892902 _end_time = ggml_time_us ();
893903 _duration = (_end_time - _begin_time);
894- GGMLHEXAGON_LOG_DEBUG (" duration of %s : %lld microseconds\n " , _perf_name.c_str (), _duration);
904+ GGMLHEXAGON_LOG_VERBOSE (" duration of %s : %lld microseconds\n " , _perf_name.c_str (), _duration);
895905 }
896906
897907private:
@@ -1454,7 +1464,9 @@ static void ggmlhexagon_load_cfg() {
14541464 qnncfg_instance.get_stringvalue (" qnn" , " precision_mode" , precision_mode, " fp32" );
14551465 qnncfg_instance.get_intvalue (" cdsp" , " enable_rpc_ion_mempool" , g_hexagon_appcfg.enable_rpc_ion_mempool , 1 );
14561466 qnncfg_instance.get_intvalue (" cdsp" , " enable_rpc_dma_mempool" , g_hexagon_appcfg.enable_rpc_dma_mempool , 0 );
1467+ qnncfg_instance.get_intvalue (" cdsp" , " enable_all_q_mulmat" , g_hexagon_appcfg.enable_all_q_mulmat , 0 );
14571468 GGMLHEXAGON_LOG_INFO (" internal ggml_hexagon_version=%s" , g_hexagon_appcfg.ggml_hexagon_version );
1469+ GGMLHEXAGON_LOG_INFO (" internal ggml_dsp_version=%s" , g_hexagon_appcfg.ggml_dsp_version );
14581470 GGMLHEXAGON_LOG_INFO (" external ggml_hexagon_version=%s" , ggml_hexagon_version.c_str ());
14591471 GGMLHEXAGON_LOG_INFO (" hwaccel_approach=%d(%s)" , g_hexagon_appcfg.hwaccel_approach ,
14601472 ggmlhexagon_get_hwaccel_approach_name (g_hexagon_appcfg.hwaccel_approach ));
@@ -1504,6 +1516,13 @@ static bool ggmlhexagon_check_valid_appcfg() {
15041516 GGMLHEXAGON_LOG_INFO (" rpc dma mempool not supported" );
15051517 is_valid_appcfg = false ;
15061518 }
1519+
1520+ if (1 == g_hexagon_appcfg.enable_all_q_mulmat ) {
1521+ if (0 == g_hexagon_appcfg.enable_q_mulmat ) {
1522+ GGMLHEXAGON_LOG_INFO (" ensure set enable_q_mulmat to 1 firstly when set enable_all_q_mulmat to 1" );
1523+ is_valid_appcfg = false ;
1524+ }
1525+ }
15071526 }
15081527
15091528 if (!is_valid_appcfg) {
@@ -2743,6 +2762,10 @@ static void ggmlqnn_sdk_logcallback(const char * fmt,
27432762 vsnprintf (reinterpret_cast <char *const >(s_ggmlqnn_sdk_logbuf), GGMLHEXAGON_LOGBUF_LEN, fmt, argp);
27442763 GGMLHEXAGON_LOG_DEBUG (" %8.1fms [%-7s] %s\n " , ms, log_level_desc, s_ggmlqnn_sdk_logbuf);
27452764 }
2765+ #if !GGMLHEXAGON_DEBUG
2766+ GGML_UNUSED (log_level_desc);
2767+ GGML_UNUSED (ms);
2768+ #endif
27462769}
27472770
27482771int qnn_instance::qnn_init (const QnnSaver_Config_t ** saver_config) {
@@ -5075,6 +5098,7 @@ static bool ggmlhexagon_can_handle_op_through_cdsp(ggml_backend_dev_t dev, const
50755098
50765099 const struct ggml_tensor * src0 = op_tensor->src [0 ];
50775100 const struct ggml_tensor * src1 = op_tensor->src [1 ];
5101+ const int src0_rank = ggml_n_dims (src0);
50785102 switch (op_tensor->op ) {
50795103 case GGML_OP_ADD:
50805104 {
@@ -5086,7 +5110,15 @@ static bool ggmlhexagon_can_handle_op_through_cdsp(ggml_backend_dev_t dev, const
50865110 case GGML_OP_MUL_MAT:
50875111 {
50885112 ggmlhexagon_dump_op_info (op_tensor);
5113+ // FIXME:remove this filter in the future
5114+ if (2 != src0_rank) {
5115+ return false ;
5116+ }
50895117 if (1 == g_hexagon_appcfg.enable_q_mulmat ) {
5118+ if (1 == g_hexagon_appcfg.enable_all_q_mulmat ) {
5119+ return (src0->type == GGML_TYPE_F32 || ggml_is_quantized (src0->type )) && (src1->type == GGML_TYPE_F32);
5120+ }
5121+
50905122 return (src0->type == GGML_TYPE_F32
50915123 || src0->type == GGML_TYPE_Q4_0 || src0->type == GGML_TYPE_Q8_0
50925124 || src0->type == GGML_TYPE_Q6_K || src0->type == GGML_TYPE_Q8_K
@@ -5126,9 +5158,9 @@ static bool ggmlhexagon_can_handle_op_through_qnn(ggml_backend_dev_t dev, const
51265158
51275159 struct ggml_tensor * src0 = op_tensor->src [0 ];
51285160 struct ggml_tensor * src1 = op_tensor->src [1 ];
5129- const int64_t ne00 = src0->ne [0 ];;
5130- const int src0_rank = ggml_n_dims (src0);
5131- int src1_rank = 0 ;
5161+ const int64_t ne00 = src0->ne [0 ];;
5162+ const int src0_rank = ggml_n_dims (src0);
5163+ int src1_rank = 0 ;
51325164 if (nullptr != src1) {
51335165 src1_rank = ggml_n_dims (src1);
51345166 }
0 commit comments