 class qnn_instance;
 struct ggml_backend_hexagon_context;
 
-#if 0 //def NDEBUG
+#ifdef NDEBUG
 #define GGMLHEXAGON_DEBUG                                0
 #else
 #define GGMLHEXAGON_DEBUG                                1
@@ -141,6 +141,7 @@ struct ggml_backend_hexagon_context;
 #define GGMLHEXAGON_LOG_ERROR(...)                      ggmlhexagon_log_internal(GGML_LOG_LEVEL_ERROR, __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)
 #define GGMLHEXAGON_LOG_WARN(...)                       ggmlhexagon_log_internal(GGML_LOG_LEVEL_WARN , __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)
 #define GGMLHEXAGON_LOG_INFO(...)                       ggmlhexagon_log_internal(GGML_LOG_LEVEL_INFO , __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)
+#define GGMLHEXAGON_LOG_VERBOSE(...)                    ggmlhexagon_log_internal(GGML_LOG_LEVEL_CONT , __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)
 
 #if GGMLHEXAGON_DEBUG
 #define GGMLHEXAGON_LOG_DEBUG(...)                      ggmlhexagon_log_internal(GGML_LOG_LEVEL_DEBUG, __FILE__, __FUNCTION__, __LINE__, __VA_ARGS__)
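The new GGMLHEXAGON_LOG_VERBOSE macro forwards to ggmlhexagon_log_internal at GGML_LOG_LEVEL_CONT and, as defined above, is not gated by GGMLHEXAGON_DEBUG, so it survives NDEBUG builds. A minimal usage sketch with an illustrative call site (the function and variable names are hypothetical):

```cpp
// hypothetical helper: the macro expands to
// ggmlhexagon_log_internal(GGML_LOG_LEVEL_CONT, __FILE__, __FUNCTION__, __LINE__, ...)
static void log_op_duration(const char * op_name, int64_t elapsed_us) {
    GGMLHEXAGON_LOG_VERBOSE("duration of %s : %lld microseconds\n",
                            op_name, (long long)elapsed_us);
}
```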
@@ -154,6 +155,10 @@ struct ggml_backend_hexagon_context;
 #define SIZE_IN_MB                                       (1 << 20)
 #define STATUS_CONTEXT                                   0x12345678
 
+#if !defined(_WINDOWS)
+#pragma weak remote_system_request
+#endif
+
 #define CHECK_QNN_API(error, result)                                            \
     do {                                                                        \
         error = (result);                                                       \
@@ -316,6 +321,7 @@ struct hexagon_appcfg_t {
     int hexagon_backend;        // 0: HEXAGON_BACKEND_QNNCPU 1: HEXAGON_BACKEND_QNNGPU 2: HEXAGON_BACKEND_QNNNPU / HEXAGON_BACKEND_CDSP
     int enable_rpc_ion_mempool; // enable/disable rpc ion memory pool
     int enable_rpc_dma_mempool; // enable/disable rpc dma memory pool
+    int enable_all_q_mulmat;    // enable/disable offloading all quantized-type mulmat to cDSP
     const char * cfgfilename;
     const char * runtime_libpath;
     char ggml_hexagon_version[GGMLHEXAGON_TMPBUF_LEN];
@@ -335,6 +341,7 @@ static struct hexagon_appcfg_t g_hexagon_appcfg = {
         .hexagon_backend         = HEXAGON_BACKEND_CDSP,
         .enable_rpc_ion_mempool  = 0,
         .enable_rpc_dma_mempool  = 0,
+        .enable_all_q_mulmat     = 0,
         .cfgfilename             = "ggml-hexagon.cfg",
 #if defined(__ANDROID__)
 // Android command line program
@@ -344,7 +351,7 @@ static struct hexagon_appcfg_t g_hexagon_appcfg = {
 #elif defined(_WIN32)
         .qnn_runtimelib_path     = "C:\\",
 #endif
-        .ggml_hexagon_version    = {"1.00"},
+        .ggml_hexagon_version    = {"1.01"},
 };
 
 // file:///opt/qcom/aistack/qairt/2.31.0.250130/docs/QNN/general/overview.html#tbl-supported-snapdragon-devices
@@ -891,7 +898,7 @@ class hexagon_perf {
             return;
         _end_time = ggml_time_us();
         _duration = (_end_time - _begin_time);
-        GGMLHEXAGON_LOG_DEBUG("duration of %s : %lld microseconds\n", _perf_name.c_str(), _duration);
+        GGMLHEXAGON_LOG_VERBOSE("duration of %s : %lld microseconds\n", _perf_name.c_str(), _duration);
     }
 
 private:
@@ -1454,6 +1461,7 @@ static void ggmlhexagon_load_cfg() {
     qnncfg_instance.get_stringvalue("qnn", "precision_mode", precision_mode, "fp32");
     qnncfg_instance.get_intvalue("cdsp", "enable_rpc_ion_mempool", g_hexagon_appcfg.enable_rpc_ion_mempool, 1);
     qnncfg_instance.get_intvalue("cdsp", "enable_rpc_dma_mempool", g_hexagon_appcfg.enable_rpc_dma_mempool, 0);
+    qnncfg_instance.get_intvalue("cdsp", "enable_all_q_mulmat", g_hexagon_appcfg.enable_all_q_mulmat, 0);
     GGMLHEXAGON_LOG_INFO("internal ggml_hexagon_version=%s", g_hexagon_appcfg.ggml_hexagon_version);
     GGMLHEXAGON_LOG_INFO("external ggml_hexagon_version=%s", ggml_hexagon_version.c_str());
     GGMLHEXAGON_LOG_INFO("hwaccel_approach=%d(%s)", g_hexagon_appcfg.hwaccel_approach,
@@ -1504,6 +1512,13 @@ static bool ggmlhexagon_check_valid_appcfg() {
             GGMLHEXAGON_LOG_INFO("rpc dma mempool not supported");
             is_valid_appcfg = false;
         }
+
+        if (1 == g_hexagon_appcfg.enable_all_q_mulmat) {
+            if (0 == g_hexagon_appcfg.enable_q_mulmat) {
+                GGMLHEXAGON_LOG_INFO("set enable_q_mulmat to 1 first when setting enable_all_q_mulmat to 1");
+                is_valid_appcfg = false;
+            }
+        }
     }
 
     if (!is_valid_appcfg) {
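Both new hunks act on the same key: enable_all_q_mulmat is read from the "cdsp" section of ggml-hexagon.cfg with a default of 0, and the validation above rejects a configuration that sets it without also setting enable_q_mulmat. A sketch of the relevant cfg excerpt, assuming the INI-style section/key layout implied by the get_intvalue calls (values are illustrative):

```ini
# ggml-hexagon.cfg (illustrative excerpt)
[cdsp]
enable_rpc_ion_mempool = 1
enable_rpc_dma_mempool = 0
# enable_q_mulmat must also be 1, otherwise ggmlhexagon_check_valid_appcfg() rejects the config
enable_all_q_mulmat    = 1
```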
@@ -2743,6 +2758,10 @@ static void ggmlqnn_sdk_logcallback(const char * fmt,
         vsnprintf(reinterpret_cast<char *const>(s_ggmlqnn_sdk_logbuf), GGMLHEXAGON_LOGBUF_LEN, fmt, argp);
         GGMLHEXAGON_LOG_DEBUG("%8.1fms [%-7s] %s\n", ms, log_level_desc, s_ggmlqnn_sdk_logbuf);
     }
+#if !GGMLHEXAGON_DEBUG
+    GGML_UNUSED(log_level_desc);
+    GGML_UNUSED(ms);
+#endif
 }
 
 int qnn_instance::qnn_init(const QnnSaver_Config_t ** saver_config) {
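When GGMLHEXAGON_DEBUG is 0 the GGMLHEXAGON_LOG_DEBUG call above presumably compiles away, leaving log_level_desc and ms unreferenced, so the GGML_UNUSED markers exist to silence unused-variable warnings. A self-contained sketch of the same pattern with stand-in names (GGML_UNUSED itself is a plain void cast in ggml.h):

```cpp
#include <cstdio>

#define MY_DEBUG     0
#define MY_UNUSED(x) (void)(x)      // same idea as GGML_UNUSED in ggml.h

// when the debug branch is compiled out, parameters that only feed the log
// call must still be "used" to avoid -Wunused-parameter warnings
static void demo_log_cb(const char * level_desc, double ms) {
#if MY_DEBUG
    printf("%8.1fms [%-7s]\n", ms, level_desc);
#else
    MY_UNUSED(level_desc);
    MY_UNUSED(ms);
#endif
}
```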
@@ -5075,6 +5094,7 @@ static bool ggmlhexagon_can_handle_op_through_cdsp(ggml_backend_dev_t dev, const
 
     const struct ggml_tensor * src0 = op_tensor->src[0];
     const struct ggml_tensor * src1 = op_tensor->src[1];
+    const int src0_rank = ggml_n_dims(src0);
     switch (op_tensor->op) {
         case GGML_OP_ADD:
         {
@@ -5086,7 +5106,15 @@ static bool ggmlhexagon_can_handle_op_through_cdsp(ggml_backend_dev_t dev, const
         case GGML_OP_MUL_MAT:
         {
             ggmlhexagon_dump_op_info(op_tensor);
+            // FIXME: remove this filter in the future
+            if (2 != src0_rank) {
+                return false;
+            }
             if (1 == g_hexagon_appcfg.enable_q_mulmat) {
+                if (1 == g_hexagon_appcfg.enable_all_q_mulmat) {
+                    return (src0->type == GGML_TYPE_F32 || ggml_is_quantized(src0->type)) && (src1->type == GGML_TYPE_F32);
+                }
+
                 return (src0->type == GGML_TYPE_F32
                         || src0->type == GGML_TYPE_Q4_0 || src0->type == GGML_TYPE_Q8_0
                         || src0->type == GGML_TYPE_Q6_K || src0->type == GGML_TYPE_Q8_K
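Taken together, the MUL_MAT gate now rejects anything that is not a rank-2 src0, and enable_all_q_mulmat widens the accepted src0 types from the explicit Q4_0/Q8_0/Q6_K/Q8_K list to anything ggml_is_quantized() accepts, while still requiring F32 src1 in that branch. A hypothetical, condensed restatement of the branches visible in this hunk (the helper name and the final fallback are illustrative, and the allow-list continues past the lines shown):

```cpp
// condensed restatement of the MUL_MAT offload gate above (names illustrative)
static bool mulmat_offload_ok(const ggml_tensor * src0, const ggml_tensor * src1,
                              const hexagon_appcfg_t & cfg) {
    if (ggml_n_dims(src0) != 2) {       // FIXME filter from the hunk: rank-2 only for now
        return false;
    }
    if (1 == cfg.enable_q_mulmat && 1 == cfg.enable_all_q_mulmat) {
        // widest gate: F32 or any quantized src0, F32 src1
        return (src0->type == GGML_TYPE_F32 || ggml_is_quantized(src0->type))
            && (src1->type == GGML_TYPE_F32);
    }
    if (1 == cfg.enable_q_mulmat) {
        // explicit allow-list; the condition continues beyond the lines shown in this hunk
        return src0->type == GGML_TYPE_F32
            || src0->type == GGML_TYPE_Q4_0 || src0->type == GGML_TYPE_Q8_0
            || src0->type == GGML_TYPE_Q6_K || src0->type == GGML_TYPE_Q8_K;
    }
    return false;                       // fallback not visible in this hunk; placeholder only
}
```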
@@ -5126,9 +5154,9 @@ static bool ggmlhexagon_can_handle_op_through_qnn(ggml_backend_dev_t dev, const
 
     struct ggml_tensor * src0 = op_tensor->src[0];
     struct ggml_tensor * src1 = op_tensor->src[1];
-    const int64_t ne00 = src0->ne[0];;
+    const int64_t ne00         = src0->ne[0];
     const int src0_rank = ggml_n_dims(src0);
-    int src1_rank        = 0;
+    int src1_rank  = 0;
     if (nullptr != src1) {
         src1_rank = ggml_n_dims(src1);
     }