@@ -309,14 +309,13 @@ struct hexagon_appcfg_t {
309309 int enable_perf; // enable/disable perf of op function
310310 int print_tensors_info; // enable/disable print tensors info in op function
311311 int dump_op_info; // enable/disable dump op info in handle_op
312+ int enable_q_mulmat; // enable/disable offload quantized mulmat
312313 int precision_mode; // 0: default 1:fp16
313314 int hvx_threads;
314315 int vtcm_size_in_mb;
315316 int enable_dlbc;
316317 int hwaccel_approach; // 0: HWACCEL_QNN 1: HWACCEL_QNN_SINGLEGRAPH 2: HWACCEL_CDSP
317318 int hexagon_backend; // 0: HEXAGON_BACKEND_QNNCPU 1: HEXAGON_BACKEND_QNNGPU 2: HEXAGON_BACKEND_QNNNPU / HEXAGON_BACKEND_CDSP
318- int enable_mulmat_cdsp; // enable/disable offload mulmat to cDSP
319- int enable_q_mulmat; // enable/disable offload fp32 & quantized mulmat to cDSP
320319 int enable_rpc_ion_mempool; // enable/disable rpc ion memory pool
321320 int enable_rpc_dma_mempool; // enable/disable rpc dma memory pool
322321 const char * cfgfilename;
@@ -328,14 +327,13 @@ static struct hexagon_appcfg_t g_hexagon_appcfg = {
328327 .enable_perf = 0 ,
329328 .print_tensors_info = 0 ,
330329 .dump_op_info = 0 ,
330+ .enable_q_mulmat = 0 ,
331331 .precision_mode = 0 ,
332332 .hvx_threads = 4 ,
333333 .vtcm_size_in_mb = 8 ,
334334 .enable_dlbc = 1 ,
335335 .hwaccel_approach = HWACCEL_CDSP,
336336 .hexagon_backend = HEXAGON_BACKEND_CDSP,
337- .enable_mulmat_cdsp = 0 ,
338- .enable_q_mulmat = 0 ,
339337 .enable_rpc_ion_mempool = 0 ,
340338 .enable_rpc_dma_mempool = 0 ,
341339 .cfgfilename = " ggml-hexagon.cfg" ,
@@ -863,13 +861,12 @@ static void ggmlhexagon_print_running_timestamp(ggml_backend_hexagon_context * c
863861 ggml_backend_hexagon_get_devname (g_hexagon_appcfg.hexagon_backend ));
864862 ggmlhexagon_get_timestring (timestamp);
865863 if (HWACCEL_CDSP == g_hexagon_appcfg.hwaccel_approach ) {
866- GGMLHEXAGON_LOG_INFO (" offload GGML_OP_MULMAT: %s" , g_hexagon_appcfg.enable_mulmat_cdsp ? " YES" : " NO" );
867864 GGMLHEXAGON_LOG_INFO (" offload quantize GGML_OP_MUL_MAT: %s" , g_hexagon_appcfg.enable_q_mulmat ? " YES" : " NO" );
868865 GGMLHEXAGON_LOG_INFO (" using rpc ion memory pool: %s" , g_hexagon_appcfg.enable_rpc_ion_mempool ? " YES" : " NO" );
869866 GGMLHEXAGON_LOG_INFO (" using rpc dma memory pool: %s" , g_hexagon_appcfg.enable_rpc_dma_mempool ? " YES" : " NO" );
870867 ggmlhexagon_probe_dspinfo (ctx);
871868 } else {
872- GGMLHEXAGON_LOG_INFO (" only offload GGML_OP_ADD: NO" );
869+ GGMLHEXAGON_LOG_INFO (" offload quantize GGML_OP_MUL_MAT: %s " , g_hexagon_appcfg. enable_q_mulmat ? " YES " : " NO" );
873870 }
874871 GGMLHEXAGON_LOG_INFO (" running timestamp:%s" , timestamp);
875872}
@@ -1449,12 +1446,11 @@ static void ggmlhexagon_load_cfg() {
14491446 qnncfg_instance.get_intvalue (" general" , " dump_op_info" , g_hexagon_appcfg.dump_op_info , 0 );
14501447 qnncfg_instance.get_intvalue (" general" , " hwaccel_approach" , g_hexagon_appcfg.hwaccel_approach , HWACCEL_CDSP);
14511448 qnncfg_instance.get_intvalue (" general" , " hexagon_backend" , g_hexagon_appcfg.hexagon_backend , HEXAGON_BACKEND_CDSP);
1449+ qnncfg_instance.get_intvalue (" general" , " enable_q_mulmat" , g_hexagon_appcfg.enable_q_mulmat , 0 );
14521450 qnncfg_instance.get_intvalue (" qnn" , " hvx_threads" , g_hexagon_appcfg.hvx_threads , 4 );
14531451 qnncfg_instance.get_intvalue (" qnn" , " vtcm_size_in_mb" , g_hexagon_appcfg.vtcm_size_in_mb , 8 );
14541452 qnncfg_instance.get_intvalue (" qnn" , " enable_dlbc" , g_hexagon_appcfg.enable_dlbc , 1 );
14551453 qnncfg_instance.get_stringvalue (" qnn" , " precision_mode" , precision_mode, " fp32" );
1456- qnncfg_instance.get_intvalue (" cdsp" , " enable_mulmat_cdsp" , g_hexagon_appcfg.enable_mulmat_cdsp , 1 );
1457- qnncfg_instance.get_intvalue (" cdsp" , " enable_q_mulmat" , g_hexagon_appcfg.enable_q_mulmat , 0 );
14581454 qnncfg_instance.get_intvalue (" cdsp" , " enable_rpc_ion_mempool" , g_hexagon_appcfg.enable_rpc_ion_mempool , 1 );
14591455 qnncfg_instance.get_intvalue (" cdsp" , " enable_rpc_dma_mempool" , g_hexagon_appcfg.enable_rpc_dma_mempool , 0 );
14601456 GGMLHEXAGON_LOG_INFO (" hwaccel_approach=%d(%s)" , g_hexagon_appcfg.hwaccel_approach ,
@@ -3017,7 +3013,7 @@ int qnn_instance::init_qnn_graph(const std::string & graph_name, HEXAGONBackend
30173013 _graph_name = graph_name;
30183014 _device_id = device;
30193015
3020- GGMLHEXAGON_LOG_DEBUG (" [%s][%s]created" , ggml_backend_hexagon_get_devname (device), graph_name.c_str ());
3016+ // GGMLHEXAGON_LOG_DEBUG("[%s][%s]created", ggml_backend_hexagon_get_devname(device), graph_name.c_str());
30213017
30223018 Qnn_ErrorHandle_t error = QNN_SUCCESS;
30233019 if (HEXAGON_BACKEND_QNNNPU == device) {
@@ -3070,7 +3066,7 @@ int qnn_instance::init_qnn_graph(const std::string & graph_name, HEXAGONBackend
30703066 }
30713067 graph_configs.push_back (nullptr );
30723068 error = _qnn_interface.qnn_graph_create (_qnn_context_handle, graph_name.c_str (), graph_configs.data (), &_qnn_graph_handle);
3073- GGMLHEXAGON_LOG_DEBUG (" [%s][%s]created graph %p" , ggml_backend_hexagon_get_devname (device), graph_name.c_str (), _qnn_graph_handle);
3069+ // GGMLHEXAGON_LOG_DEBUG("[%s][%s]created graph %p", ggml_backend_hexagon_get_devname(device), graph_name.c_str(), _qnn_graph_handle);
30743070 } else {
30753071 error = _qnn_interface.qnn_graph_create (_qnn_context_handle, graph_name.c_str (), nullptr , &_qnn_graph_handle);
30763072 }
@@ -3280,7 +3276,7 @@ void qnn_instance::htp_set_n_hvx_threads(size_t n_threads) {
32803276 if (QNN_SUCCESS != result) {
32813277 GGMLHEXAGON_LOG_WARN (" failed to set QNN graph config: set hvx threads %d" , n_threads);
32823278 } else {
3283- GGMLHEXAGON_LOG_INFO (" succeed to set QNN graph config: set hvx threads %d" , n_threads);
3279+ // GGMLHEXAGON_LOG_DEBUG ("succeed to set QNN graph config: set hvx threads %d", n_threads);
32843280 }
32853281}
32863282
@@ -3383,7 +3379,7 @@ static Qnn_OpConfig_t ggmlqnn_create_op_config(const char * name, const char * p
33833379 } else {
33843380 snprintf (opcfg_name, GGML_MAX_NAME, " opcfg_%s_%-8d" , name, ggmlqnn_get_idx (QNN_OPCFG_INDEX));
33853381 }
3386- GGMLHEXAGON_LOG_DEBUG (" create qnn opconfig %s" , opcfg_name);
3382+ // GGMLHEXAGON_LOG_DEBUG("create qnn opconfig %s", opcfg_name);
33873383 ggmlqnn_inc_idx (QNN_OPCFG_INDEX);
33883384
33893385 Qnn_OpConfigV1_t v1 = {opcfg_name, package, type,
@@ -3564,7 +3560,7 @@ static void ggmlqnn_compute_elementwise(ggml_backend_hexagon_context * ctx, ggml
35643560 }
35653561 graph_handle = instance->get_qnn_graph_handle ();
35663562
3567- GGMLHEXAGON_LOG_DEBUG (" graph_handle %p" , graph_handle);
3563+ // GGMLHEXAGON_LOG_DEBUG("graph_handle %p", graph_handle);
35683564 // create computational tensor
35693565 p_tensor0 = ggmlqnn_create_compute_tensor (instance, graph_handle, src0, QNN_TENSOR_TYPE_APP_WRITE);
35703566 if (2 == input_param_count) {
@@ -5063,7 +5059,7 @@ static bool ggmlhexagon_can_handle_op_through_cdsp(ggml_backend_dev_t dev, const
50635059 case GGML_OP_MUL_MAT:
50645060 {
50655061 ggmlhexagon_dump_op_info (op_tensor);
5066- if (g_hexagon_appcfg.enable_q_mulmat )
5062+ if (1 == g_hexagon_appcfg.enable_q_mulmat )
50675063 return (src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_Q6_K
50685064 ) && (src1->type == GGML_TYPE_F32) && (op_tensor->type == GGML_TYPE_F32);
50695065 else
@@ -5142,10 +5138,13 @@ static bool ggmlhexagon_can_handle_op_through_qnn(ggml_backend_dev_t dev, const
51425138 return false ;
51435139
51445140 if (ctx->device == HEXAGON_BACKEND_QNNNPU) {
5145- return (src0->type == GGML_TYPE_F32
5141+ if (1 == g_hexagon_appcfg.enable_q_mulmat )
5142+ return (src0->type == GGML_TYPE_F32
51465143 || src0->type == GGML_TYPE_Q4_0 || src0->type == GGML_TYPE_Q8_0
51475144 || src0->type == GGML_TYPE_Q6_K || src0->type == GGML_TYPE_Q8_K
51485145 ) && (src1->type == GGML_TYPE_F32) && (op_tensor->type == GGML_TYPE_F32);
5146+ else
5147+ return (src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F32 && op_tensor->type == GGML_TYPE_F32);
51495148 } else {
51505149 return (src0->type == GGML_TYPE_F32 || ggml_is_quantized (src0->type ))
51515150 && (src1->type == GGML_TYPE_F32) && (op_tensor->type == GGML_TYPE_F32);
@@ -5298,10 +5297,8 @@ static bool ggmlhexagon_compute_forward(ggml_backend_t backend, struct ggml_tens
52985297
52995298struct ggml_backend_hexagon_buffer_context {
53005299 ~ggml_backend_hexagon_buffer_context () {
5301- GGMLHEXAGON_LOG_DEBUG (" enter %s" , __func__);
53025300 if (buffer) {
53035301 if ((g_hexagon_appcfg.hwaccel_approach == HWACCEL_CDSP) && (1 == g_hexagon_appcfg.enable_rpc_ion_mempool )) {
5304- GGMLHEXAGON_LOG_DEBUG (" rpcmem %p, size %d" , buffer, buffer_size);
53055302 // do nonthing here because rpc mempool was used for HWACCEL_CDSP
53065303 } else {
53075304 ggml_aligned_free (buffer, 0 );
@@ -5397,7 +5394,6 @@ static const char * ggml_backend_hexagon_buffer_type_name(ggml_backend_buffer_ty
53975394
53985395static ggml_backend_buffer_t ggml_backend_hexagon_buffer_type_alloc_buffer (
53995396 ggml_backend_buffer_type_t buft, size_t size) {
5400- GGMLHEXAGON_LOG_DEBUG (" enter %s, size %d" , __func__, size);
54015397 struct ggml_backend_hexagon_context * ctx = static_cast <ggml_backend_hexagon_context *>(buft->context );
54025398 GGML_ASSERT (nullptr != ctx);
54035399 GGMLHEXAGON_LOG_DEBUG (" device %d(%s)" , ctx->device , ggml_backend_hexagon_get_devname (ctx->device ));
@@ -5413,14 +5409,10 @@ static ggml_backend_buffer_t ggml_backend_hexagon_buffer_type_alloc_buffer(
54135409 size_page = systeminfo.dwPageSize ;
54145410#endif
54155411 size_t size_aligned = size;
5416- GGMLHEXAGON_LOG_DEBUG (" size_aligned %d" , size_aligned);
54175412 if ((size_aligned % size_page) != 0 ) {
54185413 size_aligned += (size_page - (size_aligned % size_page));
54195414 }
5420- GGMLHEXAGON_LOG_DEBUG (" size_aligned %d" , size_aligned);
54215415 if ((g_hexagon_appcfg.hwaccel_approach == HWACCEL_CDSP) && (1 == g_hexagon_appcfg.enable_rpc_ion_mempool )) {
5422- GGMLHEXAGON_LOG_DEBUG (" rpc mempool len %d" , ctx->rpc_mempool_len );
5423- GGMLHEXAGON_LOG_DEBUG (" rpc mempool usage %d" , ctx->rpc_mempool_usage );
54245416 GGML_ASSERT (ctx->rpc_mempool_usage <= ctx->rpc_mempool_len );
54255417 buffer_ctx->buffer = (static_cast <char *>(ctx->rpc_mempool )) + ctx->rpc_mempool_usage ;
54265418 GGMLHEXAGON_LOG_DEBUG (" buffer_ctx->buffer %p" , buffer_ctx->buffer );
@@ -5434,7 +5426,7 @@ static ggml_backend_buffer_t ggml_backend_hexagon_buffer_type_alloc_buffer(
54345426 GGMLHEXAGON_LOG_WARN (" %s: failed to allocate %d MiB\n " , __func__, size / (1 << 20 ));
54355427 return nullptr ;
54365428 } else {
5437- GGMLHEXAGON_LOG_DEBUG (" %s: succeed to allocate %d MiB\n " , __func__, size / (1 << 20 ));
5429+ // GGMLHEXAGON_LOG_DEBUG("%s: succeed to allocate %d MiB\n", __func__, size / (1 << 20));
54385430 }
54395431
54405432 return ggml_backend_buffer_init (buft, ggml_backend_hexagon_buffer_interface, buffer_ctx, size);
@@ -5577,14 +5569,14 @@ static void ggml_backend_hexagon_device_get_memory(ggml_backend_dev_t dev, size_
55775569 *total = rpc_ion_memsize * SIZE_IN_MB;
55785570 *free = (rpc_ion_memsize - rpc_ion_usage) * SIZE_IN_MB;
55795571 GGMLHEXAGON_LOG_DEBUG (" rpc memsize %d M" , rpc_ion_memsize);
5580- GGMLHEXAGON_LOG_DEBUG (" rpc usage %d M" , rpc_ion_usage);
5572+ GGMLHEXAGON_LOG_DEBUG (" rpc usage %d M\n\n " , rpc_ion_usage);
55815573 } else {
55825574 rpc_ion_memsize = ctx->rpc_mempool_capacity ;
55835575 rpc_ion_usage = ctx->rpc_mempool_usage ;
55845576 *total = rpc_ion_memsize;
55855577 *free = (rpc_ion_memsize - rpc_ion_usage);
55865578 GGMLHEXAGON_LOG_DEBUG (" rpc memsize %d M" , rpc_ion_memsize / SIZE_IN_MB);
5587- GGMLHEXAGON_LOG_DEBUG (" rpc usage %d M" , rpc_ion_usage / SIZE_IN_MB);
5579+ GGMLHEXAGON_LOG_DEBUG (" rpc usage %d M\n\n " , rpc_ion_usage / SIZE_IN_MB);
55885580 }
55895581 }
55905582}
@@ -5891,13 +5883,12 @@ ggml_backend_reg_t ggml_backend_hexagon_reg() {
58915883}
58925884
58935885const char * ggml_backend_hexagon_get_devname (size_t dev_num) {
5894- GGMLHEXAGON_LOG_DEBUG (" enter %s" , __func__);
58955886 if (HWACCEL_CDSP == g_hexagon_appcfg.hwaccel_approach ) {
58965887 if (HEXAGON_BACKEND_CDSP == dev_num)
58975888 return " HEXAGON_BACKEND_CDSP" ;
58985889 }
58995890
5900- // fallback
5891+ // fall through
59015892 switch (dev_num) {
59025893 case HEXAGON_BACKEND_QNNCPU:
59035894 return " HEXAGON_BACKEND_QNN_CPU" ;
0 commit comments