@@ -325,6 +325,7 @@ struct hexagon_appcfg_t {
325325    int  enable_all_q_mulmat;    //  enable/disable offload all quantized type mulmat to cDSP
326326    int  profiler_duration;      //  threshold of duration in profiler, per seconds
327327    int  profiler_counts;        //  threshold of counts in profiler
328+     int  thread_counts;          //  thread_counts on cDSP side
328329    const  char  * cfgfilename;
329330    const  char  * runtime_libpath;
330331    char  ggml_hexagon_version[GGMLHEXAGON_TMPBUF_LEN];
@@ -348,6 +349,7 @@ static struct hexagon_appcfg_t g_hexagon_appcfg = {
348349        .enable_all_q_mulmat     = 0 ,
349350        .profiler_duration       = 5 ,
350351        .profiler_counts         = 100 ,
352+         .thread_counts           = 4 ,
351353        .cfgfilename             = " ggml-hexagon.cfg"  ,
352354#if  defined(__ANDROID__)
353355// Android command line program
@@ -357,8 +359,8 @@ static struct hexagon_appcfg_t g_hexagon_appcfg = {
357359#elif  defined(_WIN32)
358360        .qnn_runtimelib_path     = " C:\\ "  ,
359361#endif 
360-         .ggml_hexagon_version    = {" 1.80 "  },
361-         .ggml_dsp_version        = {" 0.60 "  },
362+         .ggml_hexagon_version    = {" 1.81 "  },
363+         .ggml_dsp_version        = {" 0.61 "  },
362364};
363365
364366// file:///opt/qcom/aistack/qairt/2.31.0.250130/docs/QNN/general/overview.html#tbl-supported-snapdragon-devices
@@ -886,10 +888,19 @@ class hexagon_profiler {
886888        // FIXME:hardcode filename of profiler data
887889        std::string filename = std::string (g_hexagon_appcfg.runtime_libpath ) + " /"  ;
888890        if  (HWACCEL_CDSP == g_hexagon_appcfg.hwaccel_approach ) {
889-             if  (0  == g_hexagon_appcfg.enable_rpc_ion_mempool ) {
890-                 filename = filename + " hexagon_perf_cdsp.dat"  ;
891+             if  (g_hexagon_appcfg.thread_counts  > 1 ) {
892+                 // multi-threading feature enabled on cDSP side
893+                 if  (0  == g_hexagon_appcfg.enable_rpc_ion_mempool ) {
894+                     filename = filename + " hexagon_perf_cdsp_mt.dat"  ;
895+                 } else  {
896+                     filename = filename + " hexagon_perf_cdsp_ion_mt.dat"  ;
897+                 }
891898            } else  {
892-                 filename = filename + " hexagon_perf_cdsp_ion.dat"  ;
899+                 if  (0  == g_hexagon_appcfg.enable_rpc_ion_mempool ) {
900+                     filename = filename + " hexagon_perf_cdsp.dat"  ;
901+                 } else  {
902+                     filename = filename + " hexagon_perf_cdsp_ion.dat"  ;
903+                 }
893904            }
894905        } else  {
895906            filename = filename + " hexagon_perf_qnn.dat"  ;
@@ -1782,6 +1793,7 @@ static void ggmlhexagon_load_cfg() {
17821793
17831794    hexagoncfg_instance.get_intvalue (" cdsp"  , " enable_rpc_ion_mempool"  , g_hexagon_appcfg.enable_rpc_ion_mempool , 0 );
17841795    hexagoncfg_instance.get_intvalue (" cdsp"  , " enable_all_q_mulmat"  , g_hexagon_appcfg.enable_all_q_mulmat , 0 );
1796+     hexagoncfg_instance.get_intvalue (" cdsp"  , " thread_counts"  , g_hexagon_appcfg.thread_counts , 4 );
17851797
17861798    GGMLHEXAGON_LOG_INFO (" internal ggml_hexagon_version=%s"  , g_hexagon_appcfg.ggml_hexagon_version );
17871799    GGMLHEXAGON_LOG_INFO (" internal ggml_dsp_version=%s"  , g_hexagon_appcfg.ggml_dsp_version );
@@ -5315,7 +5327,8 @@ static int ggmlhexagon_init_dsp(ggml_backend_hexagon_context * ctx) {
53155327        // FIXME: only support offload fp32 GGML_OP_MUL_MAT to cDSP
53165328        GGMLHEXAGON_LOG_INFO (" only support offload fp32 GGML_OP_ADD and fp32 GGML_OP_MUL_MAT to cDSP currently"  );
53175329        ggmlhexagon_probe_dspinfo (ctx);
5318-         ggmlop_dsp_setclocks (ctx->ggmlop_handle , HAP_DCVS_VCORNER_TURBO_PLUS, 40 , 1 );
5330+         // FIXME: reuse this function to pass thread_counts to the code on the cDSP side until the qidl mechanism is fully understood
5331+         ggmlop_dsp_setclocks (ctx->ggmlop_handle , HAP_DCVS_VCORNER_TURBO_PLUS, 40 , 1 , g_hexagon_appcfg.thread_counts );
53195332        ggmlhexagon_set_rpc_latency (ctx->ggmlop_handle , RPC_POLL_QOS, 100 );
53205333        int  result = ggmlhexagon_init_rpcmempool (ctx);
53215334        if  (0  != result) {
0 commit comments