@@ -751,8 +751,10 @@ static void ggmlhexagon_log_internal(ggml_log_level level, const char * file, co
751751static void ggmlhexagon_print_tensors_info (const char * func_name, const ggml_backend_hexagon_context * ctx,
752752 const ggml_tensor * src0, const ggml_tensor * src1, const ggml_tensor * dst) {
753753 // skip sanity check of params because of performance concern
754- if (0 == g_hexagon_appcfg.print_tensors_info )
755- return ;
754+ if (0 == g_hexagon_appcfg.dump_op_info ) {
755+ if (0 == g_hexagon_appcfg.print_tensors_info )
756+ return ;
757+ }
756758
757759 if (nullptr != func_name && nullptr != ctx) {
758760 GGMLHEXAGON_LOG_DEBUG (" call %s in dev %s\n " , func_name, ctx->name );
@@ -862,13 +864,17 @@ static void ggmlhexagon_get_timestring(char * p_currenttime) {
862864}
863865
864866static void ggmlhexagon_print_running_timestamp (ggml_backend_hexagon_context * ctx) {
865- GGMLHEXAGON_LOG_INFO (" hwaccel approach is %d(%s)" , g_hexagon_appcfg.hwaccel_approach ,
866- ggmlhexagon_get_hwaccel_approach_name (g_hexagon_appcfg.hwaccel_approach ));
867867 char timestamp[GGMLHEXAGON_TMPBUF_LEN];
868868 memset (timestamp, 0 , GGMLHEXAGON_TMPBUF_LEN);
869+
870+ GGMLHEXAGON_LOG_INFO (" hwaccel approach is %d(%s)" , g_hexagon_appcfg.hwaccel_approach ,
871+ ggmlhexagon_get_hwaccel_approach_name (g_hexagon_appcfg.hwaccel_approach ));
872+ GGMLHEXAGON_LOG_INFO (" hexagon_backend=%d(%s)" , g_hexagon_appcfg.hexagon_backend ,
873+ ggml_backend_hexagon_get_devname (g_hexagon_appcfg.hexagon_backend ));
869874 ggmlhexagon_get_timestring (timestamp);
870875 if (HWACCEL_CDSP == g_hexagon_appcfg.hwaccel_approach ) {
871- GGMLHEXAGON_LOG_INFO (" only offload GGML_OP_ADD : %s" , g_hexagon_appcfg.enable_q_mulmat ? " NO" : " YES" );
876+ GGMLHEXAGON_LOG_INFO (" only offload GGML_OP_ADD: %s" , g_hexagon_appcfg.enable_mulmat_cdsp ? " NO" : " YES" );
877+ GGMLHEXAGON_LOG_INFO (" offload quantize GGML_OP_MUL_MAT: %s" , g_hexagon_appcfg.enable_q_mulmat ? " YES" : " NO" );
872878 } else {
873879 GGMLHEXAGON_LOG_INFO (" only offload GGML_OP_ADD: NO" );
874880 }
@@ -1437,7 +1443,7 @@ static void ggmlhexagon_load_cfg() {
14371443 qnncfg_instance.load (cfg_filename);
14381444 qnncfg_instance.dump ([](const std::string & section, const std::string & key, const std::string value) {
14391445 std::ostringstream tmposs;
1440- tmposs << " section[" << std::setw (10 ) << std::left << section << " ],[" << std::setw (25 ) << std::left << key << " ] = [" << value << " ]" << std::endl ;
1446+ tmposs << " section[" << std::setw (10 ) << std::left << section << " ],[" << std::setw (25 ) << std::left << key << " ] = [" << value << " ]" ;
14411447 GGMLHEXAGON_LOG_INFO (" %s" , tmposs.str ().c_str ());
14421448 });
14431449 std::string precision_mode;
@@ -1453,11 +1459,10 @@ static void ggmlhexagon_load_cfg() {
14531459 qnncfg_instance.get_stringvalue (" qnn" , " precision_mode" , precision_mode, " fp32" );
14541460 qnncfg_instance.get_intvalue (" cdsp" , " enable_mulmat_cdsp" , g_hexagon_appcfg.enable_mulmat_cdsp , 0 );
14551461 qnncfg_instance.get_intvalue (" cdsp" , " enable_q_mulmat" , g_hexagon_appcfg.enable_q_mulmat , 0 );
1456- GGMLHEXAGON_LOG_INFO (" print_qnn_internal_log=%d" , g_hexagon_appcfg.print_qnn_internal_log );
14571462 GGMLHEXAGON_LOG_INFO (" hwaccel_approach=%d(%s)" , g_hexagon_appcfg.hwaccel_approach ,
14581463 ggmlhexagon_get_hwaccel_approach_name (g_hexagon_appcfg.hwaccel_approach ));
1459- GGMLHEXAGON_LOG_INFO (" hexagon_backend=%d" , g_hexagon_appcfg.hexagon_backend );
1460- GGMLHEXAGON_LOG_INFO ( " npu inference precision mode=%s " , precision_mode. c_str ( ));
1464+ GGMLHEXAGON_LOG_INFO (" hexagon_backend=%d(%s) " , g_hexagon_appcfg.hexagon_backend ,
1465+ ggml_backend_hexagon_get_devname (g_hexagon_appcfg. hexagon_backend ));
14611466 GGMLHEXAGON_LOG_INFO (" qnn runtime lib path=%s" , g_hexagon_appcfg.runtimelib_path );
14621467 if (precision_mode.find (" fp16" ) != std::string::npos) {
14631468 g_hexagon_appcfg.precision_mode = 1 ;
@@ -4853,7 +4858,7 @@ static int ggmlhexagon_init_dsp(ggml_backend_hexagon_context * ctx) {
48534858 ggmlop_domain_uri_len = strlen (ggmlop_URI) + MAX_DOMAIN_NAMELEN;
48544859 ggmlop_domain_uri = (char *)malloc (ggmlop_domain_uri_len);
48554860 snprintf (ggmlop_domain_uri, ggmlop_domain_uri_len, " %s%s" , ggmlop_URI, uri);
4856- GGMLHEXAGON_LOG_INFO (" ggmlop domain uri:%s" , ggmlop_domain_uri);
4861+ GGMLHEXAGON_LOG_DEBUG (" ggmlop domain uri:%s" , ggmlop_domain_uri);
48574862 hexagon_error = ggmlop_dsp_open (ggmlop_domain_uri, &ctx->ggmlop_handle );
48584863 if (AEE_SUCCESS == hexagon_error) {
48594864 GGMLHEXAGON_LOG_INFO (" succeed to open domain %d(%s)" , domain_id, ggmlhexagon_get_dsp_name (domain_id));
@@ -4976,9 +4981,6 @@ static void ggmlhexagon_compute(ggml_backend_hexagon_context * ctx, struct ggml_
49764981 dsptensor_2.nb [2 ] = dst->nb [2 ];
49774982 dsptensor_2.nb [3 ] = dst->nb [3 ];
49784983
4979- // GGMLQNN_DUMP_DSPTENSOR(&dsptensor_0);
4980- // GGMLQNN_DUMP_DSPTENSOR(&dsptensor_1);
4981- // GGMLQNN_DUMP_DSPTENSOR(&dsptensor_2);
49824984 hexagon_error = op_func (ctx->ggmlop_handle , &dsptensor_0, &dsptensor_1, &dsptensor_2);
49834985 if (AEE_SUCCESS != hexagon_error) {
49844986 GGMLHEXAGON_LOG_WARN (" ggmlop %s computation fail on cdsp" , ggml_op_name (op->op ));
@@ -4991,49 +4993,56 @@ static void ggmlhexagon_compute(ggml_backend_hexagon_context * ctx, struct ggml_
49914993// =================================================================================================
49924994// section-8: implementation of ggml-hexagon backend according to specification in ggml backend subsystem
49934995// =================================================================================================
4994- // hwaccel through cDSP
4995- static bool ggmlhexagon_can_handle_op (const ggml_backend_hexagon_context * ctx, const struct ggml_tensor * op_tensor) {
4996- ggmlhexagon_dump_op_info (op_tensor);
4996+ static bool ggmlhexagon_can_handle_op_through_cdsp (ggml_backend_dev_t dev, const struct ggml_tensor * op_tensor) {
4997+ ggml_backend_hexagon_context * ctx = (ggml_backend_hexagon_context *)dev->context ;
4998+ GGML_UNUSED (ctx);
4999+ if (op_tensor->op == GGML_OP_NONE) {
5000+ return true ;
5001+ }
5002+
49975003 if (!ggmlhexagon_k_op_caps[ggmlhexagon_get_op_index (op_tensor)].supported ) {
49985004 return false ;
49995005 }
50005006
5001- struct ggml_tensor * src0 = op_tensor->src [0 ];
5002- struct ggml_tensor * src1 = op_tensor->src [1 ];
5003- const int64_t ne00 = op_tensor-> src [ 0 ]-> ne [ 0 ] ;
5004- uint32_t src0_rank = ggml_n_dims (src0) ;
5007+ const struct ggml_tensor * src0 = op_tensor->src [0 ];
5008+ const struct ggml_tensor * src1 = op_tensor->src [1 ];
5009+ int64_t ne00 = 0 ;
5010+ uint32_t src0_rank = 0 ;
50055011 uint32_t src1_rank = 0 ;
5012+ if (nullptr != src0) {
5013+ src0_rank = ggml_n_dims (src0);
5014+ ne00 = src0->ne [0 ];
5015+ }
50065016 if (nullptr != src1) {
50075017 src1_rank = ggml_n_dims (src1);
50085018 }
50095019
5010- // available in the early stage, should be removed in the product stage
5011- bool support = false ;
5012- if (g_hexagon_appcfg.enable_mulmat_cdsp )
5013- support = ((op_tensor->op == GGML_OP_ADD) || (op_tensor->op == GGML_OP_MUL_MAT));
5014- else
5015- support = (op_tensor->op == GGML_OP_ADD);
5016- if (!support) {
5017- return false ;
5018- }
5019-
50205020 switch (op_tensor->op ) {
50215021 case GGML_OP_ADD:
50225022 case GGML_OP_SUB:
50235023 {
50245024 if (!ggml_are_same_shape (src0, src1)) {
50255025 return false ;
50265026 }
5027- break ;
5027+
5028+ // FIXME:remove this filter
5029+ if (ne00 < 32 )
5030+ return false ;
5031+
5032+ ggmlhexagon_dump_op_info (op_tensor);
5033+ // FIXME:remove this filter
5034+ return ggmlhexagon_same_types (ctx, op_tensor);
50285035 }
50295036 case GGML_OP_MUL_MAT:
50305037 {
5038+ GGMLHEXAGON_LOG_DEBUG (" mulmat" );
50315039 ggmlhexagon_dump_op_info (op_tensor);
50325040
50335041 // TODO:3d&4d matrix mulmat on cDSP
50345042 if (src0_rank != 2 )
50355043 return false ;
50365044
5045+ ggmlhexagon_dump_op_info (op_tensor);
50375046 if (g_hexagon_appcfg.enable_q_mulmat )
50385047 return (src0->type == GGML_TYPE_F32 || ggml_is_quantized (src0->type ))
50395048 && (src1->type == GGML_TYPE_F32) && (op_tensor->type == GGML_TYPE_F32);
@@ -5043,27 +5052,28 @@ static bool ggmlhexagon_can_handle_op(const ggml_backend_hexagon_context * ctx,
50435052 default :
50445053 break ;
50455054 }
5046- return (src0-> type == GGML_TYPE_F32) && (src1-> type == GGML_TYPE_F32) && (op_tensor-> type == GGML_TYPE_F32) ;
5055+ return false ;
50475056}
50485057
5049- static bool ggmlbackend_can_handle_op (const ggml_backend_hexagon_context * ctx, const struct ggml_tensor * op_tensor) {
5058+ static bool ggmlhexagon_can_handle_op_through_qnn (ggml_backend_dev_t dev, const struct ggml_tensor * op_tensor) {
5059+ ggml_backend_hexagon_context * ctx = (ggml_backend_hexagon_context *)dev->context ;
50505060 if (op_tensor->op == GGML_OP_NONE) {
50515061 return true ;
50525062 }
50535063
5054- if (HWACCEL_CDSP == g_hexagon_appcfg.hwaccel_approach ) {
5055- return ggmlhexagon_can_handle_op (ctx, op_tensor);
5056- }
5057-
50585064 if (!ggmlqnn_k_op_caps[ggmlhexagon_get_op_index (op_tensor)].supported ) {
50595065 return false ;
50605066 }
50615067
50625068 struct ggml_tensor * src0 = op_tensor->src [0 ];
50635069 struct ggml_tensor * src1 = op_tensor->src [1 ];
5064- const int64_t ne00 = op_tensor-> src [ 0 ]-> ne [ 0 ] ;
5065- uint32_t src0_rank = ggml_n_dims (src0) ;
5070+ int64_t ne00 = 0 ;
5071+ uint32_t src0_rank = 0 ;
50665072 uint32_t src1_rank = 0 ;
5073+ if (nullptr != src0) {
5074+ src0_rank = ggml_n_dims (src0);
5075+ ne00 = src0->ne [0 ];
5076+ }
50675077 if (nullptr != src1) {
50685078 src1_rank = ggml_n_dims (src1);
50695079 }
@@ -5542,6 +5552,11 @@ static ggml_backend_t ggml_backend_hexagon_device_init_backend(ggml_backend_dev_
55425552 GGMLHEXAGON_LOG_DEBUG (" user's specified hexagon_backend in cfgfile = %d" , g_hexagon_appcfg.hexagon_backend );
55435553 GGMLHEXAGON_LOG_DEBUG (" user's sepcified qnn runtime lib path in cfgfile = %s" , g_hexagon_appcfg.runtimelib_path );
55445554
5555+ if (HWACCEL_QNN_SINGLEGRAPH == g_hexagon_appcfg.hwaccel_approach ) {
5556+ GGMLHEXAGON_LOG_INFO (" HWACCEL_QNN_SINGLEGRAPH not supported, using default ggml backend" );
5557+ return nullptr ;
5558+ }
5559+
55455560 if (nullptr == params) {
55465561 GGMLHEXAGON_LOG_DEBUG (" program specified param is nullptr" );
55475562 dev_index = (g_hexagon_appcfg.hexagon_backend > 0 ) ? g_hexagon_appcfg.hexagon_backend : 0 ;
@@ -5600,11 +5615,6 @@ static ggml_backend_buffer_t ggml_backend_hexagon_device_buffer_from_host_ptr(gg
56005615 GGML_UNUSED (max_tensor_size);
56015616}
56025617
5603- static bool ggml_backend_hexagon_device_supports_op (ggml_backend_dev_t dev, const struct ggml_tensor * op) {
5604- ggml_backend_hexagon_context * ctx = (ggml_backend_hexagon_context *) dev->context ;
5605- return (ggmlbackend_can_handle_op (ctx,op));
5606- }
5607-
56085618static bool ggml_backend_hexagon_device_supports_buft (ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
56095619 GGML_UNUSED (dev);
56105620 return ggml_backend_buft_is_host (buft);
@@ -5620,7 +5630,7 @@ static struct ggml_backend_device_i ggml_backend_hexagon_device_interface = {
56205630 /* .get_buffer_type = */ ggml_backend_hexagon_device_get_buffer_type,
56215631 /* .get_host_buffer_type = */ nullptr ,
56225632 /* .buffer_from_host_ptr = */ ggml_backend_hexagon_device_buffer_from_host_ptr,
5623- /* .supports_op = */ ggml_backend_hexagon_device_supports_op ,
5633+ /* .supports_op = */ nullptr ,
56245634 /* .supports_buft = */ ggml_backend_hexagon_device_supports_buft,
56255635 /* .offload_op = */ nullptr ,
56265636 /* .event_new = */ nullptr ,
@@ -5719,23 +5729,33 @@ ggml_backend_reg_t ggml_backend_hexagon_reg() {
57195729
57205730 // case-2: normal scenario, such as llama-cli or UI application
57215731 ggmlhexagon_load_cfg ();
5722- GGMLHEXAGON_LOG_INFO (" hwaccel approach=%d(%s)" , g_hexagon_appcfg.hwaccel_approach ,
5732+ GGMLHEXAGON_LOG_DEBUG (" hwaccel approach=%d(%s)" , g_hexagon_appcfg.hwaccel_approach ,
57235733 ggmlhexagon_get_hwaccel_approach_name (g_hexagon_appcfg.hwaccel_approach ));
5724- GGMLHEXAGON_LOG_INFO (" user's specified hexagon_backend=%d" , g_hexagon_appcfg.hexagon_backend );
5725- GGMLHEXAGON_LOG_INFO (" user's specified runtime lib path=%s" , g_hexagon_appcfg.runtimelib_path );
5734+ GGMLHEXAGON_LOG_DEBUG (" user's specified hexagon_backend=%d" , g_hexagon_appcfg.hexagon_backend );
5735+ GGMLHEXAGON_LOG_DEBUG (" user's specified runtime lib path=%s" , g_hexagon_appcfg.runtimelib_path );
57265736 if (g_hexagon_appcfg.hexagon_backend >= GGML_HEXAGON_MAX_DEVICES) {
5727- GGMLHEXAGON_LOG_INFO (" assume default ggml backend" );
5737+ GGMLHEXAGON_LOG_INFO (" using default ggml backend" );
57285738 GGMLHEXAGON_LOG_DEBUG (" leave ggml_backend_hexagon_reg" );
57295739 return nullptr ;
57305740 }
57315741
5742+ if (HWACCEL_QNN_SINGLEGRAPH == g_hexagon_appcfg.hwaccel_approach ) {
5743+ GGMLHEXAGON_LOG_INFO (" HWACCEL_QNN_SINGLEGRAPH not supported, using default ggml backend" );
5744+ return nullptr ;
5745+ }
5746+
57325747 {
57335748 static std::mutex mutex;
57345749 std::lock_guard<std::mutex> lock (mutex);
57355750 if (!initialized) {
57365751 ggml_backend_hexagon_reg_context * ctx = new ggml_backend_hexagon_reg_context;
57375752
57385753 for (int i = 0 ; i < ggml_backend_hexagon_get_device_count (); i++) {
5754+ if (g_hexagon_appcfg.hwaccel_approach == HWACCEL_CDSP) {
5755+ ggml_backend_hexagon_device_interface.supports_op = ggmlhexagon_can_handle_op_through_cdsp;
5756+ } else {
5757+ ggml_backend_hexagon_device_interface.supports_op = ggmlhexagon_can_handle_op_through_qnn;
5758+ }
57395759 ggml_backend_dev_t dev = new ggml_backend_device {
57405760 /* .iface = */ ggml_backend_hexagon_device_interface,
57415761 /* .reg = */ ®,
@@ -5763,18 +5783,18 @@ const char * ggml_backend_hexagon_get_devname(size_t dev_num) {
57635783 if (dev_num == HEXAGON_BACKEND_GGML)
57645784 return " ggml" ;
57655785 else
5766- return " ggml-hexagon " ;
5786+ return " HEXAGON_BACKEND_CDSP " ;
57675787 }
57685788
57695789 switch (dev_num) {
57705790 case HEXAGON_BACKEND_QNNCPU:
5771- return " QNN-CPU " ;
5791+ return " HEXAGON_BACKEND_QNN_CPU " ;
57725792 case HEXAGON_BACKEND_QNNGPU:
5773- return " QNN-GPU " ;
5793+ return " HEXAGON_BACKEND_QNN_GPU " ;
57745794 case HEXAGON_BACKEND_QNNNPU:
5775- return " QNN-NPU " ;
5795+ return " HEXAGON_BACKEND_QNN_NPU " ;
57765796 case HEXAGON_BACKEND_GGML:
5777- return " ggml" ; // "fake" QNN backend, used for compare performance between QNN backend and original GGML
5777+ return " ggml" ; // "fake" QNN backend, used to compare performance between the hexagon backend and the default ggml backend
57785798 default :
57795799 return " unknown" ;
57805800 }
@@ -5826,6 +5846,11 @@ ggml_backend_t ggml_backend_hexagon_init(size_t device, const char * qnn_lib_pat
58265846 if (nullptr == qnn_lib_path)
58275847 return nullptr ;
58285848
5849+ if (HWACCEL_QNN_SINGLEGRAPH == g_hexagon_appcfg.hwaccel_approach ) {
5850+ GGMLHEXAGON_LOG_INFO (" HWACCEL_QNN_SINGLEGRAPH not supported, using default ggml backend" );
5851+ return nullptr ;
5852+ }
5853+
58295854 GGMLHEXAGON_LOG_DEBUG (" device %d" , device);
58305855 GGMLHEXAGON_LOG_DEBUG (" qnn_lib_path %s" , qnn_lib_path);
58315856 if (device >= GGML_HEXAGON_MAX_DEVICES) {
0 commit comments