@@ -751,8 +751,10 @@ static void ggmlhexagon_log_internal(ggml_log_level level, const char * file, co
751751static void ggmlhexagon_print_tensors_info (const char * func_name, const ggml_backend_hexagon_context * ctx,
752752 const ggml_tensor * src0, const ggml_tensor * src1, const ggml_tensor * dst) {
753753 // skip sanity check of params because of performance concern
754- if (0 == g_hexagon_appcfg.print_tensors_info )
755- return ;
754+ if (0 == g_hexagon_appcfg.dump_op_info ) {
755+ if (0 == g_hexagon_appcfg.print_tensors_info )
756+ return ;
757+ }
756758
757759 if (nullptr != func_name && nullptr != ctx) {
758760 GGMLHEXAGON_LOG_DEBUG (" call %s in dev %s\n " , func_name, ctx->name );
@@ -862,13 +864,17 @@ static void ggmlhexagon_get_timestring(char * p_currenttime) {
862864}
863865
864866static void ggmlhexagon_print_running_timestamp (ggml_backend_hexagon_context * ctx) {
865- GGMLHEXAGON_LOG_INFO (" hwaccel approach is %d(%s)" , g_hexagon_appcfg.hwaccel_approach ,
866- ggmlhexagon_get_hwaccel_approach_name (g_hexagon_appcfg.hwaccel_approach ));
867867 char timestamp[GGMLHEXAGON_TMPBUF_LEN];
868868 memset (timestamp, 0 , GGMLHEXAGON_TMPBUF_LEN);
869+
870+ GGMLHEXAGON_LOG_INFO (" hwaccel approach is %d(%s)" , g_hexagon_appcfg.hwaccel_approach ,
871+ ggmlhexagon_get_hwaccel_approach_name (g_hexagon_appcfg.hwaccel_approach ));
872+ GGMLHEXAGON_LOG_INFO (" hexagon_backend=%d(%s)" , g_hexagon_appcfg.hexagon_backend ,
873+ ggml_backend_hexagon_get_devname (g_hexagon_appcfg.hexagon_backend ));
869874 ggmlhexagon_get_timestring (timestamp);
870875 if (HWACCEL_CDSP == g_hexagon_appcfg.hwaccel_approach ) {
871- GGMLHEXAGON_LOG_INFO (" only offload GGML_OP_ADD : %s" , g_hexagon_appcfg.enable_q_mulmat ? " NO" : " YES" );
876+ GGMLHEXAGON_LOG_INFO (" only offload GGML_OP_ADD: %s" , g_hexagon_appcfg.enable_mulmat_cdsp ? " NO" : " YES" );
877+ GGMLHEXAGON_LOG_INFO (" offload quantize GGML_OP_MUL_MAT: %s" , g_hexagon_appcfg.enable_q_mulmat ? " YES" : " NO" );
872878 } else {
873879 GGMLHEXAGON_LOG_INFO (" only offload GGML_OP_ADD: NO" );
874880 }
@@ -1437,7 +1443,7 @@ static void ggmlhexagon_load_cfg() {
14371443 qnncfg_instance.load (cfg_filename);
14381444 qnncfg_instance.dump ([](const std::string & section, const std::string & key, const std::string value) {
14391445 std::ostringstream tmposs;
1440- tmposs << " section[" << std::setw (10 ) << std::left << section << " ],[" << std::setw (25 ) << std::left << key << " ] = [" << value << " ]" << std::endl ;
1446+ tmposs << " section[" << std::setw (10 ) << std::left << section << " ],[" << std::setw (25 ) << std::left << key << " ] = [" << value << " ]" ;
14411447 GGMLHEXAGON_LOG_INFO (" %s" , tmposs.str ().c_str ());
14421448 });
14431449 std::string precision_mode;
@@ -1453,11 +1459,10 @@ static void ggmlhexagon_load_cfg() {
14531459 qnncfg_instance.get_stringvalue (" qnn" , " precision_mode" , precision_mode, " fp32" );
14541460 qnncfg_instance.get_intvalue (" cdsp" , " enable_mulmat_cdsp" , g_hexagon_appcfg.enable_mulmat_cdsp , 0 );
14551461 qnncfg_instance.get_intvalue (" cdsp" , " enable_q_mulmat" , g_hexagon_appcfg.enable_q_mulmat , 0 );
1456- GGMLHEXAGON_LOG_INFO (" print_qnn_internal_log=%d" , g_hexagon_appcfg.print_qnn_internal_log );
14571462 GGMLHEXAGON_LOG_INFO (" hwaccel_approach=%d(%s)" , g_hexagon_appcfg.hwaccel_approach ,
14581463 ggmlhexagon_get_hwaccel_approach_name (g_hexagon_appcfg.hwaccel_approach ));
1459- GGMLHEXAGON_LOG_INFO (" hexagon_backend=%d" , g_hexagon_appcfg.hexagon_backend );
1460- GGMLHEXAGON_LOG_INFO ( " npu inference precision mode=%s " , precision_mode. c_str ( ));
1464+ GGMLHEXAGON_LOG_INFO (" hexagon_backend=%d(%s) " , g_hexagon_appcfg.hexagon_backend ,
1465+ ggml_backend_hexagon_get_devname (g_hexagon_appcfg. hexagon_backend ));
14611466 GGMLHEXAGON_LOG_INFO (" qnn runtime lib path=%s" , g_hexagon_appcfg.runtimelib_path );
14621467 if (precision_mode.find (" fp16" ) != std::string::npos) {
14631468 g_hexagon_appcfg.precision_mode = 1 ;
@@ -4853,7 +4858,7 @@ static int ggmlhexagon_init_dsp(ggml_backend_hexagon_context * ctx) {
48534858 ggmlop_domain_uri_len = strlen (ggmlop_URI) + MAX_DOMAIN_NAMELEN;
48544859 ggmlop_domain_uri = (char *)malloc (ggmlop_domain_uri_len);
48554860 snprintf (ggmlop_domain_uri, ggmlop_domain_uri_len, " %s%s" , ggmlop_URI, uri);
4856- GGMLHEXAGON_LOG_INFO (" ggmlop domain uri:%s" , ggmlop_domain_uri);
4861+ GGMLHEXAGON_LOG_DEBUG (" ggmlop domain uri:%s" , ggmlop_domain_uri);
48574862 hexagon_error = ggmlop_dsp_open (ggmlop_domain_uri, &ctx->ggmlop_handle );
48584863 if (AEE_SUCCESS == hexagon_error) {
48594864 GGMLHEXAGON_LOG_INFO (" succeed to open domain %d(%s)" , domain_id, ggmlhexagon_get_dsp_name (domain_id));
@@ -4976,9 +4981,6 @@ static void ggmlhexagon_compute(ggml_backend_hexagon_context * ctx, struct ggml_
49764981 dsptensor_2.nb [2 ] = dst->nb [2 ];
49774982 dsptensor_2.nb [3 ] = dst->nb [3 ];
49784983
4979- // GGMLQNN_DUMP_DSPTENSOR(&dsptensor_0);
4980- // GGMLQNN_DUMP_DSPTENSOR(&dsptensor_1);
4981- // GGMLQNN_DUMP_DSPTENSOR(&dsptensor_2);
49824984 hexagon_error = op_func (ctx->ggmlop_handle , &dsptensor_0, &dsptensor_1, &dsptensor_2);
49834985 if (AEE_SUCCESS != hexagon_error) {
49844986 GGMLHEXAGON_LOG_WARN (" ggmlop %s computation fail on cdsp" , ggml_op_name (op->op ));
@@ -4991,40 +4993,44 @@ static void ggmlhexagon_compute(ggml_backend_hexagon_context * ctx, struct ggml_
49914993// =================================================================================================
49924994// section-8: implementation of ggml-hexagon backend according to specification in ggml backend subsystem
49934995// =================================================================================================
4994- // hwaccel through cDSP
4995- static bool ggmlhexagon_can_handle_op (const ggml_backend_hexagon_context * ctx, const struct ggml_tensor * op_tensor) {
4996- ggmlhexagon_dump_op_info (op_tensor);
4996+ static bool ggmlhexagon_can_handle_op_through_cdsp (ggml_backend_dev_t dev, const struct ggml_tensor * op_tensor) {
4997+ ggml_backend_hexagon_context * ctx = (ggml_backend_hexagon_context *)dev->context ;
4998+ GGML_UNUSED (ctx);
4999+ if (op_tensor->op == GGML_OP_NONE) {
5000+ return true ;
5001+ }
5002+
49975003 if (!ggmlhexagon_k_op_caps[ggmlhexagon_get_op_index (op_tensor)].supported ) {
49985004 return false ;
49995005 }
50005006
5001- struct ggml_tensor * src0 = op_tensor->src [0 ];
5002- struct ggml_tensor * src1 = op_tensor->src [1 ];
5003- const int64_t ne00 = op_tensor-> src [ 0 ]-> ne [ 0 ] ;
5004- uint32_t src0_rank = ggml_n_dims (src0) ;
5007+ const struct ggml_tensor * src0 = op_tensor->src [0 ];
5008+ const struct ggml_tensor * src1 = op_tensor->src [1 ];
5009+ int64_t ne00 = 0 ;
5010+ uint32_t src0_rank = 0 ;
50055011 uint32_t src1_rank = 0 ;
5012+ if (nullptr != src0) {
5013+ src0_rank = ggml_n_dims (src0);
5014+ ne00 = src0->ne [0 ];
5015+ }
50065016 if (nullptr != src1) {
50075017 src1_rank = ggml_n_dims (src1);
50085018 }
50095019
5010- // available in the early stage, should be removed in the product stage
5011- bool support = false ;
5012- if (g_hexagon_appcfg.enable_mulmat_cdsp )
5013- support = ((op_tensor->op == GGML_OP_ADD) || (op_tensor->op == GGML_OP_MUL_MAT));
5014- else
5015- support = (op_tensor->op == GGML_OP_ADD);
5016- if (!support) {
5017- return false ;
5018- }
5019-
50205020 switch (op_tensor->op ) {
50215021 case GGML_OP_ADD:
50225022 case GGML_OP_SUB:
50235023 {
50245024 if (!ggml_are_same_shape (src0, src1)) {
50255025 return false ;
50265026 }
5027- break ;
5027+
5028+ // FIXME:remove this filter
5029+ if (ne00 < 32 )
5030+ return false ;
5031+
5032+ // FIXME:remove this filter
5033+ return ggmlhexagon_same_types (ctx, op_tensor);
50285034 }
50295035 case GGML_OP_MUL_MAT:
50305036 {
@@ -5034,6 +5040,7 @@ static bool ggmlhexagon_can_handle_op(const ggml_backend_hexagon_context * ctx,
50345040 if (src0_rank != 2 )
50355041 return false ;
50365042
5043+ ggmlhexagon_dump_op_info (op_tensor);
50375044 if (g_hexagon_appcfg.enable_q_mulmat )
50385045 return (src0->type == GGML_TYPE_F32 || ggml_is_quantized (src0->type ))
50395046 && (src1->type == GGML_TYPE_F32) && (op_tensor->type == GGML_TYPE_F32);
@@ -5043,27 +5050,28 @@ static bool ggmlhexagon_can_handle_op(const ggml_backend_hexagon_context * ctx,
50435050 default :
50445051 break ;
50455052 }
5046- return (src0-> type == GGML_TYPE_F32) && (src1-> type == GGML_TYPE_F32) && (op_tensor-> type == GGML_TYPE_F32) ;
5053+ return false ;
50475054}
50485055
5049- static bool ggmlbackend_can_handle_op (const ggml_backend_hexagon_context * ctx, const struct ggml_tensor * op_tensor) {
5056+ static bool ggmlhexagon_can_handle_op_through_qnn (ggml_backend_dev_t dev, const struct ggml_tensor * op_tensor) {
5057+ ggml_backend_hexagon_context * ctx = (ggml_backend_hexagon_context *)dev->context ;
50505058 if (op_tensor->op == GGML_OP_NONE) {
50515059 return true ;
50525060 }
50535061
5054- if (HWACCEL_CDSP == g_hexagon_appcfg.hwaccel_approach ) {
5055- return ggmlhexagon_can_handle_op (ctx, op_tensor);
5056- }
5057-
50585062 if (!ggmlqnn_k_op_caps[ggmlhexagon_get_op_index (op_tensor)].supported ) {
50595063 return false ;
50605064 }
50615065
50625066 struct ggml_tensor * src0 = op_tensor->src [0 ];
50635067 struct ggml_tensor * src1 = op_tensor->src [1 ];
5064- const int64_t ne00 = op_tensor-> src [ 0 ]-> ne [ 0 ] ;
5065- uint32_t src0_rank = ggml_n_dims (src0) ;
5068+ int64_t ne00 = 0 ;
5069+ uint32_t src0_rank = 0 ;
50665070 uint32_t src1_rank = 0 ;
5071+ if (nullptr != src0) {
5072+ src0_rank = ggml_n_dims (src0);
5073+ ne00 = src0->ne [0 ];
5074+ }
50675075 if (nullptr != src1) {
50685076 src1_rank = ggml_n_dims (src1);
50695077 }
@@ -5542,6 +5550,11 @@ static ggml_backend_t ggml_backend_hexagon_device_init_backend(ggml_backend_dev_
55425550 GGMLHEXAGON_LOG_DEBUG (" user's specified hexagon_backend in cfgfile = %d" , g_hexagon_appcfg.hexagon_backend );
55435551 GGMLHEXAGON_LOG_DEBUG (" user's sepcified qnn runtime lib path in cfgfile = %s" , g_hexagon_appcfg.runtimelib_path );
55445552
5553+ if (HWACCEL_QNN_SINGLEGRAPH == g_hexagon_appcfg.hwaccel_approach ) {
5554+ GGMLHEXAGON_LOG_INFO (" HWACCEL_QNN_SINGLEGRAPH not supported, using default ggml backend" );
5555+ return nullptr ;
5556+ }
5557+
55455558 if (nullptr == params) {
55465559 GGMLHEXAGON_LOG_DEBUG (" program specified param is nullptr" );
55475560 dev_index = (g_hexagon_appcfg.hexagon_backend > 0 ) ? g_hexagon_appcfg.hexagon_backend : 0 ;
@@ -5600,11 +5613,6 @@ static ggml_backend_buffer_t ggml_backend_hexagon_device_buffer_from_host_ptr(gg
56005613 GGML_UNUSED (max_tensor_size);
56015614}
56025615
5603- static bool ggml_backend_hexagon_device_supports_op (ggml_backend_dev_t dev, const struct ggml_tensor * op) {
5604- ggml_backend_hexagon_context * ctx = (ggml_backend_hexagon_context *) dev->context ;
5605- return (ggmlbackend_can_handle_op (ctx,op));
5606- }
5607-
56085616static bool ggml_backend_hexagon_device_supports_buft (ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
56095617 GGML_UNUSED (dev);
56105618 return ggml_backend_buft_is_host (buft);
@@ -5620,7 +5628,7 @@ static struct ggml_backend_device_i ggml_backend_hexagon_device_interface = {
56205628 /* .get_buffer_type = */ ggml_backend_hexagon_device_get_buffer_type,
56215629 /* .get_host_buffer_type = */ nullptr ,
56225630 /* .buffer_from_host_ptr = */ ggml_backend_hexagon_device_buffer_from_host_ptr,
5623- /* .supports_op = */ ggml_backend_hexagon_device_supports_op ,
5631+ /* .supports_op = */ nullptr ,
56245632 /* .supports_buft = */ ggml_backend_hexagon_device_supports_buft,
56255633 /* .offload_op = */ nullptr ,
56265634 /* .event_new = */ nullptr ,
@@ -5719,23 +5727,33 @@ ggml_backend_reg_t ggml_backend_hexagon_reg() {
57195727
57205728 // case-2: normal scenario, such as llama-cli or UI application
57215729 ggmlhexagon_load_cfg ();
5722- GGMLHEXAGON_LOG_INFO (" hwaccel approach=%d(%s)" , g_hexagon_appcfg.hwaccel_approach ,
5730+ GGMLHEXAGON_LOG_DEBUG (" hwaccel approach=%d(%s)" , g_hexagon_appcfg.hwaccel_approach ,
57235731 ggmlhexagon_get_hwaccel_approach_name (g_hexagon_appcfg.hwaccel_approach ));
5724- GGMLHEXAGON_LOG_INFO (" user's specified hexagon_backend=%d" , g_hexagon_appcfg.hexagon_backend );
5725- GGMLHEXAGON_LOG_INFO (" user's specified runtime lib path=%s" , g_hexagon_appcfg.runtimelib_path );
5732+ GGMLHEXAGON_LOG_DEBUG (" user's specified hexagon_backend=%d" , g_hexagon_appcfg.hexagon_backend );
5733+ GGMLHEXAGON_LOG_DEBUG (" user's specified runtime lib path=%s" , g_hexagon_appcfg.runtimelib_path );
57265734 if (g_hexagon_appcfg.hexagon_backend >= GGML_HEXAGON_MAX_DEVICES) {
5727- GGMLHEXAGON_LOG_INFO (" assume default ggml backend" );
5735+ GGMLHEXAGON_LOG_INFO (" using default ggml backend" );
57285736 GGMLHEXAGON_LOG_DEBUG (" leave ggml_backend_hexagon_reg" );
57295737 return nullptr ;
57305738 }
57315739
5740+ if (HWACCEL_QNN_SINGLEGRAPH == g_hexagon_appcfg.hwaccel_approach ) {
5741+ GGMLHEXAGON_LOG_INFO (" HWACCEL_QNN_SINGLEGRAPH not supported, using default ggml backend" );
5742+ return nullptr ;
5743+ }
5744+
57325745 {
57335746 static std::mutex mutex;
57345747 std::lock_guard<std::mutex> lock (mutex);
57355748 if (!initialized) {
57365749 ggml_backend_hexagon_reg_context * ctx = new ggml_backend_hexagon_reg_context;
57375750
57385751 for (int i = 0 ; i < ggml_backend_hexagon_get_device_count (); i++) {
5752+ if (g_hexagon_appcfg.hwaccel_approach == HWACCEL_CDSP) {
5753+ ggml_backend_hexagon_device_interface.supports_op = ggmlhexagon_can_handle_op_through_cdsp;
5754+ } else {
5755+ ggml_backend_hexagon_device_interface.supports_op = ggmlhexagon_can_handle_op_through_qnn;
5756+ }
57395757 ggml_backend_dev_t dev = new ggml_backend_device {
57405758 /* .iface = */ ggml_backend_hexagon_device_interface,
57415759 /* .reg = */ ®,
@@ -5763,18 +5781,18 @@ const char * ggml_backend_hexagon_get_devname(size_t dev_num) {
57635781 if (dev_num == HEXAGON_BACKEND_GGML)
57645782 return " ggml" ;
57655783 else
5766- return " ggml-hexagon " ;
5784+ return " HEXAGON_BACKEND_CDSP " ;
57675785 }
57685786
57695787 switch (dev_num) {
57705788 case HEXAGON_BACKEND_QNNCPU:
5771- return " QNN-CPU " ;
5789+ return " HEXAGON_BACKEND_QNN_CPU " ;
57725790 case HEXAGON_BACKEND_QNNGPU:
5773- return " QNN-GPU " ;
5791+ return " HEXAGON_BACKEND_QNN_GPU " ;
57745792 case HEXAGON_BACKEND_QNNNPU:
5775- return " QNN-NPU " ;
5793+ return " HEXAGON_BACKEND_QNN_NPU " ;
57765794 case HEXAGON_BACKEND_GGML:
5777- return " ggml" ; // "fake" QNN backend, used for compare performance between QNN backend and original GGML
5777+ return " ggml" ; // "fake" QNN backend, used to compare performance between the hexagon backend and the default ggml backend
57785796 default :
57795797 return " unknown" ;
57805798 }
@@ -5826,6 +5844,11 @@ ggml_backend_t ggml_backend_hexagon_init(size_t device, const char * qnn_lib_pat
58265844 if (nullptr == qnn_lib_path)
58275845 return nullptr ;
58285846
5847+ if (HWACCEL_QNN_SINGLEGRAPH == g_hexagon_appcfg.hwaccel_approach ) {
5848+ GGMLHEXAGON_LOG_INFO (" HWACCEL_QNN_SINGLEGRAPH not supported, using default ggml backend" );
5849+ return nullptr ;
5850+ }
5851+
58295852 GGMLHEXAGON_LOG_DEBUG (" device %d" , device);
58305853 GGMLHEXAGON_LOG_DEBUG (" qnn_lib_path %s" , qnn_lib_path);
58315854 if (device >= GGML_HEXAGON_MAX_DEVICES) {
0 commit comments