@@ -868,7 +868,8 @@ static void ggmlhexagon_print_running_timestamp(ggml_backend_hexagon_context * c
868868 memset (timestamp, 0 , GGMLHEXAGON_TMPBUF_LEN);
869869 ggmlhexagon_get_timestring (timestamp);
870870 if (HWACCEL_CDSP == g_hexagon_appcfg.hwaccel_approach ) {
871- GGMLHEXAGON_LOG_INFO (" only offload GGML_OP_ADD : %s" , g_hexagon_appcfg.enable_q_mulmat ? " NO" : " YES" );
871+ GGMLHEXAGON_LOG_INFO (" only offload GGML_OP_ADD: %s" , g_hexagon_appcfg.enable_mulmat_cdsp ? " NO" : " YES" );
872+ GGMLHEXAGON_LOG_INFO (" offload quantize GGML_OP_MUL_MAT: %s" , g_hexagon_appcfg.enable_q_mulmat ? " YES" : " NO" );
872873 } else {
873874 GGMLHEXAGON_LOG_INFO (" only offload GGML_OP_ADD: NO" );
874875 }
@@ -1437,7 +1438,7 @@ static void ggmlhexagon_load_cfg() {
14371438 qnncfg_instance.load (cfg_filename);
14381439 qnncfg_instance.dump ([](const std::string & section, const std::string & key, const std::string value) {
14391440 std::ostringstream tmposs;
1440- tmposs << " section[" << std::setw (10 ) << std::left << section << " ],[" << std::setw (25 ) << std::left << key << " ] = [" << value << " ]" << std::endl ;
1441+ tmposs << " section[" << std::setw (10 ) << std::left << section << " ],[" << std::setw (25 ) << std::left << key << " ] = [" << value << " ]" ;
14411442 GGMLHEXAGON_LOG_INFO (" %s" , tmposs.str ().c_str ());
14421443 });
14431444 std::string precision_mode;
@@ -1453,11 +1454,10 @@ static void ggmlhexagon_load_cfg() {
14531454 qnncfg_instance.get_stringvalue (" qnn" , " precision_mode" , precision_mode, " fp32" );
14541455 qnncfg_instance.get_intvalue (" cdsp" , " enable_mulmat_cdsp" , g_hexagon_appcfg.enable_mulmat_cdsp , 0 );
14551456 qnncfg_instance.get_intvalue (" cdsp" , " enable_q_mulmat" , g_hexagon_appcfg.enable_q_mulmat , 0 );
1456- GGMLHEXAGON_LOG_INFO (" print_qnn_internal_log=%d" , g_hexagon_appcfg.print_qnn_internal_log );
14571457 GGMLHEXAGON_LOG_INFO (" hwaccel_approach=%d(%s)" , g_hexagon_appcfg.hwaccel_approach ,
14581458 ggmlhexagon_get_hwaccel_approach_name (g_hexagon_appcfg.hwaccel_approach ));
1459- GGMLHEXAGON_LOG_INFO (" hexagon_backend=%d" , g_hexagon_appcfg.hexagon_backend );
1460- GGMLHEXAGON_LOG_INFO ( " npu inference precision mode=%s " , precision_mode. c_str ( ));
1459+ GGMLHEXAGON_LOG_INFO (" hexagon_backend=%d(%s) " , g_hexagon_appcfg.hexagon_backend ,
1460+ ggml_backend_hexagon_get_devname (g_hexagon_appcfg. hexagon_backend ));
14611461 GGMLHEXAGON_LOG_INFO (" qnn runtime lib path=%s" , g_hexagon_appcfg.runtimelib_path );
14621462 if (precision_mode.find (" fp16" ) != std::string::npos) {
14631463 g_hexagon_appcfg.precision_mode = 1 ;
@@ -4976,9 +4976,6 @@ static void ggmlhexagon_compute(ggml_backend_hexagon_context * ctx, struct ggml_
49764976 dsptensor_2.nb [2 ] = dst->nb [2 ];
49774977 dsptensor_2.nb [3 ] = dst->nb [3 ];
49784978
4979- // GGMLQNN_DUMP_DSPTENSOR(&dsptensor_0);
4980- // GGMLQNN_DUMP_DSPTENSOR(&dsptensor_1);
4981- // GGMLQNN_DUMP_DSPTENSOR(&dsptensor_2);
49824979 hexagon_error = op_func (ctx->ggmlop_handle , &dsptensor_0, &dsptensor_1, &dsptensor_2);
49834980 if (AEE_SUCCESS != hexagon_error) {
49844981 GGMLHEXAGON_LOG_WARN (" ggmlop %s computation fail on cdsp" , ggml_op_name (op->op ));
@@ -4991,18 +4988,23 @@ static void ggmlhexagon_compute(ggml_backend_hexagon_context * ctx, struct ggml_
49914988// =================================================================================================
49924989// section-8: implementation of ggml-hexagon backend according to specification in ggml backend subsystem
49934990// =================================================================================================
4994- // hwaccel through cDSP
4995- static bool ggmlhexagon_can_handle_op ( const ggml_backend_hexagon_context * ctx, const struct ggml_tensor * op_tensor) {
4991+ static bool ggmlhexagon_can_handle_op_through_cdsp ( ggml_backend_dev_t dev, const struct ggml_tensor * op_tensor) {
4992+ ggml_backend_hexagon_context * ctx = (ggml_backend_hexagon_context *)dev-> context ;
49964993 ggmlhexagon_dump_op_info (op_tensor);
4994+
49974995 if (!ggmlhexagon_k_op_caps[ggmlhexagon_get_op_index (op_tensor)].supported ) {
49984996 return false ;
49994997 }
50004998
50014999 struct ggml_tensor * src0 = op_tensor->src [0 ];
50025000 struct ggml_tensor * src1 = op_tensor->src [1 ];
5003- const int64_t ne00 = op_tensor-> src [ 0 ]-> ne [ 0 ] ;
5004- uint32_t src0_rank = ggml_n_dims (src0) ;
5001+ int64_t ne00 = 0 ;
5002+ uint32_t src0_rank = 0 ;
50055003 uint32_t src1_rank = 0 ;
5004+ if (nullptr != src0) {
5005+ src0_rank = ggml_n_dims (src0);
5006+ ne00 = src0->ne [0 ];
5007+ }
50065008 if (nullptr != src1) {
50075009 src1_rank = ggml_n_dims (src1);
50085010 }
@@ -5024,7 +5026,11 @@ static bool ggmlhexagon_can_handle_op(const ggml_backend_hexagon_context * ctx,
50245026 if (!ggml_are_same_shape (src0, src1)) {
50255027 return false ;
50265028 }
5027- break ;
5029+
5030+ if (ne00 < 32 )
5031+ return false ;
5032+
5033+ return ggmlhexagon_same_types (ctx, op_tensor);
50285034 }
50295035 case GGML_OP_MUL_MAT:
50305036 {
@@ -5046,24 +5052,25 @@ static bool ggmlhexagon_can_handle_op(const ggml_backend_hexagon_context * ctx,
50465052 return (src0->type == GGML_TYPE_F32) && (src1->type == GGML_TYPE_F32) && (op_tensor->type == GGML_TYPE_F32);
50475053}
50485054
5049- static bool ggmlbackend_can_handle_op (const ggml_backend_hexagon_context * ctx, const struct ggml_tensor * op_tensor) {
5055+ static bool ggmlhexagon_can_handle_op_through_qnn (ggml_backend_dev_t dev, const struct ggml_tensor * op_tensor) {
5056+ ggml_backend_hexagon_context * ctx = (ggml_backend_hexagon_context *)dev->context ;
50505057 if (op_tensor->op == GGML_OP_NONE) {
50515058 return true ;
50525059 }
50535060
5054- if (HWACCEL_CDSP == g_hexagon_appcfg.hwaccel_approach ) {
5055- return ggmlhexagon_can_handle_op (ctx, op_tensor);
5056- }
5057-
50585061 if (!ggmlqnn_k_op_caps[ggmlhexagon_get_op_index (op_tensor)].supported ) {
50595062 return false ;
50605063 }
50615064
50625065 struct ggml_tensor * src0 = op_tensor->src [0 ];
50635066 struct ggml_tensor * src1 = op_tensor->src [1 ];
5064- const int64_t ne00 = op_tensor-> src [ 0 ]-> ne [ 0 ] ;
5065- uint32_t src0_rank = ggml_n_dims (src0) ;
5067+ int64_t ne00 = 0 ;
5068+ uint32_t src0_rank = 0 ;
50665069 uint32_t src1_rank = 0 ;
5070+ if (nullptr != src0) {
5071+ src0_rank = ggml_n_dims (src0);
5072+ ne00 = src0->ne [0 ];
5073+ }
50675074 if (nullptr != src1) {
50685075 src1_rank = ggml_n_dims (src1);
50695076 }
@@ -5542,6 +5549,11 @@ static ggml_backend_t ggml_backend_hexagon_device_init_backend(ggml_backend_dev_
55425549 GGMLHEXAGON_LOG_DEBUG (" user's specified hexagon_backend in cfgfile = %d" , g_hexagon_appcfg.hexagon_backend );
55435550 GGMLHEXAGON_LOG_DEBUG (" user's sepcified qnn runtime lib path in cfgfile = %s" , g_hexagon_appcfg.runtimelib_path );
55445551
5552+ if (HWACCEL_QNN_SINGLEGRAPH == g_hexagon_appcfg.hwaccel_approach ) {
5553+ GGMLHEXAGON_LOG_INFO (" HWACCEL_QNN_SINGLEGRAPH not supported, using default ggml backend" );
5554+ return nullptr ;
5555+ }
5556+
55455557 if (nullptr == params) {
55465558 GGMLHEXAGON_LOG_DEBUG (" program specified param is nullptr" );
55475559 dev_index = (g_hexagon_appcfg.hexagon_backend > 0 ) ? g_hexagon_appcfg.hexagon_backend : 0 ;
@@ -5600,11 +5612,6 @@ static ggml_backend_buffer_t ggml_backend_hexagon_device_buffer_from_host_ptr(gg
56005612 GGML_UNUSED (max_tensor_size);
56015613}
56025614
5603- static bool ggml_backend_hexagon_device_supports_op (ggml_backend_dev_t dev, const struct ggml_tensor * op) {
5604- ggml_backend_hexagon_context * ctx = (ggml_backend_hexagon_context *) dev->context ;
5605- return (ggmlbackend_can_handle_op (ctx,op));
5606- }
5607-
56085615static bool ggml_backend_hexagon_device_supports_buft (ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
56095616 GGML_UNUSED (dev);
56105617 return ggml_backend_buft_is_host (buft);
@@ -5620,7 +5627,7 @@ static struct ggml_backend_device_i ggml_backend_hexagon_device_interface = {
56205627 /* .get_buffer_type = */ ggml_backend_hexagon_device_get_buffer_type,
56215628 /* .get_host_buffer_type = */ nullptr ,
56225629 /* .buffer_from_host_ptr = */ ggml_backend_hexagon_device_buffer_from_host_ptr,
5623- /* .supports_op = */ ggml_backend_hexagon_device_supports_op ,
5630+ /* .supports_op = */ nullptr ,
56245631 /* .supports_buft = */ ggml_backend_hexagon_device_supports_buft,
56255632 /* .offload_op = */ nullptr ,
56265633 /* .event_new = */ nullptr ,
@@ -5719,23 +5726,33 @@ ggml_backend_reg_t ggml_backend_hexagon_reg() {
57195726
57205727 // case-2: normal scenario, such as llama-cli or UI application
57215728 ggmlhexagon_load_cfg ();
5722- GGMLHEXAGON_LOG_INFO (" hwaccel approach=%d(%s)" , g_hexagon_appcfg.hwaccel_approach ,
5729+ GGMLHEXAGON_LOG_DEBUG (" hwaccel approach=%d(%s)" , g_hexagon_appcfg.hwaccel_approach ,
57235730 ggmlhexagon_get_hwaccel_approach_name (g_hexagon_appcfg.hwaccel_approach ));
5724- GGMLHEXAGON_LOG_INFO (" user's specified hexagon_backend=%d" , g_hexagon_appcfg.hexagon_backend );
5725- GGMLHEXAGON_LOG_INFO (" user's specified runtime lib path=%s" , g_hexagon_appcfg.runtimelib_path );
5731+ GGMLHEXAGON_LOG_DEBUG (" user's specified hexagon_backend=%d" , g_hexagon_appcfg.hexagon_backend );
5732+ GGMLHEXAGON_LOG_DEBUG (" user's specified runtime lib path=%s" , g_hexagon_appcfg.runtimelib_path );
57265733 if (g_hexagon_appcfg.hexagon_backend >= GGML_HEXAGON_MAX_DEVICES) {
5727- GGMLHEXAGON_LOG_INFO (" assume default ggml backend" );
5734+ GGMLHEXAGON_LOG_INFO (" using default ggml backend" );
57285735 GGMLHEXAGON_LOG_DEBUG (" leave ggml_backend_hexagon_reg" );
57295736 return nullptr ;
57305737 }
57315738
5739+ if (HWACCEL_QNN_SINGLEGRAPH == g_hexagon_appcfg.hwaccel_approach ) {
5740+ GGMLHEXAGON_LOG_INFO (" HWACCEL_QNN_SINGLEGRAPH not supported, using default ggml backend" );
5741+ return nullptr ;
5742+ }
5743+
57325744 {
57335745 static std::mutex mutex;
57345746 std::lock_guard<std::mutex> lock (mutex);
57355747 if (!initialized) {
57365748 ggml_backend_hexagon_reg_context * ctx = new ggml_backend_hexagon_reg_context;
57375749
57385750 for (int i = 0 ; i < ggml_backend_hexagon_get_device_count (); i++) {
5751+ if (g_hexagon_appcfg.hwaccel_approach == HWACCEL_CDSP) {
5752+ ggml_backend_hexagon_device_interface.supports_op = ggmlhexagon_can_handle_op_through_cdsp;
5753+ } else {
5754+ ggml_backend_hexagon_device_interface.supports_op = ggmlhexagon_can_handle_op_through_qnn;
5755+ }
57395756 ggml_backend_dev_t dev = new ggml_backend_device {
57405757 /* .iface = */ ggml_backend_hexagon_device_interface,
57415758 /* .reg = */ ®,
@@ -5763,18 +5780,18 @@ const char * ggml_backend_hexagon_get_devname(size_t dev_num) {
57635780 if (dev_num == HEXAGON_BACKEND_GGML)
57645781 return " ggml" ;
57655782 else
5766- return " ggml-hexagon " ;
5783+ return " HEXAGON_BACKEND_CDSP " ;
57675784 }
57685785
57695786 switch (dev_num) {
57705787 case HEXAGON_BACKEND_QNNCPU:
5771- return " QNN-CPU " ;
5788+ return " HEXAGON_BACKEND_QNN_CPU " ;
57725789 case HEXAGON_BACKEND_QNNGPU:
5773- return " QNN-GPU " ;
5790+ return " HEXAGON_BACKEND_QNN_GPU " ;
57745791 case HEXAGON_BACKEND_QNNNPU:
5775- return " QNN-NPU " ;
5792+ return " HEXAGON_BACKEND_QNN_NPU " ;
57765793 case HEXAGON_BACKEND_GGML:
5777- return " ggml" ; // "fake" QNN backend, used for compare performance between QNN backend and original GGML
5794+ return " ggml" ; // "fake" QNN backend, used for compare performance between hexagon backend and the default ggml backend
57785795 default :
57795796 return " unknown" ;
57805797 }
@@ -5826,6 +5843,11 @@ ggml_backend_t ggml_backend_hexagon_init(size_t device, const char * qnn_lib_pat
58265843 if (nullptr == qnn_lib_path)
58275844 return nullptr ;
58285845
5846+ if (HWACCEL_QNN_SINGLEGRAPH == g_hexagon_appcfg.hwaccel_approach ) {
5847+ GGMLHEXAGON_LOG_INFO (" HWACCEL_QNN_SINGLEGRAPH not supported, using default ggml backend" );
5848+ return nullptr ;
5849+ }
5850+
58295851 GGMLHEXAGON_LOG_DEBUG (" device %d" , device);
58305852 GGMLHEXAGON_LOG_DEBUG (" qnn_lib_path %s" , qnn_lib_path);
58315853 if (device >= GGML_HEXAGON_MAX_DEVICES) {
0 commit comments