1414 * section-6 Hexagon DSP helper function
1515 * section-7 backend helper function / class
1616 * section-8 implementation of ggml-hexagon backend according to specification in ggml backend subsystem
17- * section-9 implementation of general approach through QNN and Hexagon DSP
17+ * section-9 implementation of hwaccel approach through QNN and Hexagon DSP
1818 *
1919 * currently provide following ggml op' implementation through QNN:
2020 * - GGML_OP_ADD/GGML_OP_SUB/GGML_OP_MUL/GGML_OP_DIV/GGML_OP_LOG/GGML_OP_SQRT:
21- * this is a simple skeleton, can expand other ggml ops according to expertise
21+ * this is a simple hwaccel skeleton, can expand other ggml ops according to expertise
2222 * - GGML_OP_MUL_MAT:
23- * this is a complicated skeleton, can expand other ggml ops accordingly
23+ * this is a complicated hwaccel skeleton, can expand other ggml ops accordingly
2424 *
2525 * currently provide following ggml op' implementation through Hexagon DSP:
2626 * - GGML_OP_ADD & GGML_OP_MUL_MAT:
27- * this is a skeleton, can expand other ggml ops accordingly
27+ * this is a hwaccel skeleton, can expand other ggml ops accordingly
2828 *
2929 * Permission is hereby granted, free of charge, to any person obtaining a copy
3030 * of this software and associated documentation files (the "Software"), to
@@ -230,7 +230,7 @@ static void ggmlqnn_compute_diag_mask(ggml_backend_qnn_context * ctx, ggml_ten
230230
231231#define GGMLQNN_CHECK_PARAMS (ctx, src0, src1, dst ) \
232232 do { \
233- if (g_qnn_params.inference_approach != DIRECT_USE_CDSP ) { \
233+ if (g_qnn_params.hwaccel_approach != HWACCEL_CDSP ) { \
234234 if (!ggmlqnn_is_valid_params ((ctx), (src0), (src1), (dst))) { \
235235 return ; \
236236 } \
@@ -270,12 +270,12 @@ enum qnn_profile_level {
270270};
271271
272272// 0: general approach through QNN:offload ggmlop to QNN
273- // 1: general approach through Hexagon cDSP:offload ggmlop to Hexagon cDSP directly
274- // 2: special approach through QNN:mapping entire ggml cgraph to a single QNN graph
275- enum inference_approach {
276- QNN_GENERAL = 0 ,
277- DIRECT_USE_CDSP = 1 ,
278- QNN_SINGLEGRAPH = 2 ,
273+ // 1: special approach through QNN-SINGLEGRAPH:mapping entire ggml cgraph to a single QNN graph
274+ // 2: general approach through Hexagon cDSP:offload ggmlop to Hexagon cDSP directly
275+ enum hwaccel_approach_type {
276+ HWACCEL_QNN = 0 ,
277+ HWACCEL_QNN_SINGLEGRAPH = 1 ,
278+ HWACCEL_CDSP = 2 ,
279279};
280280
281281enum hexagon_dsp_type {
@@ -362,7 +362,7 @@ struct qnn_parameter {
362362 int hvx_threads;
363363 int vtcm_size_in_mb;
364364 int enable_dlbc;
365- int inference_approach ; // 0: QNN_GENERAL 1: DIRECT_USE_CDSP 2: QNN_SINGELGRAPH
365+ int hwaccel_approach ; // 0: HWACCEL_QNN 1: HWACCEL_QNN_SINGLEGRAPH 2: HWACCEL_CDSP
366366 int qnn_backend; // 0: QNN-CPU backend 1: QNN-GPU backend 2: QNN-NPU backend
367367 int enable_mulmat_cdsp; // enable/disable offload mulmat to cDSP
368368 int enable_q_mulmat; // enable/disable offload fp32 & all quantized type mulmat to cDSP
@@ -382,8 +382,8 @@ static struct qnn_parameter g_qnn_params = {
382382 .hvx_threads = 4 ,
383383 .vtcm_size_in_mb = 8 ,
384384 .enable_dlbc = 1 ,
385- .inference_approach = 0 ,
386- .qnn_backend = 2 , // default is QNN-NPU backend
385+ .hwaccel_approach = HWACCEL_CDSP ,
386+ .qnn_backend = QNN_BACKEND_NPU,
387387 .enable_mulmat_cdsp = 0 ,
388388 .enable_q_mulmat = 0 ,
389389 .qnn_cfgfilename = " ggml-qnn.cfg" ,
@@ -1578,13 +1578,12 @@ static void ggmlhexagon_set_rpc_latency(int domain, int qos, int latency) {
15781578
15791579 if (remote_handle_control) {
15801580 struct remote_rpc_control_latency data;
1581- #if 1
1582- data.enable = RPC_PM_QOS;
1583- data.latency = 300 ;
1584- #else
1585- data.enable = RPC_POLL_QOS;
1586- data.latency = 1000;
1587- #endif
1581+ /*
1582+ qos | latency
1583+ -----------------------
1584+ RPC_PM_QOS | 300
1585+ RPC_POLL_QOS | 1000
1586+ */
15881587 data.enable = qos;
15891588 data.latency = latency;
15901589 hexagon_error = remote_handle64_control (DSPRPC_GET_DSP_INFO, DSPRPC_CONTROL_LATENCY, (void *)&data, sizeof (data));
@@ -1926,7 +1925,7 @@ static int ggmlhexagon_init_dsp(ggml_backend_qnn_context * ctx) {
19261925 }
19271926
19281927 if (-1 == domain_id) {
1929- if (NULL != domain_type) {
1928+ if (nullptr != domain_type) {
19301929 if ((strcmp (domain_type, " NSP" ) != 0 && strcmp (domain_type, " HPASS" ) != 0 )) {
19311930 GGMLQNN_LOG_WARN (" invalid domain_type %s. possible values are NSP or HPASS" , domain_type);
19321931 goto bail;
@@ -2188,16 +2187,16 @@ static const char * ggmlqnn_get_htparch_desc(size_t htp_arch) {
21882187 }
21892188}
21902189
2191- static const char * ggmlqnn_get_inference_approach_name (int inference_approach ) {
2192- switch (inference_approach ) {
2193- case QNN_GENERAL :
2194- return " QNN_GENERAL " ;
2195- case DIRECT_USE_CDSP :
2196- return " DIRECT_USE_CDSP " ;
2197- case QNN_SINGLEGRAPH :
2198- return " QNN_SINGLEGRAPH " ;
2190+ static const char * ggmlqnn_get_hwaccel_approach_name (int hwaccel_approach ) {
2191+ switch (hwaccel_approach ) {
2192+ case HWACCEL_QNN :
2193+ return " HWACCEL_QNN " ;
2194+ case HWACCEL_QNN_SINGLEGRAPH :
2195+ return " HWACCEL_QNN_SINGLEGRAPH " ;
2196+ case HWACCEL_CDSP :
2197+ return " HWACCEL_CDSP " ;
21992198 default :
2200- return " unknown approach" ;
2199+ return " unknown hwaccel approach" ;
22012200 }
22022201}
22032202
@@ -3996,7 +3995,7 @@ void qnn_instance::htp_enter_performance_mode() {
39963995}
39973996
39983997static void ggmlqnn_set_runtime_path (size_t device, const std::string & path) {
3999- if ((QNN_BACKEND_NPU == device) || (DIRECT_USE_CDSP == g_qnn_params.inference_approach )) {
3998+ if ((QNN_BACKEND_NPU == device) || (HWACCEL_CDSP == g_qnn_params.hwaccel_approach )) {
40003999 if (0 == setenv (" LD_LIBRARY_PATH" ,
40014000 (path +
40024001 " :/vendor/dsp/cdsp:/vendor/lib64:/vendor/dsp/dsp:/vendor/dsp/images" ).c_str (),
@@ -4224,7 +4223,7 @@ static void ggmlqnn_load_cfg() {
42244223 qnncfg_instance.get_intvalue (" general" , " enable_perf" , g_qnn_params.enable_perf , 0 );
42254224 qnncfg_instance.get_intvalue (" general" , " print_tensors_info" , g_qnn_params.print_tensors_info , 0 );
42264225 qnncfg_instance.get_intvalue (" general" , " dump_op_info" , g_qnn_params.dump_op_info , 0 );
4227- qnncfg_instance.get_intvalue (" general" , " inference_approach " , g_qnn_params.inference_approach , 0 );
4226+ qnncfg_instance.get_intvalue (" general" , " hwaccel_approach " , g_qnn_params.hwaccel_approach , 0 );
42284227 qnncfg_instance.get_intvalue (" general" , " qnn_backend" , g_qnn_params.qnn_backend , 2 );
42294228 qnncfg_instance.get_intvalue (" qnn" , " hvx_threads" , g_qnn_params.hvx_threads , 4 );
42304229 qnncfg_instance.get_intvalue (" qnn" , " vtcm_size_in_mb" , g_qnn_params.vtcm_size_in_mb , 8 );
@@ -4233,8 +4232,8 @@ static void ggmlqnn_load_cfg() {
42334232 qnncfg_instance.get_intvalue (" cdsp" , " enable_mulmat_cdsp" , g_qnn_params.enable_mulmat_cdsp , 0 );
42344233 qnncfg_instance.get_intvalue (" cdsp" , " enable_q_mulmat" , g_qnn_params.enable_q_mulmat , 0 );
42354234 GGMLQNN_LOG_INFO (" print_qnn_internal_log=%d" , g_qnn_params.print_qnn_internal_log );
4236- GGMLQNN_LOG_INFO (" inference_approach =%d(%s)" , g_qnn_params.inference_approach ,
4237- ggmlqnn_get_inference_approach_name (g_qnn_params.inference_approach ));
4235+ GGMLQNN_LOG_INFO (" hwaccel_approach =%d(%s)" , g_qnn_params.hwaccel_approach ,
4236+ ggmlqnn_get_hwaccel_approach_name (g_qnn_params.hwaccel_approach ));
42384237 GGMLQNN_LOG_INFO (" qnn_backend=%d" , g_qnn_params.qnn_backend );
42394238 GGMLQNN_LOG_INFO (" npu inference precision mode=%s" , precision_mode.c_str ());
42404239 GGMLQNN_LOG_INFO (" qnn runtime lib path=%s" , g_qnn_params.qnn_runtimelib_path );
@@ -4325,7 +4324,7 @@ static bool ggmlqnn_can_handle_op(const ggml_backend_qnn_context * ctx, const st
43254324 return true ;
43264325 }
43274326
4328- if (DIRECT_USE_CDSP == g_qnn_params.inference_approach ) {
4327+ if (HWACCEL_CDSP == g_qnn_params.hwaccel_approach ) {
43294328 return ggmlhexagon_can_handle_op (ctx, op_tensor);
43304329 }
43314330
@@ -4686,7 +4685,7 @@ static void ggml_backend_qnn_free(ggml_backend_t backend) {
46864685 ggml_backend_qnn_context * ctx = (ggml_backend_qnn_context *)backend->context ;
46874686 GGMLQNN_LOG_DEBUG (" device idx %d, name:%s" , ctx->device , g_qnn_mgr[ctx->device ].name );
46884687
4689- if (DIRECT_USE_CDSP == g_qnn_params.inference_approach ) {
4688+ if (HWACCEL_CDSP == g_qnn_params.hwaccel_approach ) {
46904689 ggmlhexagon_close_cdsp (ctx);
46914690 }
46924691
@@ -4787,7 +4786,7 @@ static void ggml_backend_qnn_device_get_memory(ggml_backend_dev_t dev, size_t *
47874786 } else if (QNN_BACKEND_NPU == ctx->device ) {
47884787 size_t rpc_ion_memsize = 0 ;
47894788 size_t rpc_ion_usage = 0 ;
4790- if (DIRECT_USE_CDSP != g_qnn_params.inference_approach ) {
4789+ if (HWACCEL_CDSP != g_qnn_params.hwaccel_approach ) {
47914790 rpc_ion_memsize = ctx->instance ->get_rpcmem_capacity ();
47924791 rpc_ion_usage = ctx->instance ->get_rpcmem_usage ();
47934792 } else {
@@ -5013,8 +5012,8 @@ ggml_backend_reg_t ggml_backend_qnn_reg() {
50135012
50145013 // case-2: normal scenario, such as llama-cli or UI application
50155014 ggmlqnn_load_cfg ();
5016- GGMLQNN_LOG_INFO (" inference approach=%d(%s)" , g_qnn_params.inference_approach ,
5017- ggmlqnn_get_inference_approach_name (g_qnn_params.inference_approach ));
5015+ GGMLQNN_LOG_INFO (" inference approach=%d(%s)" , g_qnn_params.hwaccel_approach ,
5016+ ggmlqnn_get_hwaccel_approach_name (g_qnn_params.hwaccel_approach ));
50185017 GGMLQNN_LOG_INFO (" user's specified qnn_backend=%d" , g_qnn_params.qnn_backend );
50195018 GGMLQNN_LOG_INFO (" user's specified qnn runtime lib path=%s" , g_qnn_params.qnn_runtimelib_path );
50205019 if (g_qnn_params.qnn_backend >= GGML_QNN_MAX_DEVICES) {
@@ -5053,7 +5052,7 @@ ggml_backend_reg_t ggml_backend_qnn_reg() {
50535052}
50545053
50555054const char * ggml_backend_qnn_get_devname (size_t dev_num) {
5056- if (DIRECT_USE_CDSP == g_qnn_params.inference_approach ) {
5055+ if (HWACCEL_CDSP == g_qnn_params.hwaccel_approach ) {
50575056 if (dev_num == QNN_BACKEND_GGML)
50585057 return " ggml" ;
50595058 else
@@ -5076,8 +5075,8 @@ const char * ggml_backend_qnn_get_devname(size_t dev_num) {
50765075
50775076static qnn_instance * ggmlqnn_init_qnn_instance (size_t device, const char * qnn_lib_path) {
50785077 int result = 0 ;
5079- GGMLQNN_LOG_INFO (" inference approach=%d(%s)" , g_qnn_params.inference_approach ,
5080- ggmlqnn_get_inference_approach_name (g_qnn_params.inference_approach ));
5078+ GGMLQNN_LOG_INFO (" inference approach=%d(%s)" , g_qnn_params.hwaccel_approach ,
5079+ ggmlqnn_get_hwaccel_approach_name (g_qnn_params.hwaccel_approach ));
50815080
50825081 qnn_instance * instance = nullptr ;
50835082 instance = new qnn_instance (qnn_lib_path, g_qnn_mgr[device].lib , " " );
@@ -5141,7 +5140,7 @@ ggml_backend_t ggml_backend_qnn_init(size_t device, const char * qnn_lib_path) {
51415140 }
51425141
51435142 // don't initialize QNN when inference approach is offload ggml op to Hexagon cDSP directly
5144- if (DIRECT_USE_CDSP != g_qnn_params.inference_approach ) {
5143+ if (HWACCEL_CDSP != g_qnn_params.hwaccel_approach ) {
51455144 qnn_instance * instance = ggmlqnn_init_qnn_instance (device, qnn_lib_path);
51465145 if (nullptr == instance)
51475146 return nullptr ;
@@ -5157,14 +5156,14 @@ ggml_backend_t ggml_backend_qnn_init(size_t device, const char * qnn_lib_path) {
51575156 };
51585157
51595158 g_qnn_mgr[device].backend = qnn_backend;
5160- if (DIRECT_USE_CDSP == g_qnn_params.inference_approach ) {
5159+ if (HWACCEL_CDSP == g_qnn_params.hwaccel_approach ) {
51615160 int result = ggmlhexagon_init_dsp (&g_qnn_mgr[device]);
51625161 if (0 != result) {
51635162 GGMLQNN_LOG_INFO (" init hexagon dsp failure" );
51645163 ggml_backend_qnn_free (qnn_backend);
51655164 return nullptr ;
51665165 }
5167- // ensure test-backend-ops get the correct backend name when inference approach is 1(DIRECT_USE_CDSP )
5166+ // ensure test-backend-ops get the correct backend name when hwaccel approach is 2(HWACCEL_CDSP )
51685167 memcpy (g_qnn_mgr[device].name , " Hexagon-cDSP" , strlen (" Hexagon-cDSP" ));
51695168 }
51705169
@@ -5237,7 +5236,7 @@ static void ggmlqnn_compute_elementwise(ggml_backend_qnn_context * ctx, ggml_ten
52375236 qnn_perf op_perf = qnn_perf (graph_name);
52385237 op_perf.start ();
52395238
5240- if (DIRECT_USE_CDSP == g_qnn_params.inference_approach ) {
5239+ if (HWACCEL_CDSP == g_qnn_params.hwaccel_approach ) {
52415240 ggmlhexagon_compute (ctx, op);
52425241 op_perf.info ();
52435242 return ;
@@ -5629,7 +5628,7 @@ static void ggmlqnn_compute_mul_mat(ggml_backend_qnn_context * ctx, ggml_tensor
56295628 qnn_perf op_perf = qnn_perf (graph_name);
56305629 op_perf.start ();
56315630
5632- if (DIRECT_USE_CDSP == g_qnn_params.inference_approach ) {
5631+ if (HWACCEL_CDSP == g_qnn_params.hwaccel_approach ) {
56335632 ggmlhexagon_compute (ctx, op);
56345633 op_perf.info ();
56355634 return ;
0 commit comments