@@ -24,6 +24,14 @@
  * - GGML_OP_ADD & GGML_OP_MUL_MAT:
  *   this is a hwaccel skeleton, can expand other ggml ops accordingly
  *
+ * there is a key point in this PR:
+ * when hwaccel_approach is HWACCEL_QNN, there are 4 backends:
+ *   HEXAGON_BACKEND_QNNCPU, HEXAGON_BACKEND_QNNGPU, HEXAGON_BACKEND_QNNNPU, HEXAGON_BACKEND_GGML (the default ggml backend);
+ * when hwaccel_approach is HWACCEL_CDSP, there are 2 backends (exactly like ggml-opencl or ggml-vulkan):
+ *   HEXAGON_BACKEND_CDSP, HEXAGON_BACKEND_GGML.
+ *
+ * the reason for this is to facilitate performance comparison between the cDSP approach and the QNN approach.
+ * accordingly, this PR supports not only the QNN-based approach but also the cDSP-based approach.
 */
 #include <stdio.h>
 #include <stdlib.h>
@@ -80,7 +88,6 @@
 #include "rpcmem.h"
 #include "remote.h"
 #include "os_defines.h"
-#include "domain.h"
 #include "AEEStdErr.h"
 #include "HAP_power.h"
 #include "HAP_farf.h"
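For orientation, here is a minimal sketch of the backend layout that the header comment above describes. Only the identifier names come from the comment; the numeric values and the enum itself are assumptions for illustration, not the PR's actual definitions:

// illustrative sketch only: values are assumed, names are from the header comment
enum hexagon_backend_id {
    HEXAGON_BACKEND_QNNCPU = 0,  // QNN running on the ARM CPU
    HEXAGON_BACKEND_QNNGPU = 1,  // QNN running on the Adreno GPU
    HEXAGON_BACKEND_QNNNPU = 2,  // QNN running on the Hexagon NPU
    HEXAGON_BACKEND_CDSP   = 3,  // direct cDSP path, used when hwaccel_approach is HWACCEL_CDSP
    HEXAGON_BACKEND_GGML   = 4,  // fallback to the default ggml backend
};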
@@ -377,7 +384,7 @@ static struct hexagon_appcfg_t g_hexagon_appcfg = {
 #elif defined(_WIN32)
     .qnn_runtimelib_path = "C:\\",
 #endif
-    .ggml_hexagon_version = {"1.13"},
+    .ggml_hexagon_version = {"1.14"},
     .ggml_dsp_version = {"0.63"},
 };

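The hunk above bumps the skeleton's version string from 1.13 to 1.14 while the DSP-side version stays at 0.63. A hypothetical helper (not part of the PR; it assumes a GGMLHEXAGON_LOG_INFO macro exists alongside the DEBUG/VERBOSE macros seen later in this diff) showing how these two version strings might be surfaced at backend initialization:

// hypothetical sketch: surfaces both version strings from g_hexagon_appcfg
static void ggmlhexagon_print_versions_sketch(void) {
    GGMLHEXAGON_LOG_INFO("ggml-hexagon version: %s", g_hexagon_appcfg.ggml_hexagon_version);
    GGMLHEXAGON_LOG_INFO("ggml-dsp version:     %s", g_hexagon_appcfg.ggml_dsp_version);
}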
@@ -888,29 +895,29 @@ static void ggmlhexagon_print_tensors_info(const char * func_name, const ggml_ba
     }

     if (nullptr != func_name && nullptr != ctx) {
-        GGMLHEXAGON_LOG_DEBUG("call %s in dev %s\n", func_name, ctx->name);
+        GGMLHEXAGON_LOG_VERBOSE("call %s in dev %s\n", func_name, ctx->name);
     }
     if (nullptr != src0) {
-        GGMLHEXAGON_LOG_DEBUG(
+        GGMLHEXAGON_LOG_VERBOSE(
                 "%-6s: type = %i (%s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi, %5zi)",
                 src0->name,
                 src0->type, ggml_type_name(src0->type), src0->ne[0], src0->ne[1], src0->ne[2],
                 src0->ne[3],
                 src0->nb[0], src0->nb[1], src0->nb[2], src0->nb[3]);
     }
     if (nullptr != src1) {
-        GGMLHEXAGON_LOG_DEBUG(
+        GGMLHEXAGON_LOG_VERBOSE(
                 "%-6s: type = %i (%s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi, %5zi)",
                 src1->name,
                 src1->type, ggml_type_name(src1->type), src1->ne[0], src1->ne[1], src1->ne[2],
                 src1->ne[3],
                 src1->nb[0], src1->nb[1], src1->nb[2], src1->nb[3]);
     }
-    GGMLHEXAGON_LOG_DEBUG("%-6s: type = %i (%s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi, %5zi)",
+    GGMLHEXAGON_LOG_VERBOSE("%-6s: type = %i (%s) ne = %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 " x %5" PRIi64 ", nb = (%5zi, %5zi, %5zi, %5zi)",
                 dst->name,
                 dst->type, ggml_type_name(dst->type), dst->ne[0], dst->ne[1], dst->ne[2], dst->ne[3],
                 dst->nb[0], dst->nb[1], dst->nb[2], dst->nb[3]);
-    GGMLHEXAGON_LOG_DEBUG("\n");
+    GGMLHEXAGON_LOG_VERBOSE("\n");
 }

 static void ggmlhexagon_dump_op_info(const struct ggml_tensor * tensor) {
@@ -921,7 +928,7 @@ static void ggmlhexagon_dump_op_info(const struct ggml_tensor * tensor) {
     const struct ggml_tensor * src0 = tensor->src[0];
     struct ggml_tensor * src1 = tensor->src[1];
     struct ggml_tensor * dst = const_cast<ggml_tensor *>(tensor);
-    GGMLHEXAGON_LOG_DEBUG("op name:%s, tensor type:%s", ggml_op_name(tensor->op), ggml_type_name(tensor->type));
+    GGMLHEXAGON_LOG_VERBOSE("op name:%s, tensor type:%s", ggml_op_name(tensor->op), ggml_type_name(tensor->type));
     ggmlhexagon_print_tensors_info(nullptr, nullptr, src0, src1, dst);
 }

@@ -939,7 +946,7 @@ static void ggmlhexagon_dump_tensor_elements(const ggml_tensor * tensor) {
                        << " ";
         }
         if (strlen(tmposs.str().c_str()) <= (GGMLHEXAGON_LOGBUF_LEN - 96)) {
-            GGMLHEXAGON_LOG_DEBUG("%s\n", tmposs.str().c_str());
+            GGMLHEXAGON_LOG_VERBOSE("%s\n", tmposs.str().c_str());
         }
         tmposs.clear();
         tmposs.str("");
@@ -948,7 +955,7 @@ static void ggmlhexagon_dump_tensor_elements(const ggml_tensor * tensor) {
         }
     }

-    GGMLHEXAGON_LOG_DEBUG("\n");
+    GGMLHEXAGON_LOG_VERBOSE("\n");
 }

 static void ggmlhexagon_dump_tensor(const ggml_tensor * tensor, const char * name) {
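The hunks above consistently demote per-tensor dumps from GGMLHEXAGON_LOG_DEBUG to GGMLHEXAGON_LOG_VERBOSE. A minimal sketch (an assumption, not the PR's actual logging internals) of the threshold gating such a demotion implies: a message is emitted only when its level is at or above the configured threshold, so the noisy tensor dumps stay silent unless verbosity is raised explicitly:

// sketch of assumed log-level gating; names and values are illustrative
#include <cstdio>

enum sketch_log_level { SKETCH_LOG_VERBOSE = 0, SKETCH_LOG_DEBUG = 1, SKETCH_LOG_INFO = 2 };
static int s_sketch_log_threshold = SKETCH_LOG_DEBUG;  // VERBOSE messages dropped by default

#define SKETCH_LOG(level, ...)                          \
    do {                                                \
        if ((level) >= s_sketch_log_threshold) {        \
            std::printf(__VA_ARGS__);                   \
        }                                               \
    } while (0)

With this gate, SKETCH_LOG(SKETCH_LOG_VERBOSE, "tensor dump\n") prints nothing until the threshold is lowered to SKETCH_LOG_VERBOSE, which matches the intent of moving dump output out of the default DEBUG stream.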
@@ -3375,7 +3382,7 @@ static void ggmlqnn_sdk_logcallback(const char * fmt,
     {
         std::lock_guard<std::mutex> lock(log_mutex);
         memset(s_ggmlqnn_sdk_logbuf, 0, GGMLHEXAGON_LOGBUF_LEN);
-        vsnprintf(reinterpret_cast<char *const>(s_ggmlqnn_sdk_logbuf), GGMLHEXAGON_LOGBUF_LEN, fmt, argp);
+        vsnprintf(reinterpret_cast<char *>(s_ggmlqnn_sdk_logbuf), GGMLHEXAGON_LOGBUF_LEN, fmt, argp);
         GGMLHEXAGON_LOG_DEBUG("%8.1fms [%-7s] %s\n", ms, log_level_desc, s_ggmlqnn_sdk_logbuf);
     }
 #if !GGMLHEXAGON_DEBUG
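The fix above drops the stray const from the cast target: a top-level const in reinterpret_cast<char *const> applies to the temporary pointer produced by the cast and is therefore meaningless, so plain char * is the correct target type. A self-contained illustration of the same vsnprintf-into-a-static-buffer pattern; note that when the buffer is declared as char[] directly, as in this sketch, no cast is needed at all (the cast in the PR presumably exists because s_ggmlqnn_sdk_logbuf has a different element type):

// standalone sketch of the logging pattern; names here are illustrative
#include <cstdarg>
#include <cstdio>

static char s_buf[256];

static void log_sketch(const char * fmt, ...) {
    va_list argp;
    va_start(argp, fmt);
    std::vsnprintf(s_buf, sizeof(s_buf), fmt, argp);  // writes through char *, no cast needed
    va_end(argp);
    std::printf("%s", s_buf);
}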
@@ -4599,7 +4606,7 @@ static void ggmlqnn_compute_mul_mat(ggml_backend_hexagon_context * ctx, ggml_ten
         // retrieve computational resource from cached QNN graph
         qnn_singlenode_res_t & graph_item = ctx->qnn_singlenode_graph_map[graph_name];
         graph_handle = std::get<0>(graph_item);
-        qnn_ptensors_t &tensors = std::get<1>(graph_item);
+        qnn_ptensors_t & tensors = std::get<1>(graph_item);
         p_tensor0 = tensors[0];
         p_tensor1 = tensors[1];
         p_tensor2 = tensors[2];
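For readers unfamiliar with the cache being indexed above: each entry in qnn_singlenode_graph_map pairs a finalized QNN graph handle with the tensors created for it, so a repeated op with the same graph name skips the expensive graph build. A simplified sketch of that pattern, with all types stubbed out (the real qnn_singlenode_res_t and qnn_ptensors_t definitions are not shown in this diff, so these are assumptions modeled on the names):

// sketch of the single-node graph cache; stub types stand in for the real ones
#include <map>
#include <string>
#include <tuple>
#include <vector>

using sketch_graph_handle = void *;
using sketch_ptensors_t   = std::vector<void *>;                      // stands in for qnn_ptensors_t
using sketch_singlenode_t = std::tuple<sketch_graph_handle, sketch_ptensors_t>;

static std::map<std::string, sketch_singlenode_t> sketch_graph_map;

static void sketch_lookup(const std::string & graph_name) {
    sketch_singlenode_t & item    = sketch_graph_map[graph_name];
    sketch_graph_handle   handle  = std::get<0>(item);                // cached graph handle
    sketch_ptensors_t &   tensors = std::get<1>(item);                // cached tensor pointers
    (void) handle; (void) tensors;
}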
@@ -5849,6 +5856,7 @@ static bool ggmlhexagon_can_handle_op_through_cdsp(ggml_backend_dev_t dev, const
     switch (op_tensor->op) {
         case GGML_OP_ADD:
         {
+            ggmlhexagon_dump_op_info(op_tensor);
             // TODO: workaround for HWACCEL_CDSP not working in ASR inference and in LLM inference
             //       with some LLM models in a standard Android APP
             if (ne00 < 1024) {
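The gate in this hunk falls back to the default ggml backend for small first dimensions; besides sidestepping the ASR/LLM issue noted in the TODO, this is the usual offload trade-off where the cDSP round-trip cost outweighs the compute for small tensors. A condensed sketch of the logic (the 1024 threshold comes from the hunk above; the signature is simplified and hypothetical):

// condensed sketch of the GGML_OP_ADD offload gate shown above
static bool sketch_can_offload_add(int64_t ne00) {
    if (ne00 < 1024) {
        return false;  // keep small rows on the default ggml backend
    }
    return true;       // large rows are worth dispatching to the cDSP
}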
@@ -5927,8 +5935,9 @@ static bool ggmlhexagon_can_handle_op_through_qnn(ggml_backend_dev_t dev, const
         return false;
     }

-    if (ne00 < 32)
+    if (ne00 < 32) {
         return false;
+    }

     return ggmlhexagon_same_types(ctx, op_tensor);
 }