@@ -179,8 +179,6 @@ static void ggmlqnn_compute_diag_mask(ggml_backend_qnn_context * ctx, ggml_ten
 #endif
 #define GGMLQNN_DUMP_TENSOR(tensor) ggmlqnn_dump_tensor(tensor, #tensor)
 
-#define GGMLQNN_MEM_ADD(alignment)  (sizeof(size_t) + alignment)
-#define GGMLQNN_MEM_MASK(alignment) ((uintptr_t)alignment - 1)
 #define QNN_VER_PTR(x) (&((x).v1))
 #define RPCMEM_DEFAULT_FLAGS 1
 #define RPCMEM_HEAP_ID_SYSTEM 25
@@ -4230,18 +4228,13 @@ static bool ggmlhexagon_can_handle_op(const ggml_backend_qnn_context * ctx, cons
     uint32_t src1_rank = 0;
     if (nullptr != src0) {
         src0_rank = ggml_n_dims(src0);
-    } else {
-        // GGMLQNN_LOG_DEBUG("op name %s\n", ggml_op_name(op_tensor->op));
     }
     if (nullptr != src1) {
         src1_rank = ggml_n_dims(src1);
-    } else {
-        // GGMLQNN_LOG_DEBUG("op name %s\n", ggml_op_name(op_tensor->op));
     }
 
-    // TODO: remove this filter in the future, mulmat on cDSP doesn't work as expected
-    // bool support = ((op_tensor->op == GGML_OP_ADD) || (op_tensor->op == GGML_OP_MUL_MAT));
-    bool support = (op_tensor->op == GGML_OP_ADD);
+    // TODO: currently only GGML_OP_ADD and GGML_OP_MUL_MAT are offloaded to the cDSP directly
+    bool support = ((op_tensor->op == GGML_OP_ADD) || (op_tensor->op == GGML_OP_MUL_MAT));
     if (!support)
         return false;
 
@@ -4251,21 +4244,17 @@ static bool ggmlhexagon_can_handle_op(const ggml_backend_qnn_context * ctx, cons
             if (!ggml_are_same_shape(src0, src1)) {
                 return false;
             }
-            return ggmlqnn_same_types(ctx, op_tensor);
+            return (src0->type == GGML_TYPE_F32) && (src1->type == GGML_TYPE_F32) && (op_tensor->type == GGML_TYPE_F32);
         }
 
         case GGML_OP_MUL_MAT:
         {
             ggmlqnn_dump_op_info(op_tensor);
-            if (src0_rank != src1_rank)
-                return false;
 
-            // TODO: remove this filter in the future
-            if (src0_rank != 2)
+            if (src1_rank != 2)
                 return false;
 
-            return (src0->type == GGML_TYPE_F32 || ggml_is_quantized(src0->type))
-                && (src1->type == GGML_TYPE_F32) && (op_tensor->type == GGML_TYPE_F32);
+            return (src0->type == GGML_TYPE_F32) && (src1->type == GGML_TYPE_F32) && (op_tensor->type == GGML_TYPE_F32);
 
         }
         default:
@@ -5110,6 +5099,8 @@ ggml_backend_t ggml_backend_qnn_init(size_t device, const char * qnn_lib_path) {
             ggml_backend_qnn_free(qnn_backend);
             return nullptr;
         }
+        // ensure test-backend-ops gets the correct backend name when the inference approach is 1 (DIRECT_USE_CDSP)
+        memcpy(g_qnn_mgr[device].name, "Hexagon-cDSP", strlen("Hexagon-cDSP"));
     }
 
     GGMLQNN_LOG_INFO("leave %s\n", __func__);
@@ -5564,11 +5555,6 @@ static void ggmlqnn_compute_mul_mat(ggml_backend_qnn_context * ctx, ggml_tensor
     const enum ggml_type src0_type = src0->type;
     const uint32_t src0_rank = ggml_n_dims(src0);
     const uint32_t src1_rank = ggml_n_dims(src1);
-    GGML_ASSERT(src0_rank == src1_rank);
-    GGML_ASSERT(src0_rank >= 2); // QNN SDK's limitation, make QNN SDK happy
-    if (4 == src0_rank) {
-        return ggmlqnn_compute_mul_mat_4d(ctx, op);
-    }
 
     ggmlqnn_print_tensors_info(__func__, ctx, src0, src1, dst);
 
@@ -5584,6 +5570,12 @@ static void ggmlqnn_compute_mul_mat(ggml_backend_qnn_context * ctx, ggml_tensor
         return;
     }
 
+    GGML_ASSERT(src0_rank == src1_rank);
+    GGML_ASSERT(src0_rank >= 2); // QNN SDK's limitation, make QNN SDK happy
+    if (4 == src0_rank) {
+        return ggmlqnn_compute_mul_mat_4d(ctx, op);
+    }
+
     void * wdata = ggmlqnn_type_trait(ctx, op);
     const size_t desired_size = ctx->desired_size;
 