Skip to content

Commit 257b6e0

Browse files
author
zhouwg
committed
ggml-qnn: original ggml_compute_forward_add and ggml_compute_forward_mul_mat work fine on Hexagon cDSP for the first time
1 parent 158f244 commit 257b6e0

File tree

5 files changed

+268
-188
lines changed

5 files changed

+268
-188
lines changed

ggml/src/ggml-qnn/ggml-qnn.cpp

Lines changed: 13 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -179,8 +179,6 @@ static void ggmlqnn_compute_diag_mask(ggml_backend_qnn_context * ctx, ggml_ten
179179
#endif
180180
#define GGMLQNN_DUMP_TENSOR(tensor) ggmlqnn_dump_tensor(tensor, #tensor)
181181

182-
#define GGMLQNN_MEM_ADD(alignment) (sizeof (size_t) + alignment)
183-
#define GGMLQNN_MEM_MASK(alignment) ((uintptr_t)alignment - 1)
184182
#define QNN_VER_PTR(x) (&((x).v1))
185183
#define RPCMEM_DEFAULT_FLAGS 1
186184
#define RPCMEM_HEAP_ID_SYSTEM 25
@@ -4230,18 +4228,13 @@ static bool ggmlhexagon_can_handle_op(const ggml_backend_qnn_context * ctx, cons
42304228
uint32_t src1_rank = 0;
42314229
if (nullptr != src0) {
42324230
src0_rank = ggml_n_dims(src0);
4233-
} else {
4234-
//GGMLQNN_LOG_DEBUG("op name %s\n", ggml_op_name(op_tensor->op));
42354231
}
42364232
if (nullptr != src1) {
42374233
src1_rank = ggml_n_dims(src1);
4238-
} else {
4239-
//GGMLQNN_LOG_DEBUG("op name %s\n", ggml_op_name(op_tensor->op));
42404234
}
42414235

4242-
//TODO: remove this filter in the future, mulmat on cDSP doesn't work as expected
4243-
//bool support = ((op_tensor->op == GGML_OP_ADD) || (op_tensor->op == GGML_OP_MUL_MAT));
4244-
bool support = (op_tensor->op == GGML_OP_ADD);
4236+
//TODO: currently only GGML_OP_ADD and GGML_OP_MUL_MAT are offloaded to the cDSP directly
4237+
bool support = ((op_tensor->op == GGML_OP_ADD) || (op_tensor->op == GGML_OP_MUL_MAT));
42454238
if (!support)
42464239
return false;
42474240

@@ -4251,21 +4244,17 @@ static bool ggmlhexagon_can_handle_op(const ggml_backend_qnn_context * ctx, cons
42514244
if (!ggml_are_same_shape(src0, src1)) {
42524245
return false;
42534246
}
4254-
return ggmlqnn_same_types(ctx, op_tensor);
4247+
return (src0->type == GGML_TYPE_F32) && (src1->type == GGML_TYPE_F32) && (op_tensor->type == GGML_TYPE_F32);
42554248
}
42564249

42574250
case GGML_OP_MUL_MAT:
42584251
{
42594252
ggmlqnn_dump_op_info(op_tensor);
4260-
if (src0_rank != src1_rank)
4261-
return false;
42624253

4263-
//TODO: remove this filter in the future
4264-
if (src0_rank != 2)
4254+
if (src1_rank != 2)
42654255
return false;
42664256

4267-
return (src0->type == GGML_TYPE_F32 || ggml_is_quantized(src0->type))
4268-
&& (src1->type == GGML_TYPE_F32) && (op_tensor->type == GGML_TYPE_F32);
4257+
return (src0->type == GGML_TYPE_F32) && (src1->type == GGML_TYPE_F32) && (op_tensor->type == GGML_TYPE_F32);
42694258

42704259
}
42714260
default:
@@ -5110,6 +5099,8 @@ ggml_backend_t ggml_backend_qnn_init(size_t device, const char * qnn_lib_path) {
51105099
ggml_backend_qnn_free(qnn_backend);
51115100
return nullptr;
51125101
}
5102+
//ensure test-backend-ops gets the correct backend name when the inference approach is 1 (DIRECT_USE_CDSP)
5103+
memcpy(g_qnn_mgr[device].name, "Hexagon-cDSP", strlen("Hexagon-cDSP"));
51135104
}
51145105

51155106
GGMLQNN_LOG_INFO("leave %s\n", __func__);
@@ -5564,11 +5555,6 @@ static void ggmlqnn_compute_mul_mat(ggml_backend_qnn_context * ctx, ggml_tensor
55645555
const enum ggml_type src0_type = src0->type;
55655556
const uint32_t src0_rank = ggml_n_dims(src0);
55665557
const uint32_t src1_rank = ggml_n_dims(src1);
5567-
GGML_ASSERT(src0_rank == src1_rank);
5568-
GGML_ASSERT(src0_rank >= 2); //QNN SDK's limitation, make QNN SDK happy
5569-
if (4 == src0_rank) {
5570-
return ggmlqnn_compute_mul_mat_4d(ctx, op);
5571-
}
55725558

55735559
ggmlqnn_print_tensors_info(__func__, ctx, src0, src1, dst);
55745560

@@ -5584,6 +5570,12 @@ static void ggmlqnn_compute_mul_mat(ggml_backend_qnn_context * ctx, ggml_tensor
55845570
return;
55855571
}
55865572

5573+
GGML_ASSERT(src0_rank == src1_rank);
5574+
GGML_ASSERT(src0_rank >= 2); //QNN SDK's limitation, make QNN SDK happy
5575+
if (4 == src0_rank) {
5576+
return ggmlqnn_compute_mul_mat_4d(ctx, op);
5577+
}
5578+
55875579
void * wdata = ggmlqnn_type_trait(ctx, op);
55885580
const size_t desired_size = ctx->desired_size;
55895581

0 commit comments

Comments
 (0)