
Commit 3c5817e

author: zhouwg (committed)

ggml-hexagon: make comparison of mulmat performance between HWACCEL_QNN and HWACCEL_CDSP easier

1 parent: 807fdf7

2 files changed: +28 -34 lines

ggml/src/ggml-hexagon/ggml-hexagon.cpp

Lines changed: 18 additions & 27 deletions
@@ -309,14 +309,13 @@ struct hexagon_appcfg_t {
     int enable_perf;            // enable/disable perf of op function
     int print_tensors_info;     // enable/disable print tensors info in op function
     int dump_op_info;           // enable/disable dump op info in handle_op
+    int enable_q_mulmat;        // enable/disable offload quantized mulmat
     int precision_mode;         // 0: default 1:fp16
     int hvx_threads;
     int vtcm_size_in_mb;
     int enable_dlbc;
     int hwaccel_approach;       // 0: HWACCEL_QNN 1: HWACCEL_QNN_SINGLEGRAPH 2: HWACCEL_CDSP
     int hexagon_backend;        // 0: HEXAGON_BACKEND_QNNCPU 1: HEXAGON_BACKEND_QNNGPU 2: HEXAGON_BACKEND_QNNNPU / HEXAGON_BACKEND_CDSP
-    int enable_mulmat_cdsp;     // enable/disable offload mulmat to cDSP
-    int enable_q_mulmat;        // enable/disable offload fp32 & quantized mulmat to cDSP
     int enable_rpc_ion_mempool; // enable/disable rpc ion memory pool
     int enable_rpc_dma_mempool; // enable/disable rpc dma memory pool
     const char * cfgfilename;
@@ -328,14 +327,13 @@ static struct hexagon_appcfg_t g_hexagon_appcfg = {
     .enable_perf            = 0,
     .print_tensors_info     = 0,
     .dump_op_info           = 0,
+    .enable_q_mulmat        = 0,
     .precision_mode         = 0,
     .hvx_threads            = 4,
     .vtcm_size_in_mb        = 8,
     .enable_dlbc            = 1,
     .hwaccel_approach       = HWACCEL_CDSP,
     .hexagon_backend        = HEXAGON_BACKEND_CDSP,
-    .enable_mulmat_cdsp     = 0,
-    .enable_q_mulmat        = 0,
     .enable_rpc_ion_mempool = 0,
     .enable_rpc_dma_mempool = 0,
     .cfgfilename            = "ggml-hexagon.cfg",
@@ -863,13 +861,12 @@ static void ggmlhexagon_print_running_timestamp(ggml_backend_hexagon_context * c
                          ggml_backend_hexagon_get_devname(g_hexagon_appcfg.hexagon_backend));
     ggmlhexagon_get_timestring(timestamp);
     if (HWACCEL_CDSP == g_hexagon_appcfg.hwaccel_approach) {
-        GGMLHEXAGON_LOG_INFO("offload GGML_OP_MULMAT: %s", g_hexagon_appcfg.enable_mulmat_cdsp ? "YES" : "NO");
         GGMLHEXAGON_LOG_INFO("offload quantize GGML_OP_MUL_MAT: %s", g_hexagon_appcfg.enable_q_mulmat ? "YES" : "NO");
         GGMLHEXAGON_LOG_INFO("using rpc ion memory pool: %s", g_hexagon_appcfg.enable_rpc_ion_mempool ? "YES" : "NO");
         GGMLHEXAGON_LOG_INFO("using rpc dma memory pool: %s", g_hexagon_appcfg.enable_rpc_dma_mempool ? "YES" : "NO");
         ggmlhexagon_probe_dspinfo(ctx);
     } else {
-        GGMLHEXAGON_LOG_INFO("only offload GGML_OP_ADD: NO");
+        GGMLHEXAGON_LOG_INFO("offload quantize GGML_OP_MUL_MAT: %s", g_hexagon_appcfg.enable_q_mulmat ? "YES" : "NO");
     }
     GGMLHEXAGON_LOG_INFO("running timestamp:%s", timestamp);
 }
@@ -1449,12 +1446,11 @@ static void ggmlhexagon_load_cfg() {
     qnncfg_instance.get_intvalue("general", "dump_op_info", g_hexagon_appcfg.dump_op_info, 0);
     qnncfg_instance.get_intvalue("general", "hwaccel_approach", g_hexagon_appcfg.hwaccel_approach, HWACCEL_CDSP);
     qnncfg_instance.get_intvalue("general", "hexagon_backend", g_hexagon_appcfg.hexagon_backend, HEXAGON_BACKEND_CDSP);
+    qnncfg_instance.get_intvalue("general", "enable_q_mulmat", g_hexagon_appcfg.enable_q_mulmat, 0);
     qnncfg_instance.get_intvalue("qnn", "hvx_threads", g_hexagon_appcfg.hvx_threads, 4);
     qnncfg_instance.get_intvalue("qnn", "vtcm_size_in_mb", g_hexagon_appcfg.vtcm_size_in_mb, 8);
     qnncfg_instance.get_intvalue("qnn", "enable_dlbc", g_hexagon_appcfg.enable_dlbc, 1);
     qnncfg_instance.get_stringvalue("qnn", "precision_mode", precision_mode, "fp32");
-    qnncfg_instance.get_intvalue("cdsp", "enable_mulmat_cdsp", g_hexagon_appcfg.enable_mulmat_cdsp, 1);
-    qnncfg_instance.get_intvalue("cdsp", "enable_q_mulmat", g_hexagon_appcfg.enable_q_mulmat, 0);
     qnncfg_instance.get_intvalue("cdsp", "enable_rpc_ion_mempool", g_hexagon_appcfg.enable_rpc_ion_mempool, 1);
     qnncfg_instance.get_intvalue("cdsp", "enable_rpc_dma_mempool", g_hexagon_appcfg.enable_rpc_dma_mempool, 0);
     GGMLHEXAGON_LOG_INFO("hwaccel_approach=%d(%s)", g_hexagon_appcfg.hwaccel_approach,
@@ -3017,7 +3013,7 @@ int qnn_instance::init_qnn_graph(const std::string & graph_name, HEXAGONBackend
     _graph_name = graph_name;
     _device_id = device;

-    GGMLHEXAGON_LOG_DEBUG("[%s][%s]created", ggml_backend_hexagon_get_devname(device), graph_name.c_str());
+    //GGMLHEXAGON_LOG_DEBUG("[%s][%s]created", ggml_backend_hexagon_get_devname(device), graph_name.c_str());

     Qnn_ErrorHandle_t error = QNN_SUCCESS;
     if (HEXAGON_BACKEND_QNNNPU == device) {
@@ -3070,7 +3066,7 @@ int qnn_instance::init_qnn_graph(const std::string & graph_name, HEXAGONBackend
         }
         graph_configs.push_back(nullptr);
         error = _qnn_interface.qnn_graph_create(_qnn_context_handle, graph_name.c_str(), graph_configs.data(), &_qnn_graph_handle);
-        GGMLHEXAGON_LOG_DEBUG("[%s][%s]created graph %p", ggml_backend_hexagon_get_devname(device), graph_name.c_str(), _qnn_graph_handle);
+        //GGMLHEXAGON_LOG_DEBUG("[%s][%s]created graph %p", ggml_backend_hexagon_get_devname(device), graph_name.c_str(), _qnn_graph_handle);
     } else {
         error = _qnn_interface.qnn_graph_create(_qnn_context_handle, graph_name.c_str(), nullptr, &_qnn_graph_handle);
     }
@@ -3280,7 +3276,7 @@ void qnn_instance::htp_set_n_hvx_threads(size_t n_threads) {
     if (QNN_SUCCESS != result) {
         GGMLHEXAGON_LOG_WARN("failed to set QNN graph config: set hvx threads %d", n_threads);
     } else {
-        GGMLHEXAGON_LOG_INFO("succeed to set QNN graph config: set hvx threads %d", n_threads);
+        //GGMLHEXAGON_LOG_DEBUG("succeed to set QNN graph config: set hvx threads %d", n_threads);
     }
 }

@@ -3383,7 +3379,7 @@ static Qnn_OpConfig_t ggmlqnn_create_op_config(const char * name, const char * p
     } else {
         snprintf(opcfg_name, GGML_MAX_NAME, "opcfg_%s_%-8d", name, ggmlqnn_get_idx(QNN_OPCFG_INDEX));
     }
-    GGMLHEXAGON_LOG_DEBUG("create qnn opconfig %s", opcfg_name);
+    //GGMLHEXAGON_LOG_DEBUG("create qnn opconfig %s", opcfg_name);
     ggmlqnn_inc_idx(QNN_OPCFG_INDEX);

     Qnn_OpConfigV1_t v1 = {opcfg_name, package, type,
@@ -3564,7 +3560,7 @@ static void ggmlqnn_compute_elementwise(ggml_backend_hexagon_context * ctx, ggml
     }
     graph_handle = instance->get_qnn_graph_handle();

-    GGMLHEXAGON_LOG_DEBUG("graph_handle %p", graph_handle);
+    //GGMLHEXAGON_LOG_DEBUG("graph_handle %p", graph_handle);
     //create computational tensor
     p_tensor0 = ggmlqnn_create_compute_tensor(instance, graph_handle, src0, QNN_TENSOR_TYPE_APP_WRITE);
     if (2 == input_param_count) {
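Note: the hunks above all retire per-call DEBUG/INFO logging on hot paths (graph creation, op-config creation, per-op tensor setup), so logging overhead does not distort the mulmat timings being compared. Commenting the calls out is the lightest-touch option; an alternative sketch, assuming a simple global level variable and hypothetical macro names rather than the tree's real GGMLHEXAGON_LOG_* machinery, is a runtime gate that keeps the call sites intact:

    #include <cstdio>

    // hypothetical runtime log gate; names are illustrative only
    enum hexlog_level { HEXLOG_NONE = 0, HEXLOG_WARN, HEXLOG_INFO, HEXLOG_DEBUG };
    static int g_hexlog_level = HEXLOG_WARN;   // raise to HEXLOG_DEBUG when diagnosing

    #define HEXLOG_DEBUG_GATED(fmt, ...)                                   \
        do {                                                               \
            if (g_hexlog_level >= HEXLOG_DEBUG)                            \
                std::fprintf(stderr, "[debug] " fmt "\n", ##__VA_ARGS__);  \
        } while (0)

With such a gate, benchmark runs keep the level at HEXLOG_WARN and pay only one branch per call site instead of a formatted write.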
@@ -5063,7 +5059,7 @@ static bool ggmlhexagon_can_handle_op_through_cdsp(ggml_backend_dev_t dev, const
         case GGML_OP_MUL_MAT:
         {
             ggmlhexagon_dump_op_info(op_tensor);
-            if (g_hexagon_appcfg.enable_q_mulmat)
+            if (1 == g_hexagon_appcfg.enable_q_mulmat)
                 return (src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_Q6_K
                        ) && (src1->type == GGML_TYPE_F32) && (op_tensor->type == GGML_TYPE_F32);
             else
@@ -5142,10 +5138,13 @@ static bool ggmlhexagon_can_handle_op_through_qnn(ggml_backend_dev_t dev, const
         return false;

     if (ctx->device == HEXAGON_BACKEND_QNNNPU) {
-        return (src0->type == GGML_TYPE_F32
+        if (1 == g_hexagon_appcfg.enable_q_mulmat)
+            return (src0->type == GGML_TYPE_F32
                 || src0->type == GGML_TYPE_Q4_0 || src0->type == GGML_TYPE_Q8_0
                 || src0->type == GGML_TYPE_Q6_K || src0->type == GGML_TYPE_Q8_K
                ) && (src1->type == GGML_TYPE_F32) && (op_tensor->type == GGML_TYPE_F32);
+        else
+            return (src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F32 && op_tensor->type == GGML_TYPE_F32);
     } else {
         return (src0->type == GGML_TYPE_F32 || ggml_is_quantized(src0->type))
                && (src1->type == GGML_TYPE_F32) && (op_tensor->type == GGML_TYPE_F32);
@@ -5298,10 +5297,8 @@ static bool ggmlhexagon_compute_forward(ggml_backend_t backend, struct ggml_tens

 struct ggml_backend_hexagon_buffer_context {
     ~ggml_backend_hexagon_buffer_context() {
-        GGMLHEXAGON_LOG_DEBUG("enter %s", __func__);
         if (buffer) {
             if ((g_hexagon_appcfg.hwaccel_approach == HWACCEL_CDSP) && (1 == g_hexagon_appcfg.enable_rpc_ion_mempool)) {
                 //do nothing here because the rpc mempool is used for HWACCEL_CDSP
-                GGMLHEXAGON_LOG_DEBUG("rpcmem %p, size %d", buffer, buffer_size);
             } else {
                 ggml_aligned_free(buffer, 0);
@@ -5397,7 +5394,6 @@ static const char * ggml_backend_hexagon_buffer_type_name(ggml_backend_buffer_ty

 static ggml_backend_buffer_t ggml_backend_hexagon_buffer_type_alloc_buffer(
            ggml_backend_buffer_type_t buft, size_t size) {
-    GGMLHEXAGON_LOG_DEBUG("enter %s, size %d", __func__, size);
     struct ggml_backend_hexagon_context * ctx = static_cast<ggml_backend_hexagon_context *>(buft->context);
     GGML_ASSERT(nullptr != ctx);
     GGMLHEXAGON_LOG_DEBUG("device %d(%s)", ctx->device, ggml_backend_hexagon_get_devname(ctx->device));
@@ -5413,14 +5409,10 @@ static ggml_backend_buffer_t ggml_backend_hexagon_buffer_type_alloc_buffer(
     size_page = systeminfo.dwPageSize;
 #endif
     size_t size_aligned = size;
-    GGMLHEXAGON_LOG_DEBUG("size_aligned %d", size_aligned);
     if ((size_aligned % size_page) != 0) {
         size_aligned += (size_page - (size_aligned % size_page));
     }
-    GGMLHEXAGON_LOG_DEBUG("size_aligned %d", size_aligned);
     if ((g_hexagon_appcfg.hwaccel_approach == HWACCEL_CDSP) && (1 == g_hexagon_appcfg.enable_rpc_ion_mempool)) {
-        GGMLHEXAGON_LOG_DEBUG("rpc mempool len %d", ctx->rpc_mempool_len);
-        GGMLHEXAGON_LOG_DEBUG("rpc mempool usage %d", ctx->rpc_mempool_usage);
         GGML_ASSERT(ctx->rpc_mempool_usage <= ctx->rpc_mempool_len);
         buffer_ctx->buffer = (static_cast<char*>(ctx->rpc_mempool)) + ctx->rpc_mempool_usage;
         GGMLHEXAGON_LOG_DEBUG("buffer_ctx->buffer %p", buffer_ctx->buffer);
@@ -5434,7 +5426,7 @@ static ggml_backend_buffer_t ggml_backend_hexagon_buffer_type_alloc_buffer(
         GGMLHEXAGON_LOG_WARN("%s: failed to allocate %d MiB\n", __func__, size / (1 << 20));
         return nullptr;
     } else {
-        GGMLHEXAGON_LOG_DEBUG("%s: succeed to allocate %d MiB\n", __func__, size / (1 << 20));
+        //GGMLHEXAGON_LOG_DEBUG("%s: succeed to allocate %d MiB\n", __func__, size / (1 << 20));
     }

     return ggml_backend_buffer_init(buft, ggml_backend_hexagon_buffer_interface, buffer_ctx, size);
@@ -5577,14 +5569,14 @@ static void ggml_backend_hexagon_device_get_memory(ggml_backend_dev_t dev, size_
             *total = rpc_ion_memsize * SIZE_IN_MB;
             *free = (rpc_ion_memsize - rpc_ion_usage) * SIZE_IN_MB;
             GGMLHEXAGON_LOG_DEBUG("rpc memsize %d M", rpc_ion_memsize);
-            GGMLHEXAGON_LOG_DEBUG("rpc usage %d M", rpc_ion_usage);
+            GGMLHEXAGON_LOG_DEBUG("rpc usage %d M\n\n", rpc_ion_usage);
         } else {
             rpc_ion_memsize = ctx->rpc_mempool_capacity;
             rpc_ion_usage = ctx->rpc_mempool_usage;
             *total = rpc_ion_memsize;
             *free = (rpc_ion_memsize - rpc_ion_usage);
             GGMLHEXAGON_LOG_DEBUG("rpc memsize %d M", rpc_ion_memsize / SIZE_IN_MB);
-            GGMLHEXAGON_LOG_DEBUG("rpc usage %d M", rpc_ion_usage / SIZE_IN_MB);
+            GGMLHEXAGON_LOG_DEBUG("rpc usage %d M\n\n", rpc_ion_usage / SIZE_IN_MB);
         }
     }
 }
@@ -5891,13 +5883,12 @@ ggml_backend_reg_t ggml_backend_hexagon_reg() {
 }

 const char * ggml_backend_hexagon_get_devname(size_t dev_num) {
-    GGMLHEXAGON_LOG_DEBUG("enter %s", __func__);
     if (HWACCEL_CDSP == g_hexagon_appcfg.hwaccel_approach) {
         if (HEXAGON_BACKEND_CDSP == dev_num)
             return "HEXAGON_BACKEND_CDSP";
     }

-    //fallback
+    //fall through
     switch (dev_num) {
         case HEXAGON_BACKEND_QNNCPU:
             return "HEXAGON_BACKEND_QNN_CPU";

scripts/ggml-hexagon.cfg

Lines changed: 10 additions & 7 deletions
@@ -17,9 +17,16 @@ print_tensors_info = 0
 # enable/disable dump op info in handle_op
 dump_op_info = 0

-# 0: hwaccel approach through QNN
-# 1: hwaccel approach through QNN-SINGLEGRAPH: mapping entire ggml cgraph to a single QNN graph
-# 2: hwaccel approach through Hexagon cDSP
+#enable/disable offload of fp32 & quantized-type mulmat
+#quantized-type mulmat works fine in HWACCEL_QNN at the moment
+#quantized-type mulmat doesn't work correctly in HWACCEL_CDSP at the moment
+#this item makes comparing mulmat performance between the two approaches easy
+enable_q_mulmat = 0
+
+# 0: hwaccel approach through HWACCEL_QNN: offload ggml op to QNN
+# 1: hwaccel approach through HWACCEL_QNN_SINGLEGRAPH: mapping the entire ggml cgraph to a single QNN graph
+# 2: hwaccel approach through HWACCEL_CDSP: offload ggml op to cDSP directly
+# HWACCEL_QNN_SINGLEGRAPH is not supported at the moment
 hwaccel_approach = 2

 #hwaccel approach through QNN
@@ -31,10 +38,6 @@ precision_mode = "fp16"

 #hwaccel approach through cDSP
 [cdsp]
-#enable/disable offload mulmat to cDSP
-enable_mulmat_cdsp = 1
-#enable/disable offload fp32 & quantized type mulmat to cDSP
-enable_q_mulmat = 0
 #enable/disable rpc ion memory pool
 enable_rpc_ion_mempool = 1
 #enable/disable rpc dma memory pool
