
Commit 5134179

Author: zhouwg (committed)
ggml-qnn: refine code according to self code-review and make code more clear
1 parent ae6402e commit 5134179

2 files changed: +53 −54 lines


ggml/src/ggml-qnn/ggml-qnn.cpp

Lines changed: 47 additions & 48 deletions
@@ -14,17 +14,17 @@
  * section-6 Hexagon DSP helper function
  * section-7 backend helper function / class
  * section-8 implementation of ggml-hexagon backend according to specification in ggml backend subsystem
- * section-9 implementation of general approach through QNN and Hexagon DSP
+ * section-9 implementation of hwaccel approach through QNN and Hexagon DSP
  *
  * currently provide following ggml op' implementation through QNN:
  * - GGML_OP_ADD/GGML_OP_SUB/GGML_OP_MUL/GGML_OP_DIV/GGML_OP_LOG/GGML_OP_SQRT:
- *   this is a simple skeleton, can expand other ggml ops according to expertise
+ *   this is a simple hwaccel skeleton, can expand other ggml ops according to expertise
  * - GGML_OP_MUL_MAT:
- *   this is a complicated skeleton, can expand other ggml ops accordingly
+ *   this is a complicated hwaccel skeleton, can expand other ggml ops accordingly
  *
  * currently provide following ggml op' implementation through Hexagon DSP:
  * - GGML_OP_ADD & GGML_OP_MUL_MAT:
- *   this is a skeleton, can expand other ggml ops accordingly
+ *   this is a hwaccel skeleton, can expand other ggml ops accordingly
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to
@@ -230,7 +230,7 @@ static void ggmlqnn_compute_diag_mask(ggml_backend_qnn_context * ctx, ggml_ten

 #define GGMLQNN_CHECK_PARAMS(ctx, src0, src1, dst)                        \
     do {                                                                  \
-        if (g_qnn_params.inference_approach != DIRECT_USE_CDSP) {         \
+        if (g_qnn_params.hwaccel_approach != HWACCEL_CDSP) {              \
             if (!ggmlqnn_is_valid_params((ctx), (src0), (src1), (dst))) { \
                 return;                                                   \
             }                                                             \
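The macro gates parameter validation: the check only runs when the approach is not HWACCEL_CDSP. A minimal usage sketch (this op function is illustrative, not part of the commit):

    // hypothetical entry point showing where GGMLQNN_CHECK_PARAMS sits;
    // under HWACCEL_CDSP the validation branch above is skipped entirely
    static void ggmlqnn_compute_example_op(ggml_backend_qnn_context * ctx, ggml_tensor * dst) {
        ggml_tensor * src0 = dst->src[0];
        ggml_tensor * src1 = dst->src[1];
        GGMLQNN_CHECK_PARAMS(ctx, src0, src1, dst); // may early-return on invalid params
        // ... actual op implementation follows
    }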
@@ -270,12 +270,12 @@ enum qnn_profile_level {
 };

 //0: general approach through QNN:offload ggmlop to QNN
-//1: general approach through Hexagon cDSP:offload ggmlop to Hexagon cDSP directly
-//2: special approach through QNN:mapping entire ggml cgraph to a single QNN graph
-enum inference_approach {
-    QNN_GENERAL = 0,
-    DIRECT_USE_CDSP = 1,
-    QNN_SINGLEGRAPH = 2,
+//1: special approach through QNN-SINGLEGRAPH:mapping entire ggml cgraph to a single QNN graph
+//2: general approach through Hexagon cDSP:offload ggmlop to Hexagon cDSP directly
+enum hwaccel_approach_type {
+    HWACCEL_QNN = 0,
+    HWACCEL_QNN_SINGLEGRAPH = 1,
+    HWACCEL_CDSP = 2,
 };

 enum hexagon_dsp_type {
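The three constants keep the numeric range the config file exposes. A hedged sketch of a range guard (illustrative only, not part of the commit):

    // clamp an out-of-range cfg value back to HWACCEL_QNN (0);
    // after this commit the valid values are 0, 1 and 2
    if (g_qnn_params.hwaccel_approach < HWACCEL_QNN
        || g_qnn_params.hwaccel_approach > HWACCEL_CDSP) {
        g_qnn_params.hwaccel_approach = HWACCEL_QNN;
    }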
@@ -362,7 +362,7 @@ struct qnn_parameter {
     int hvx_threads;
     int vtcm_size_in_mb;
     int enable_dlbc;
-    int inference_approach; // 0: QNN_GENERAL 1: DIRECT_USE_CDSP 2: QNN_SINGELGRAPH
+    int hwaccel_approach;   // 0: HWACCEL_QNN 1: HWACCEL_QNN_SINGLEGRAPH 2: HWACCEL_CDSP
     int qnn_backend;        // 0: QNN-CPU backend 1: QNN-GPU backend 2: QNN-NPU backend
     int enable_mulmat_cdsp; // enable/disable offload mulmat to cDSP
     int enable_q_mulmat;    // enable/disable offload fp32 & all quantized type mulmat to cDSP
@@ -382,8 +382,8 @@ static struct qnn_parameter g_qnn_params = {
     .hvx_threads = 4,
     .vtcm_size_in_mb = 8,
     .enable_dlbc = 1,
-    .inference_approach = 0,
-    .qnn_backend = 2, //default is QNN-NPU backend
+    .hwaccel_approach = HWACCEL_CDSP,
+    .qnn_backend = QNN_BACKEND_NPU,
     .enable_mulmat_cdsp = 0,
     .enable_q_mulmat = 0,
     .qnn_cfgfilename = "ggml-qnn.cfg",
@@ -1578,13 +1578,12 @@ static void ggmlhexagon_set_rpc_latency(int domain, int qos, int latency) {

     if (remote_handle_control) {
         struct remote_rpc_control_latency data;
-#if 1
-        data.enable = RPC_PM_QOS;
-        data.latency = 300;
-#else
-        data.enable = RPC_POLL_QOS;
-        data.latency = 1000;
-#endif
+        /*
+           qos          | latency
+           -----------------------
+           RPC_PM_QOS   | 300
+           RPC_POLL_QOS | 1000
+        */
         data.enable = qos;
         data.latency = latency;
         hexagon_error = remote_handle64_control(DSPRPC_GET_DSP_INFO, DSPRPC_CONTROL_LATENCY, (void*)&data, sizeof(data));
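With the hardcoded #if 1 block gone, callers choose the preset. A hedged sketch of the two calls matching the comment table (the domain value here is illustrative):

    // the two presets from the table above, via the function's own
    // signature ggmlhexagon_set_rpc_latency(int domain, int qos, int latency)
    ggmlhexagon_set_rpc_latency(domain_id, RPC_PM_QOS,   300);  // PM QoS preset
    ggmlhexagon_set_rpc_latency(domain_id, RPC_POLL_QOS, 1000); // poll QoS preset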
@@ -1926,7 +1925,7 @@ static int ggmlhexagon_init_dsp(ggml_backend_qnn_context * ctx) {
     }

     if (-1 == domain_id) {
-        if (NULL != domain_type) {
+        if (nullptr != domain_type) {
             if ((strcmp(domain_type, "NSP") != 0 && strcmp(domain_type, "HPASS") != 0)) {
                 GGMLQNN_LOG_WARN("invalid domain_type %s. possible values are NSP or HPASS", domain_type);
                 goto bail;
@@ -2188,16 +2187,16 @@ static const char * ggmlqnn_get_htparch_desc(size_t htp_arch) {
     }
 }

-static const char * ggmlqnn_get_inference_approach_name(int inference_approach) {
-    switch (inference_approach) {
-        case QNN_GENERAL:
-            return "QNN_GENERAL";
-        case DIRECT_USE_CDSP:
-            return "DIRECT_USE_CDSP";
-        case QNN_SINGLEGRAPH:
-            return "QNN_SINGLEGRAPH";
+static const char * ggmlqnn_get_hwaccel_approach_name(int hwaccel_approach) {
+    switch (hwaccel_approach) {
+        case HWACCEL_QNN:
+            return "HWACCEL_QNN";
+        case HWACCEL_QNN_SINGLEGRAPH:
+            return "HWACCEL_QNN_SINGLEGRAPH";
+        case HWACCEL_CDSP:
+            return "HWACCEL_CDSP";
         default:
-            return "unknown approach";
+            return "unknown hwaccel approach";
     }
 }

@@ -3996,7 +3995,7 @@ void qnn_instance::htp_enter_performance_mode() {
 }

 static void ggmlqnn_set_runtime_path(size_t device, const std::string & path) {
-    if ((QNN_BACKEND_NPU == device) || (DIRECT_USE_CDSP == g_qnn_params.inference_approach)) {
+    if ((QNN_BACKEND_NPU == device) || (HWACCEL_CDSP == g_qnn_params.hwaccel_approach)) {
         if (0 == setenv("LD_LIBRARY_PATH",
                         (path +
                          ":/vendor/dsp/cdsp:/vendor/lib64:/vendor/dsp/dsp:/vendor/dsp/images").c_str(),
@@ -4224,7 +4223,7 @@ static void ggmlqnn_load_cfg() {
     qnncfg_instance.get_intvalue("general", "enable_perf", g_qnn_params.enable_perf, 0);
     qnncfg_instance.get_intvalue("general", "print_tensors_info", g_qnn_params.print_tensors_info, 0);
     qnncfg_instance.get_intvalue("general", "dump_op_info", g_qnn_params.dump_op_info, 0);
-    qnncfg_instance.get_intvalue("general", "inference_approach", g_qnn_params.inference_approach, 0);
+    qnncfg_instance.get_intvalue("general", "hwaccel_approach", g_qnn_params.hwaccel_approach, 0);
     qnncfg_instance.get_intvalue("general", "qnn_backend", g_qnn_params.qnn_backend, 2);
     qnncfg_instance.get_intvalue("qnn", "hvx_threads", g_qnn_params.hvx_threads, 4);
     qnncfg_instance.get_intvalue("qnn", "vtcm_size_in_mb", g_qnn_params.vtcm_size_in_mb, 8);
@@ -4233,8 +4232,8 @@ static void ggmlqnn_load_cfg() {
     qnncfg_instance.get_intvalue("cdsp", "enable_mulmat_cdsp", g_qnn_params.enable_mulmat_cdsp, 0);
     qnncfg_instance.get_intvalue("cdsp", "enable_q_mulmat", g_qnn_params.enable_q_mulmat, 0);
     GGMLQNN_LOG_INFO("print_qnn_internal_log=%d", g_qnn_params.print_qnn_internal_log);
-    GGMLQNN_LOG_INFO("inference_approach=%d(%s)", g_qnn_params.inference_approach,
-                     ggmlqnn_get_inference_approach_name(g_qnn_params.inference_approach));
+    GGMLQNN_LOG_INFO("hwaccel_approach=%d(%s)", g_qnn_params.hwaccel_approach,
+                     ggmlqnn_get_hwaccel_approach_name(g_qnn_params.hwaccel_approach));
     GGMLQNN_LOG_INFO("qnn_backend=%d", g_qnn_params.qnn_backend);
     GGMLQNN_LOG_INFO("npu inference precision mode=%s", precision_mode.c_str());
     GGMLQNN_LOG_INFO("qnn runtime lib path=%s", g_qnn_params.qnn_runtimelib_path);
@@ -4325,7 +4324,7 @@ static bool ggmlqnn_can_handle_op(const ggml_backend_qnn_context * ctx, const st
         return true;
     }

-    if (DIRECT_USE_CDSP == g_qnn_params.inference_approach) {
+    if (HWACCEL_CDSP == g_qnn_params.hwaccel_approach) {
         return ggmlhexagon_can_handle_op(ctx, op_tensor);
     }

@@ -4686,7 +4685,7 @@ static void ggml_backend_qnn_free(ggml_backend_t backend) {
     ggml_backend_qnn_context * ctx = (ggml_backend_qnn_context *)backend->context;
     GGMLQNN_LOG_DEBUG("device idx %d, name:%s", ctx->device, g_qnn_mgr[ctx->device].name);

-    if (DIRECT_USE_CDSP == g_qnn_params.inference_approach) {
+    if (HWACCEL_CDSP == g_qnn_params.hwaccel_approach) {
         ggmlhexagon_close_cdsp(ctx);
     }

@@ -4787,7 +4786,7 @@ static void ggml_backend_qnn_device_get_memory(ggml_backend_dev_t dev, size_t *
     } else if (QNN_BACKEND_NPU == ctx->device) {
         size_t rpc_ion_memsize = 0;
         size_t rpc_ion_usage = 0;
-        if (DIRECT_USE_CDSP != g_qnn_params.inference_approach) {
+        if (HWACCEL_CDSP != g_qnn_params.hwaccel_approach) {
             rpc_ion_memsize = ctx->instance->get_rpcmem_capacity();
             rpc_ion_usage = ctx->instance->get_rpcmem_usage();
         } else {
@@ -5013,8 +5012,8 @@ ggml_backend_reg_t ggml_backend_qnn_reg() {

     //case-2: normal scenario, such as llama-cli or UI applicaton
     ggmlqnn_load_cfg();
-    GGMLQNN_LOG_INFO("inference approach=%d(%s)", g_qnn_params.inference_approach,
-                     ggmlqnn_get_inference_approach_name(g_qnn_params.inference_approach));
+    GGMLQNN_LOG_INFO("inference approach=%d(%s)", g_qnn_params.hwaccel_approach,
+                     ggmlqnn_get_hwaccel_approach_name(g_qnn_params.hwaccel_approach));
     GGMLQNN_LOG_INFO("user's specified qnn_backend=%d", g_qnn_params.qnn_backend);
     GGMLQNN_LOG_INFO("user's specified qnn runtime lib path=%s", g_qnn_params.qnn_runtimelib_path);
     if (g_qnn_params.qnn_backend >= GGML_QNN_MAX_DEVICES) {
@@ -5053,7 +5052,7 @@ ggml_backend_reg_t ggml_backend_qnn_reg() {
 }

 const char * ggml_backend_qnn_get_devname(size_t dev_num) {
-    if (DIRECT_USE_CDSP == g_qnn_params.inference_approach) {
+    if (HWACCEL_CDSP == g_qnn_params.hwaccel_approach) {
         if (dev_num == QNN_BACKEND_GGML)
             return "ggml";
         else
@@ -5076,8 +5075,8 @@ const char * ggml_backend_qnn_get_devname(size_t dev_num) {

 static qnn_instance * ggmlqnn_init_qnn_instance(size_t device, const char * qnn_lib_path) {
     int result = 0;
-    GGMLQNN_LOG_INFO("inference approach=%d(%s)", g_qnn_params.inference_approach,
-                     ggmlqnn_get_inference_approach_name(g_qnn_params.inference_approach));
+    GGMLQNN_LOG_INFO("inference approach=%d(%s)", g_qnn_params.hwaccel_approach,
+                     ggmlqnn_get_hwaccel_approach_name(g_qnn_params.hwaccel_approach));

     qnn_instance * instance = nullptr;
     instance = new qnn_instance(qnn_lib_path, g_qnn_mgr[device].lib, "");
@@ -5141,7 +5140,7 @@ ggml_backend_t ggml_backend_qnn_init(size_t device, const char * qnn_lib_path) {
     }

     //don't initialize QNN when inference approach is offload ggml op to Hexagon cDSP directly
-    if (DIRECT_USE_CDSP != g_qnn_params.inference_approach) {
+    if (HWACCEL_CDSP != g_qnn_params.hwaccel_approach) {
         qnn_instance * instance = ggmlqnn_init_qnn_instance(device, qnn_lib_path);
         if (nullptr == instance)
             return nullptr;
@@ -5157,14 +5156,14 @@ ggml_backend_t ggml_backend_qnn_init(size_t device, const char * qnn_lib_path) {
     };

     g_qnn_mgr[device].backend = qnn_backend;
-    if (DIRECT_USE_CDSP == g_qnn_params.inference_approach) {
+    if (HWACCEL_CDSP == g_qnn_params.hwaccel_approach) {
         int result = ggmlhexagon_init_dsp(&g_qnn_mgr[device]);
         if (0 != result) {
             GGMLQNN_LOG_INFO("init hexagon dsp failure");
             ggml_backend_qnn_free(qnn_backend);
             return nullptr;
         }
-        //ensure test-backend-ops get the correct backend name when inference approach is 1(DIRECT_USE_CDSP)
+        //ensure test-backend-ops get the correct backend name when inference approach is 2(HWACCEL_CDSP)
         memcpy(g_qnn_mgr[device].name, "Hexagon-cDSP", strlen("Hexagon-cDSP"));
     }

@@ -5237,7 +5236,7 @@ static void ggmlqnn_compute_elementwise(ggml_backend_qnn_context * ctx, ggml_ten
     qnn_perf op_perf = qnn_perf(graph_name);
     op_perf.start();

-    if (DIRECT_USE_CDSP == g_qnn_params.inference_approach) {
+    if (HWACCEL_CDSP == g_qnn_params.hwaccel_approach) {
         ggmlhexagon_compute(ctx, op);
         op_perf.info();
         return;
@@ -5629,7 +5628,7 @@ static void ggmlqnn_compute_mul_mat(ggml_backend_qnn_context * ctx, ggml_tensor
     qnn_perf op_perf = qnn_perf(graph_name);
     op_perf.start();

-    if (DIRECT_USE_CDSP == g_qnn_params.inference_approach) {
+    if (HWACCEL_CDSP == g_qnn_params.hwaccel_approach) {
         ggmlhexagon_compute(ctx, op);
         op_perf.info();
         return;
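ggmlqnn_compute_elementwise and ggmlqnn_compute_mul_mat now share the same early-out shape; a condensed sketch of that pattern as it appears in both hunks above:

    // shared dispatch pattern after this commit: take the Hexagon cDSP
    // path first, otherwise fall through to the QNN graph implementation
    if (HWACCEL_CDSP == g_qnn_params.hwaccel_approach) {
        ggmlhexagon_compute(ctx, op); // offload the op directly to the cDSP
        op_perf.info();               // report per-op timing
        return;
    }
    // ... QNN graph construction and execution continue below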

scripts/ggml-qnn.cfg

Lines changed: 6 additions & 6 deletions
@@ -17,19 +17,19 @@ print_tensors_info = 0
 # enable/disable dump op info in handle_op
 dump_op_info = 0

-# 0: general approach through QNN
-# 1: general approach through Hexagon cDSP
-# 2: special approach through QNN: mapping entire ggml cgraph to QNN graph
-inference_approach = 1
+# 0: hwaccel approach through QNN
+# 1: hwaccel approach through QNN-SINGLEGRAPH: mapping entire ggml cgraph to a single QNN graph
+# 2: hwaccel approach through Hexagon cDSP
+hwaccel_approach = 2

-#inference approach through QNN
+#hwaccel approach through QNN
 [qnn]
 hvx_threads = 4
 vtcm_size_in_mb = 8
 enable_dlbc = 1
 precision_mode = "fp16"

-#inference approach through cDSP
+#hwaccel approach through cDSP
 [cdsp]
 #enable/disable offload mulmat to cDSP
 enable_mulmat_cdsp = 0
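For reference, a sketch of the [general] entries a user would set to fall back to the pure QNN path instead of the new cDSP default (key names from the diff; the qnn_backend value follows the code's default of 2, QNN-NPU):

    [general]
    hwaccel_approach = 0   # HWACCEL_QNN: offload ggml ops through QNN
    qnn_backend      = 2   # 2: QNN-NPU backend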
