Skip to content

Commit c528447

Browse files
author
zhouwg
committed
ggml-hexagon: release ggml-hexagon v1.00
1 parent 3c5817e commit c528447

File tree

2 files changed

+31
-22
lines changed

2 files changed

+31
-22
lines changed

ggml/src/ggml-hexagon/ggml-hexagon.cpp

Lines changed: 28 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,6 @@
128128
// =================================================================================================
129129
class qnn_instance;
130130
struct ggml_backend_hexagon_context;
131-
132131
static void ggmlhexagon_probe_dspinfo(ggml_backend_hexagon_context * ctx);
133132

134133
#if 0//def NDEBUG
@@ -320,6 +319,7 @@ struct hexagon_appcfg_t {
320319
int enable_rpc_dma_mempool; // enable/disable rpc dma memory pool
321320
const char * cfgfilename;
322321
const char * runtimelib_path;
322+
char ggml_hexagon_version[GGMLHEXAGON_TMPBUF_LEN];
323323
};
324324

325325
static struct hexagon_appcfg_t g_hexagon_appcfg = {
@@ -345,6 +345,7 @@ static struct hexagon_appcfg_t g_hexagon_appcfg = {
345345
#elif defined(_WIN32)
346346
.qnn_runtimelib_path = "C:\\",
347347
#endif
348+
.ggml_hexagon_version = {"1.00"},
348349
};
349350

350351
//file:///opt/qcom/aistack/qairt/2.31.0.250130/docs/QNN/general/overview.html#tbl-supported-snapdragon-devices
@@ -855,15 +856,16 @@ static void ggmlhexagon_print_running_timestamp(ggml_backend_hexagon_context * c
855856
char timestamp[GGMLHEXAGON_TMPBUF_LEN];
856857
memset(timestamp, 0, GGMLHEXAGON_TMPBUF_LEN);
857858

858-
GGMLHEXAGON_LOG_INFO("hwaccel approach is %d(%s)", g_hexagon_appcfg.hwaccel_approach,
859+
GGMLHEXAGON_LOG_INFO("ggml_hexagon_version: %s", g_hexagon_appcfg.ggml_hexagon_version);
860+
GGMLHEXAGON_LOG_INFO("hwaccel approach: %d(%s)", g_hexagon_appcfg.hwaccel_approach,
859861
ggmlhexagon_get_hwaccel_approach_name(g_hexagon_appcfg.hwaccel_approach));
860-
GGMLHEXAGON_LOG_INFO("hexagon_backend=%d(%s)", g_hexagon_appcfg.hexagon_backend,
861-
ggml_backend_hexagon_get_devname(g_hexagon_appcfg.hexagon_backend));
862+
GGMLHEXAGON_LOG_INFO("hexagon_backend: %d(%s)", g_hexagon_appcfg.hexagon_backend,
863+
ggml_backend_hexagon_get_devname(g_hexagon_appcfg.hexagon_backend));
862864
ggmlhexagon_get_timestring(timestamp);
863865
if (HWACCEL_CDSP == g_hexagon_appcfg.hwaccel_approach) {
864866
GGMLHEXAGON_LOG_INFO("offload quantize GGML_OP_MUL_MAT: %s", g_hexagon_appcfg.enable_q_mulmat ? "YES" : "NO");
865-
GGMLHEXAGON_LOG_INFO("using rpc ion memory pool: %s", g_hexagon_appcfg.enable_rpc_ion_mempool ? "YES" : "NO");
866-
GGMLHEXAGON_LOG_INFO("using rpc dma memory pool: %s", g_hexagon_appcfg.enable_rpc_dma_mempool ? "YES" : "NO");
867+
GGMLHEXAGON_LOG_INFO("using rpc ion memory pool: %s", g_hexagon_appcfg.enable_rpc_ion_mempool ? "YES" : "NO");
868+
GGMLHEXAGON_LOG_INFO("using rpc dma memory pool: %s", g_hexagon_appcfg.enable_rpc_dma_mempool ? "YES" : "NO");
867869
ggmlhexagon_probe_dspinfo(ctx);
868870
} else {
869871
GGMLHEXAGON_LOG_INFO("offload quantize GGML_OP_MUL_MAT: %s", g_hexagon_appcfg.enable_q_mulmat ? "YES" : "NO");
@@ -1440,6 +1442,8 @@ static void ggmlhexagon_load_cfg() {
14401442
GGMLHEXAGON_LOG_INFO("%s", tmposs.str().c_str());
14411443
});
14421444
std::string precision_mode;
1445+
std::string ggml_hexagon_version;
1446+
qnncfg_instance.get_stringvalue("general", "ggml_hexagon_version", ggml_hexagon_version, "1.00");
14431447
qnncfg_instance.get_intvalue("general", "print_qnn_internal_log", g_hexagon_appcfg.print_qnn_internal_log, 0);
14441448
qnncfg_instance.get_intvalue("general", "enable_perf", g_hexagon_appcfg.enable_perf, 1);
14451449
qnncfg_instance.get_intvalue("general", "print_tensors_info", g_hexagon_appcfg.print_tensors_info, 0);
@@ -1453,6 +1457,8 @@ static void ggmlhexagon_load_cfg() {
14531457
qnncfg_instance.get_stringvalue("qnn", "precision_mode", precision_mode, "fp32");
14541458
qnncfg_instance.get_intvalue("cdsp", "enable_rpc_ion_mempool", g_hexagon_appcfg.enable_rpc_ion_mempool, 1);
14551459
qnncfg_instance.get_intvalue("cdsp", "enable_rpc_dma_mempool", g_hexagon_appcfg.enable_rpc_dma_mempool, 0);
1460+
GGMLHEXAGON_LOG_INFO("ggml_hexagon_version=%s", ggml_hexagon_version.c_str());
1461+
memcpy(g_hexagon_appcfg.ggml_hexagon_version, ggml_hexagon_version.c_str(), strlen(ggml_hexagon_version.c_str()));
14561462
GGMLHEXAGON_LOG_INFO("hwaccel_approach=%d(%s)", g_hexagon_appcfg.hwaccel_approach,
14571463
ggmlhexagon_get_hwaccel_approach_name(g_hexagon_appcfg.hwaccel_approach));
14581464
GGMLHEXAGON_LOG_INFO("hexagon_backend=%d(%s)", g_hexagon_appcfg.hexagon_backend,
@@ -1479,6 +1485,11 @@ static bool ggmlhexagon_check_valid_appcfg() {
14791485
GGMLHEXAGON_LOG_INFO("hwaccel_approach HWACCEL_CDSP must match with hexagon_backend HEXAGON_BACKEND_CDSP");
14801486
is_valid_appcfg = false;
14811487
}
1488+
1489+
if ((1 == g_hexagon_appcfg.enable_rpc_ion_mempool) && (1 == g_hexagon_appcfg.enable_rpc_dma_mempool)) {
1490+
GGMLHEXAGON_LOG_INFO("rpc ion mempool and rpc dma mempool cannot be enabled at the same time");
1491+
is_valid_appcfg = false;
1492+
}
14821493
}
14831494

14841495
if (!is_valid_appcfg) {
@@ -4719,6 +4730,10 @@ static void ggmlhexagon_init_rpcmempool(ggml_backend_hexagon_context * ctx) {
47194730
remote_register_buf(ctx->rpc_mempool, ctx->rpc_mempool_len, ctx->rpc_mempool_handle);
47204731
}
47214732

4733+
if ((g_hexagon_appcfg.hwaccel_approach == HWACCEL_CDSP) && (1 == g_hexagon_appcfg.enable_rpc_dma_mempool)) {
4734+
//TODO
4735+
}
4736+
47224737
return;
47234738
}
47244739

@@ -4790,7 +4805,7 @@ static void ggmlhexagon_deinit_cdsp(ggml_backend_hexagon_context * ctx) {
47904805

47914806
ggmlhexagon_deinit_rpcmempool(ctx);
47924807

4793-
ctx->domain_id = -1;
4808+
ctx->domain_id = -1;
47944809
GGMLHEXAGON_LOG_INFO("leave %s", __func__);
47954810
}
47964811

@@ -5042,12 +5057,8 @@ static bool ggmlhexagon_can_handle_op_through_cdsp(ggml_backend_dev_t dev, const
50425057

50435058
const struct ggml_tensor * src0 = op_tensor->src[0];
50445059
const struct ggml_tensor * src1 = op_tensor->src[1];
5045-
const int64_t ne00 = src0->ne[0];
5046-
const uint32_t src0_rank = ggml_n_dims(src0);
5047-
const uint32_t src1_rank = ggml_n_dims(src1);
50485060
switch (op_tensor->op) {
50495061
case GGML_OP_ADD:
5050-
case GGML_OP_SUB:
50515062
{
50525063
if (!ggml_are_same_shape(src0, src1)) {
50535064
return false;
@@ -5409,7 +5420,7 @@ static ggml_backend_buffer_t ggml_backend_hexagon_buffer_type_alloc_buffer(
54095420
size_page = systeminfo.dwPageSize;
54105421
#endif
54115422
size_t size_aligned = size;
5412-
if ((size_aligned % size_page) != 0) {
5423+
if (0 != (size_aligned % size_page)) {
54135424
size_aligned += (size_page - (size_aligned % size_page));
54145425
}
54155426
if ((g_hexagon_appcfg.hwaccel_approach == HWACCEL_CDSP) && (1 == g_hexagon_appcfg.enable_rpc_ion_mempool)) {
@@ -5423,10 +5434,10 @@ static ggml_backend_buffer_t ggml_backend_hexagon_buffer_type_alloc_buffer(
54235434
}
54245435
buffer_ctx->buffer_size = size_aligned;
54255436
if (nullptr == buffer_ctx->buffer) {
5426-
GGMLHEXAGON_LOG_WARN("%s: failed to allocate %d MiB\n", __func__, size / (1 << 20));
5437+
GGMLHEXAGON_LOG_WARN("%s: failed to allocate %d MiB\n", __func__, size / SIZE_IN_MB);
54275438
return nullptr;
54285439
} else {
5429-
//GGMLHEXAGON_LOG_DEBUG("%s: succeed to allocate %d MiB\n", __func__, size / (1 << 20));
5440+
//GGMLHEXAGON_LOG_DEBUG("%s: succeed to allocate %d MiB\n", __func__, size / SIZE_IN_MB);
54305441
}
54315442

54325443
return ggml_backend_buffer_init(buft, ggml_backend_hexagon_buffer_interface, buffer_ctx, size);
@@ -5478,7 +5489,7 @@ static void ggml_backend_hexagon_free(ggml_backend_t backend) {
54785489
g_hexagon_mgr[ctx->device].instance = nullptr;
54795490
}
54805491

5481-
if (g_hexagon_mgr[ctx->device].backend != nullptr) {
5492+
if (nullptr != g_hexagon_mgr[ctx->device].backend) {
54825493
//print timestamp and dsp information before deinit cdsp, useful for troubleshooting
54835494
ggmlhexagon_print_running_timestamp(ctx);
54845495
if (HWACCEL_CDSP == g_hexagon_appcfg.hwaccel_approach) {
@@ -5595,7 +5606,6 @@ static enum ggml_backend_dev_type ggml_backend_hexagon_device_get_type(ggml_back
55955606

55965607
static void ggml_backend_hexagon_device_get_props(ggml_backend_dev_t dev,
55975608
struct ggml_backend_dev_props * props) {
5598-
GGMLHEXAGON_LOG_DEBUG("enter %s\n", __func__);
55995609
props->name = ggml_backend_hexagon_device_get_name(dev);
56005610
props->description = ggml_backend_hexagon_device_get_description(dev);
56015611
props->type = ggml_backend_hexagon_device_get_type(dev);
@@ -5858,6 +5868,7 @@ ggml_backend_reg_t ggml_backend_hexagon_reg() {
58585868
}
58595869
ctx->devices.push_back(dev);
58605870

5871+
//here is the trick: make cDSP rpc memory pool happy because ggml's backend subsystem need this
58615872
if (g_hexagon_appcfg.hwaccel_approach == HWACCEL_CDSP) {
58625873
GGML_ASSERT(g_hexagon_appcfg.hexagon_backend == HEXAGON_BACKEND_CDSP);
58635874
int result = ggmlhexagon_init_dsp(&g_hexagon_mgr[HEXAGON_BACKEND_CDSP]);
@@ -5888,7 +5899,7 @@ const char * ggml_backend_hexagon_get_devname(size_t dev_num) {
58885899
return "HEXAGON_BACKEND_CDSP";
58895900
}
58905901

5891-
//fall through
5902+
//here is the trick: fall through for various scenarios
58925903
switch (dev_num) {
58935904
case HEXAGON_BACKEND_QNNCPU:
58945905
return "HEXAGON_BACKEND_QNN_CPU";

scripts/ggml-hexagon.cfg

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
[general]
2+
version = "1.00"
23
#0: HEXAGON_BACKEND_QNNCPU
34
#1: HEXAGON_BACKEND_QNNGPU
45
#2: HEXAGON_BACKEND_QNNNPU / HEXAGON_BACKEND_CDSP
@@ -7,17 +8,14 @@ hexagon_backend = 2
78

89
# enable/disable QNN's internal log
910
print_qnn_internal_log = 0
10-
1111
# enable/disable perf of op function
1212
enable_perf = 1
13-
1413
# enable/disable print tensors info in op function
1514
print_tensors_info = 0
16-
1715
# enable/disable dump op info in handle_op
1816
dump_op_info = 0
1917

20-
#enable/disable offload fp32 & quantized type mulmat
18+
#enable/disable offload quantized type mulmat
2119
#quantized type mulmat works fine in HWACCEL_QNN at the moment
2220
#quantized type mulmat doesn't work correctly in HWACCEL_CDSP at the moment
2321
#this item makes mulmat performance comparison easy
@@ -39,6 +37,6 @@ precision_mode = "fp16"
3937
#hwaccel approach through cDSP
4038
[cdsp]
4139
#enable/disable rpc ion memory pool
42-
enable_rpc_ion_mempool = 1
40+
enable_rpc_ion_mempool = 0
4341
#enable/disable rpc dma memory pool
4442
enable_rpc_dma_mempool = 0

0 commit comments

Comments
 (0)