Skip to content

Commit b415949

Browse files
author
zhouwg
committed
ggml-hexagon: refine pinned-memory feature
1 parent 9ea9302 commit b415949

File tree

2 files changed

+51
-4
lines changed

2 files changed

+51
-4
lines changed

ggml/src/ggml-hexagon/ggml-hexagon.cpp

Lines changed: 48 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,28 @@ enum qcom_chipset_soc_model {
255255
#endif
256256
};
257257

258+
//borrowed from the Android source code; the values might not be accurate
259+
enum ion_heap_ids {
260+
INVALID_HEAP_ID = -1,
261+
ION_CP_MM_HEAP_ID = 8,
262+
ION_SECURE_HEAP_ID = 9,
263+
ION_SECURE_DISPLAY_HEAP_ID = 10,
264+
ION_CP_MFC_HEAP_ID = 12,
265+
ION_SPSS_HEAP_ID = 13,
266+
ION_CP_WB_HEAP_ID = 16,
267+
ION_CAMERA_HEAP_ID = 20,
268+
ION_SYSTEM_CONTIG_HEAP_ID = 21,
269+
ION_ADSP_HEAP_ID = 22,
270+
ION_PIL1_HEAP_ID = 23,
271+
ION_SF_HEAP_ID = 24,
272+
ION_SYSTEM_HEAP_ID = 25,
273+
ION_PIL2_HEAP_ID = 26,
274+
ION_QSECOM_HEAP_ID = 27,
275+
ION_AUDIO_HEAP_ID = 28,
276+
ION_MM_FIRMWARE_HEAP_ID = 29,
277+
ION_HEAP_ID_RESERVED = 31
278+
};
279+
258280
struct qcom_socinfo {
259281
uint32_t soc_model;
260282
size_t htp_arch;
@@ -315,6 +337,7 @@ struct hexagon_appcfg_t {
315337
int print_tensors_info; // enable/disable print tensors info in op function
316338
int dump_op_info; // enable/disable dump op info in handle_op
317339
int enable_q_mulmat; // enable/disable offload quantized mulmat
340+
int enable_pinned_memory; // enable/disable pinned-memory feature
318341
int precision_mode; // 0: default 1:fp16
319342
int hvx_threads;
320343
int vtcm_size_in_mb;
@@ -339,6 +362,7 @@ static struct hexagon_appcfg_t g_hexagon_appcfg = {
339362
.print_tensors_info = 0,
340363
.dump_op_info = 0,
341364
.enable_q_mulmat = 0,
365+
.enable_pinned_memory = 0,
342366
.precision_mode = 0,
343367
.hvx_threads = 4,
344368
.vtcm_size_in_mb = 8,
@@ -1775,6 +1799,7 @@ static void ggmlhexagon_load_cfg() {
17751799
hexagoncfg_instance.get_intvalue("general", "enable_profiler", g_hexagon_appcfg.enable_profiler, 0);
17761800
hexagoncfg_instance.get_intvalue("general", "profiler_duration", g_hexagon_appcfg.profiler_duration, 5);
17771801
hexagoncfg_instance.get_intvalue("general", "profiler_counts", g_hexagon_appcfg.profiler_counts, 100);
1802+
hexagoncfg_instance.get_intvalue("general", "enable_pinned_memory", g_hexagon_appcfg.enable_pinned_memory, 0);
17781803

17791804
hexagoncfg_instance.get_intvalue("qnn", "hvx_threads", g_hexagon_appcfg.hvx_threads, 4);
17801805
hexagoncfg_instance.get_intvalue("qnn", "vtcm_size_in_mb", g_hexagon_appcfg.vtcm_size_in_mb, 8);
@@ -1860,6 +1885,7 @@ static void ggmlhexagon_print_running_timestamp(ggml_backend_hexagon_context * c
18601885
ggmlhexagon_get_hwaccel_approach_name(g_hexagon_appcfg.hwaccel_approach));
18611886
GGMLHEXAGON_LOG_INFO("hexagon_backend: %d(%s)", g_hexagon_appcfg.hexagon_backend,
18621887
ggml_backend_hexagon_get_devname(g_hexagon_appcfg.hexagon_backend));
1888+
GGMLHEXAGON_LOG_INFO("enable pinned_memory: %s", g_hexagon_appcfg.enable_pinned_memory ? "YES" : "NO");
18631889
ggmlhexagon_get_timestring(timestamp);
18641890
if (HWACCEL_CDSP == g_hexagon_appcfg.hwaccel_approach) {
18651891
GGMLHEXAGON_LOG_INFO("offload quantize GGML_OP_MUL_MAT: %s", g_hexagon_appcfg.enable_q_mulmat ? "YES" : "NO");
@@ -6139,18 +6165,33 @@ static const char * ggml_backend_hexagon_host_buffer_name(ggml_backend_buffer_t
61396165
}
61406166

61416167
static void ggml_backend_hexagon_host_buffer_free(ggml_backend_buffer_t buffer) {
6142-
ggml_aligned_free(buffer->context, 0);
6168+
if (0 == g_hexagon_appcfg.enable_pinned_memory) {
6169+
ggml_aligned_free(buffer->context, 0);
6170+
} else {
6171+
rpcmem_free(buffer->context);
6172+
}
61436173
}
61446174

61456175
static void * ggml_hexagon_host_malloc(ggml_backend_buffer_type_t buft, size_t size) {
6146-
return ggml_aligned_malloc(size);
6176+
if (0 == g_hexagon_appcfg.enable_pinned_memory) {
6177+
return ggml_aligned_malloc(size);
6178+
} else {
6179+
//TODO: there is no corresponding API in the existing Hexagon SDK; try to re-use the camera ION heap as pinned memory here
6180+
return rpcmem_alloc(RPCMEM_HEAP_ID_SYSTEM, ION_CAMERA_HEAP_ID | RPCMEM_TRY_MAP_STATIC, size);
6181+
}
61476182
}
61486183

61496184
static ggml_backend_buffer_t ggml_backend_hexagon_host_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) {
61506185
void * host_ptr = ggml_hexagon_host_malloc(buft, size);
61516186

61526187
if (nullptr == host_ptr) {
6188+
GGMLHEXAGON_LOG_INFO("failed to alloc host buffer");
6189+
//TODO: use an assertion here until a better approach is found to release the "correct" host buffer
6190+
// in function ggml_backend_hexagon_host_buffer_free
6191+
GGML_ASSERT(nullptr != host_ptr);
61536192
return ggml_backend_buft_alloc_buffer(ggml_backend_cpu_buffer_type(), size);
6193+
} else {
6194+
GGMLHEXAGON_LOG_INFO("succeed to alloc host buffer %d MiB", size / SIZE_IN_MB);
61546195
}
61556196

61566197
ggml_backend_buffer_t buffer = ggml_backend_cpu_buffer_from_ptr(host_ptr, size);
@@ -6356,9 +6397,12 @@ ggml_backend_reg_t ggml_backend_hexagon_reg() {
63566397
} else {
63576398
ggml_backend_hexagon_device_interface.supports_op = ggmlhexagon_can_handle_op_through_qnn;
63586399
}
6400+
63596401
if ((HWACCEL_CDSP == g_hexagon_appcfg.hwaccel_approach) && (1 == g_hexagon_appcfg.enable_rpc_ion_mempool)) {
6360-
//don't use system memory in this scenario
6361-
ggml_backend_hexagon_device_interface.get_host_buffer_type = nullptr;
6402+
if (0 == g_hexagon_appcfg.enable_pinned_memory) {
6403+
//don't use system memory in this scenario
6404+
ggml_backend_hexagon_device_interface.get_host_buffer_type = nullptr;
6405+
}
63626406
}
63636407

63646408
GGMLHEXAGON_LOG_DEBUG("create backend device for device %d", i);

scripts/ggml-hexagon.cfg

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,9 @@ profiler_counts = 200
8080
# ensure enable_perf = 1 when setting enable_profiler = 1
8181

8282

83+
#enable/disable pinned-memory feature
84+
enable_pinned_memory = 0
85+
8386
#hwaccel approach through QNN(offload ggml op to QNN-NPU)
8487
[qnn]
8588
# enable/disable QNN SDK's internal log; this is very helpful for troubleshooting in the HWACCEL_QNN approach

0 commit comments

Comments
 (0)