@@ -255,6 +255,28 @@ enum qcom_chipset_soc_model {
255255#endif
256256};
257257
// ION heap identifiers.
// Borrowed from Android source code, so the values might not be accurate.
// The numbering is not contiguous: only the IDs listed below are defined here.
enum ion_heap_ids {
    INVALID_HEAP_ID            = -1,
    ION_CP_MM_HEAP_ID          = 8,
    ION_SECURE_HEAP_ID         = 9,
    ION_SECURE_DISPLAY_HEAP_ID = 10,
    ION_CP_MFC_HEAP_ID         = 12,
    ION_SPSS_HEAP_ID           = 13,
    ION_CP_WB_HEAP_ID          = 16,
    ION_CAMERA_HEAP_ID         = 20,
    ION_SYSTEM_CONTIG_HEAP_ID  = 21,
    ION_ADSP_HEAP_ID           = 22,
    ION_PIL1_HEAP_ID           = 23,
    ION_SF_HEAP_ID             = 24,
    ION_SYSTEM_HEAP_ID         = 25,
    ION_PIL2_HEAP_ID           = 26,
    ION_QSECOM_HEAP_ID         = 27,
    ION_AUDIO_HEAP_ID          = 28,
    ION_MM_FIRMWARE_HEAP_ID    = 29,
    ION_HEAP_ID_RESERVED       = 31,
};

279+
258280struct qcom_socinfo {
259281 uint32_t soc_model;
260282 size_t htp_arch;
@@ -315,6 +337,7 @@ struct hexagon_appcfg_t {
315337 int print_tensors_info; // enable/disable print tensors info in op function
316338 int dump_op_info; // enable/disable dump op info in handle_op
317339 int enable_q_mulmat; // enable/disable offload quantized mulmat
340+ int enable_pinned_memory; // enable/disable pinned-memory feature
318341 int precision_mode; // 0: default 1:fp16
319342 int hvx_threads;
320343 int vtcm_size_in_mb;
@@ -339,6 +362,7 @@ static struct hexagon_appcfg_t g_hexagon_appcfg = {
339362 .print_tensors_info = 0 ,
340363 .dump_op_info = 0 ,
341364 .enable_q_mulmat = 0 ,
365+ .enable_pinned_memory = 0 ,
342366 .precision_mode = 0 ,
343367 .hvx_threads = 4 ,
344368 .vtcm_size_in_mb = 8 ,
@@ -1775,6 +1799,7 @@ static void ggmlhexagon_load_cfg() {
17751799 hexagoncfg_instance.get_intvalue (" general" , " enable_profiler" , g_hexagon_appcfg.enable_profiler , 0 );
17761800 hexagoncfg_instance.get_intvalue (" general" , " profiler_duration" , g_hexagon_appcfg.profiler_duration , 5 );
17771801 hexagoncfg_instance.get_intvalue (" general" , " profiler_counts" , g_hexagon_appcfg.profiler_counts , 100 );
1802+ hexagoncfg_instance.get_intvalue (" general" , " enable_pinned_memory" , g_hexagon_appcfg.enable_pinned_memory , 0 );
17781803
17791804 hexagoncfg_instance.get_intvalue (" qnn" , " hvx_threads" , g_hexagon_appcfg.hvx_threads , 4 );
17801805 hexagoncfg_instance.get_intvalue (" qnn" , " vtcm_size_in_mb" , g_hexagon_appcfg.vtcm_size_in_mb , 8 );
@@ -1860,6 +1885,7 @@ static void ggmlhexagon_print_running_timestamp(ggml_backend_hexagon_context * c
18601885 ggmlhexagon_get_hwaccel_approach_name (g_hexagon_appcfg.hwaccel_approach ));
18611886 GGMLHEXAGON_LOG_INFO (" hexagon_backend: %d(%s)" , g_hexagon_appcfg.hexagon_backend ,
18621887 ggml_backend_hexagon_get_devname (g_hexagon_appcfg.hexagon_backend ));
1888+ GGMLHEXAGON_LOG_INFO (" enable pinned_memory: %s" , g_hexagon_appcfg.enable_pinned_memory ? " YES" : " NO" );
18631889 ggmlhexagon_get_timestring (timestamp);
18641890 if (HWACCEL_CDSP == g_hexagon_appcfg.hwaccel_approach ) {
18651891 GGMLHEXAGON_LOG_INFO (" offload quantize GGML_OP_MUL_MAT: %s" , g_hexagon_appcfg.enable_q_mulmat ? " YES" : " NO" );
@@ -6139,18 +6165,33 @@ static const char * ggml_backend_hexagon_host_buffer_name(ggml_backend_buffer_t
61396165}
61406166
61416167static void ggml_backend_hexagon_host_buffer_free (ggml_backend_buffer_t buffer) {
6142- ggml_aligned_free (buffer->context , 0 );
6168+ if (0 == g_hexagon_appcfg.enable_pinned_memory ) {
6169+ ggml_aligned_free (buffer->context , 0 );
6170+ } else {
6171+ rpcmem_free (buffer->context );
6172+ }
61436173}
61446174
61456175static void * ggml_hexagon_host_malloc (ggml_backend_buffer_type_t buft, size_t size) {
6146- return ggml_aligned_malloc (size);
6176+ if (0 == g_hexagon_appcfg.enable_pinned_memory ) {
6177+ return ggml_aligned_malloc (size);
6178+ } else {
6179+ // TODO: there are no corresponding APIs in existing Hexagon SDK, here try to re-use camera ion heap as a pinned memory
6180+ return rpcmem_alloc (RPCMEM_HEAP_ID_SYSTEM, ION_CAMERA_HEAP_ID | RPCMEM_TRY_MAP_STATIC, size);
6181+ }
61476182}
61486183
61496184static ggml_backend_buffer_t ggml_backend_hexagon_host_buffer_type_alloc_buffer (ggml_backend_buffer_type_t buft, size_t size) {
61506185 void * host_ptr = ggml_hexagon_host_malloc (buft, size);
61516186
61526187 if (nullptr == host_ptr) {
6188+ GGMLHEXAGON_LOG_INFO (" failed to alloc host buffer" );
6189+ // TODO: use assertion here before find a better approach to release "correct" host buffer
6190+ // in function ggml_backend_hexagon_host_buffer_free
6191+ GGML_ASSERT (nullptr != host_ptr);
61536192 return ggml_backend_buft_alloc_buffer (ggml_backend_cpu_buffer_type (), size);
6193+ } else {
6194+ GGMLHEXAGON_LOG_INFO (" succeed to alloc host buffer %d MiB" , size / SIZE_IN_MB);
61546195 }
61556196
61566197 ggml_backend_buffer_t buffer = ggml_backend_cpu_buffer_from_ptr (host_ptr, size);
@@ -6356,9 +6397,12 @@ ggml_backend_reg_t ggml_backend_hexagon_reg() {
63566397 } else {
63576398 ggml_backend_hexagon_device_interface.supports_op = ggmlhexagon_can_handle_op_through_qnn;
63586399 }
6400+
63596401 if ((HWACCEL_CDSP == g_hexagon_appcfg.hwaccel_approach ) && (1 == g_hexagon_appcfg.enable_rpc_ion_mempool )) {
6360- // don't use system memory in this scenario
6361- ggml_backend_hexagon_device_interface.get_host_buffer_type = nullptr ;
6402+ if (0 == g_hexagon_appcfg.enable_pinned_memory ) {
6403+ // don't use system memory in this scenario
6404+ ggml_backend_hexagon_device_interface.get_host_buffer_type = nullptr ;
6405+ }
63626406 }
63636407
63646408 GGMLHEXAGON_LOG_DEBUG (" create backend device for device %d" , i);
0 commit comments