@@ -5401,7 +5401,13 @@ static ggml_backend_buffer_i ggml_backend_hexagon_buffer_interface = {
54015401
54025402static const char * ggml_backend_hexagon_buffer_type_name (ggml_backend_buffer_type_t buft) {
54035403 GGML_UNUSED (buft);
5404- return " hexagon-buffer" ;
5404+ if ((g_hexagon_appcfg.hwaccel_approach == HWACCEL_CDSP) && (1 == g_hexagon_appcfg.enable_rpc_ion_mempool )) {
5405+ return " hexagon-ion-buffer" ;
5406+ }
5407+ if ((g_hexagon_appcfg.hwaccel_approach == HWACCEL_CDSP) && (1 == g_hexagon_appcfg.enable_rpc_dma_mempool )) {
5408+ return " hexagon-dma-buffer" ;
5409+ }
5410+ return " hexagon-normal-buffer" ;
54055411}
54065412
54075413static ggml_backend_buffer_t ggml_backend_hexagon_buffer_type_alloc_buffer (
@@ -5425,9 +5431,9 @@ static ggml_backend_buffer_t ggml_backend_hexagon_buffer_type_alloc_buffer(
54255431 size_aligned += (size_page - (size_aligned % size_page));
54265432 }
54275433 if ((g_hexagon_appcfg.hwaccel_approach == HWACCEL_CDSP) && (1 == g_hexagon_appcfg.enable_rpc_ion_mempool )) {
5428- GGML_ASSERT (ctx->rpc_mempool_usage <= ctx->rpc_mempool_len );
5434+ GGML_ASSERT (size + ctx->rpc_mempool_usage <= ctx->rpc_mempool_len );
54295435 buffer_ctx->buffer = (static_cast <char *>(ctx->rpc_mempool )) + ctx->rpc_mempool_usage ;
5430- GGMLHEXAGON_LOG_DEBUG (" buffer_ctx->buffer %p" , buffer_ctx->buffer );
5436+ GGMLHEXAGON_LOG_DEBUG (" size %d(%d M), buffer_ctx->buffer %p" , size, size / SIZE_IN_MB , buffer_ctx->buffer );
54315437 GGML_ASSERT (nullptr != buffer_ctx->buffer );
54325438 ctx->rpc_mempool_usage += size_aligned;
54335439 } else {
@@ -5455,6 +5461,10 @@ static size_t ggml_backend_hexagon_buffer_type_get_max_size(ggml_backend_buffer_
54555461 return (2 * (1 << 29 ));
54565462}
54575463
5464+ static bool ggml_backend_buft_is_hexagon (ggml_backend_buffer_type_t buft) {
5465+ return buft->iface .get_name == ggml_backend_hexagon_buffer_type_name;
5466+ }
5467+
54585468static bool ggml_backend_hexagon_buffer_is_host (ggml_backend_buffer_type_t buft) {
54595469 GGML_UNUSED (buft);
54605470 return true ;
@@ -5579,7 +5589,7 @@ static void ggml_backend_hexagon_device_get_memory(ggml_backend_dev_t dev, size_
55795589 rpc_ion_usage = ctx->instance ->get_rpcmem_usage ();
55805590 } else {
55815591 rpc_ion_memsize = ctx->rpc_mempool_capacity ;
5582- rpc_ion_usage = ctx->rpc_mempool_usage ;
5592+ rpc_ion_usage = ctx->rpc_mempool_usage ;
55835593 }
55845594 *total = rpc_ion_memsize;
55855595 *free = (rpc_ion_memsize - rpc_ion_usage);
@@ -5590,6 +5600,10 @@ static void ggml_backend_hexagon_device_get_memory(ggml_backend_dev_t dev, size_
55905600
55915601static enum ggml_backend_dev_type ggml_backend_hexagon_device_get_type (ggml_backend_dev_t dev) {
55925602 struct ggml_backend_hexagon_context * ctx = static_cast <ggml_backend_hexagon_context *>(dev->context );
5603+ if (HWACCEL_CDSP == g_hexagon_appcfg.hwaccel_approach ) {
5604+ return GGML_BACKEND_DEVICE_TYPE_GPU;
5605+ }
5606+
55935607 if (HEXAGON_BACKEND_QNNCPU == ctx->device )
55945608 return GGML_BACKEND_DEVICE_TYPE_ACCEL;
55955609 else if (HEXAGON_BACKEND_QNNGPU == ctx->device )
@@ -5608,10 +5622,15 @@ static void ggml_backend_hexagon_device_get_props(ggml_backend_dev_t dev,
56085622 ggml_backend_hexagon_device_get_memory (dev, &props->memory_free , &props->memory_total );
56095623 props->caps = {
56105624 /* .async = */ false ,
5611- /* .host_buffer = */ false ,
5612- /* .buffer_from_host_ptr = */ true ,
5625+ /* .host_buffer = */ true ,
5626+ /* .buffer_from_host_ptr = */ false ,
56135627 /* .events = */ false ,
56145628 };
5629+
5630+ if ((HWACCEL_CDSP == g_hexagon_appcfg.hwaccel_approach ) && (1 == g_hexagon_appcfg.enable_rpc_ion_mempool )) {
5631+ // don't use system memory in this scenario
5632+ props->caps .host_buffer = false ;
5633+ }
56155634}
56165635
56175636static ggml_backend_t ggml_backend_hexagon_device_init_backend (ggml_backend_dev_t dev, const char * params) {
@@ -5670,7 +5689,7 @@ static ggml_backend_buffer_type_t ggml_backend_hexagon_buffer_type(size_t device
56705689 /* .context = */ &g_hexagon_mgr[device_index],
56715690 };
56725691
5673- if (g_hexagon_appcfg. hwaccel_approach == HWACCEL_CDSP ) {
5692+ if (HWACCEL_CDSP == g_hexagon_appcfg. hwaccel_approach ) {
56745693 // here is the trick:
56755694 // there only 1 backend_device when g_hexagon_appcfg.hwaccel_approach == HWACCEL_CDSP
56765695 // and we need to re-use the g_hexagon_mgr
@@ -5681,6 +5700,60 @@ static ggml_backend_buffer_type_t ggml_backend_hexagon_buffer_type(size_t device
56815700 return &ggml_backend_buffer_type_hexagon;
56825701}
56835702
5703+ static const char * ggml_backend_hexagon_host_buffer_type_name (ggml_backend_buffer_type_t buft) {
5704+ GGML_UNUSED (buft);
5705+ return " Hexagon_Host" ;
5706+ }
5707+
5708+ static const char * ggml_backend_hexagon_host_buffer_name (ggml_backend_buffer_t buffer) {
5709+ GGML_UNUSED (buffer);
5710+ return " Hexagon_Host" ;
5711+ }
5712+
5713+ static void ggml_backend_hexagon_host_buffer_free (ggml_backend_buffer_t buffer) {
5714+ ggml_aligned_free (buffer->context , 0 );
5715+ }
5716+
5717+ static void * ggml_hexagon_host_malloc (ggml_backend_buffer_type_t buft, size_t size) {
5718+ return ggml_aligned_malloc (size);
5719+ }
5720+
5721+ static ggml_backend_buffer_t ggml_backend_hexagon_host_buffer_type_alloc_buffer (ggml_backend_buffer_type_t buft, size_t size) {
5722+ void * host_ptr = ggml_hexagon_host_malloc (buft, size);
5723+
5724+ if (nullptr == host_ptr) {
5725+ return ggml_backend_buft_alloc_buffer (ggml_backend_cpu_buffer_type (), size);
5726+ }
5727+
5728+ ggml_backend_buffer_t buffer = ggml_backend_cpu_buffer_from_ptr (host_ptr, size);
5729+ buffer->buft = buft;
5730+ buffer->iface .free_buffer = ggml_backend_hexagon_host_buffer_free;
5731+
5732+ return buffer;
5733+ }
5734+
5735+ static ggml_backend_buffer_type_t ggml_backend_hexagon_host_buffer_type () {
5736+ static struct ggml_backend_buffer_type ggml_backend_hexagon_buffer_type_host = {
5737+ /* .iface = */ {
5738+ /* .get_name = */ ggml_backend_hexagon_host_buffer_type_name,
5739+ /* .alloc_buffer = */ ggml_backend_hexagon_host_buffer_type_alloc_buffer,
5740+ /* .get_alignment = */ ggml_backend_cpu_buffer_type ()->iface .get_alignment ,
5741+ /* .get_max_size = */ nullptr ,
5742+ /* .get_alloc_size = */ ggml_backend_cpu_buffer_type ()->iface .get_alloc_size ,
5743+ /* .is_host = */ ggml_backend_cpu_buffer_type ()->iface .is_host ,
5744+ },
5745+ /* .device = */ ggml_backend_reg_dev_get (ggml_backend_hexagon_reg (), 0 ),
5746+ /* .context = */ nullptr ,
5747+ };
5748+
5749+ return &ggml_backend_hexagon_buffer_type_host;
5750+ }
5751+
5752+ static ggml_backend_buffer_type_t ggml_backend_hexagon_device_get_host_buffer_type (ggml_backend_dev_t dev) {
5753+ GGML_UNUSED (dev);
5754+ return ggml_backend_hexagon_host_buffer_type ();
5755+ }
5756+
56845757static ggml_backend_buffer_type_t ggml_backend_hexagon_device_get_buffer_type (ggml_backend_dev_t dev) {
56855758 ggml_backend_hexagon_context * ctx = (ggml_backend_hexagon_context *)dev->context ;
56865759 return ggml_backend_hexagon_buffer_type (ctx->device );
@@ -5695,7 +5768,14 @@ static ggml_backend_buffer_t ggml_backend_hexagon_device_buffer_from_host_ptr(gg
56955768}
56965769
56975770static bool ggml_backend_hexagon_device_supports_buft (ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
5698- GGML_UNUSED (dev);
5771+ if ((HWACCEL_CDSP == g_hexagon_appcfg.hwaccel_approach ) && (1 == g_hexagon_appcfg.enable_rpc_ion_mempool )) {
5772+ if (ggml_backend_buft_is_hexagon (buft)) {
5773+ ggml_backend_hexagon_context * dev_ctx = (ggml_backend_hexagon_context *)dev->context ;
5774+ ggml_backend_hexagon_context * buft_ctx = (ggml_backend_hexagon_context *)buft->context ;
5775+ return buft_ctx->device == dev_ctx->device ;
5776+ }
5777+ }
5778+
56995779 return ggml_backend_buft_is_host (buft);
57005780}
57015781
@@ -5707,7 +5787,7 @@ static struct ggml_backend_device_i ggml_backend_hexagon_device_interface = {
57075787 /* .get_props = */ ggml_backend_hexagon_device_get_props,
57085788 /* .init_backend = */ ggml_backend_hexagon_device_init_backend,
57095789 /* .get_buffer_type = */ ggml_backend_hexagon_device_get_buffer_type,
5710- /* .get_host_buffer_type = */ nullptr ,
5790+ /* .get_host_buffer_type = */ ggml_backend_hexagon_device_get_host_buffer_type ,
57115791 /* .buffer_from_host_ptr = */ ggml_backend_hexagon_device_buffer_from_host_ptr,
57125792 /* .supports_op = */ nullptr ,
57135793 /* .supports_buft = */ ggml_backend_hexagon_device_supports_buft,
@@ -5849,6 +5929,11 @@ ggml_backend_reg_t ggml_backend_hexagon_reg() {
58495929 } else {
58505930 ggml_backend_hexagon_device_interface.supports_op = ggmlhexagon_can_handle_op_through_qnn;
58515931 }
5932+ if (g_hexagon_appcfg.hwaccel_approach == HWACCEL_CDSP && 1 == g_hexagon_appcfg.enable_rpc_ion_mempool ) {
5933+ // don't use system memory in this scenario
5934+ ggml_backend_hexagon_device_interface.get_host_buffer_type = nullptr ;
5935+ }
5936+
58525937 GGMLHEXAGON_LOG_DEBUG (" create backend device for device %d" , i);
58535938 ggml_backend_dev_t dev = new ggml_backend_device{
58545939 /* .iface = */ ggml_backend_hexagon_device_interface,
0 commit comments