@@ -5401,7 +5401,13 @@ static ggml_backend_buffer_i ggml_backend_hexagon_buffer_interface = {
54015401
54025402static const char * ggml_backend_hexagon_buffer_type_name (ggml_backend_buffer_type_t buft) {
54035403 GGML_UNUSED (buft);
5404- return " hexagon-buffer" ;
5404+ if ((g_hexagon_appcfg.hwaccel_approach == HWACCEL_CDSP) && (1 == g_hexagon_appcfg.enable_rpc_ion_mempool )) {
5405+ return " hexagon-ion-buffer" ;
5406+ }
5407+ if ((g_hexagon_appcfg.hwaccel_approach == HWACCEL_CDSP) && (1 == g_hexagon_appcfg.enable_rpc_dma_mempool )) {
5408+ return " hexagon-dma-buffer" ;
5409+ }
5410+ return " hexagon-normal-buffer" ;
54055411}
54065412
54075413static ggml_backend_buffer_t ggml_backend_hexagon_buffer_type_alloc_buffer (
@@ -5425,9 +5431,9 @@ static ggml_backend_buffer_t ggml_backend_hexagon_buffer_type_alloc_buffer(
54255431 size_aligned += (size_page - (size_aligned % size_page));
54265432 }
54275433 if ((g_hexagon_appcfg.hwaccel_approach == HWACCEL_CDSP) && (1 == g_hexagon_appcfg.enable_rpc_ion_mempool )) {
5428- GGML_ASSERT (ctx->rpc_mempool_usage <= ctx->rpc_mempool_len );
5434+ GGML_ASSERT (size + ctx->rpc_mempool_usage <= ctx->rpc_mempool_len );
54295435 buffer_ctx->buffer = (static_cast <char *>(ctx->rpc_mempool )) + ctx->rpc_mempool_usage ;
5430- GGMLHEXAGON_LOG_DEBUG (" buffer_ctx->buffer %p" , buffer_ctx->buffer );
5436+ GGMLHEXAGON_LOG_DEBUG (" size %d(%d M), buffer_ctx->buffer %p" , size, size / SIZE_IN_MB , buffer_ctx->buffer );
54315437 GGML_ASSERT (nullptr != buffer_ctx->buffer );
54325438 ctx->rpc_mempool_usage += size_aligned;
54335439 } else {
@@ -5455,6 +5461,10 @@ static size_t ggml_backend_hexagon_buffer_type_get_max_size(ggml_backend_buffer_
54555461 return (2 * (1 << 29 ));
54565462}
54575463
5464+ static bool ggml_backend_buft_is_hexagon (ggml_backend_buffer_type_t buft) {
5465+ return buft->iface .get_name == ggml_backend_hexagon_buffer_type_name;
5466+ }
5467+
54585468static bool ggml_backend_hexagon_buffer_is_host (ggml_backend_buffer_type_t buft) {
54595469 GGML_UNUSED (buft);
54605470 return true ;
@@ -5590,6 +5600,9 @@ static void ggml_backend_hexagon_device_get_memory(ggml_backend_dev_t dev, size_
55905600
55915601static enum ggml_backend_dev_type ggml_backend_hexagon_device_get_type (ggml_backend_dev_t dev) {
55925602 struct ggml_backend_hexagon_context * ctx = static_cast <ggml_backend_hexagon_context *>(dev->context );
5603+ if (g_hexagon_appcfg.hwaccel_approach == HWACCEL_CDSP) {
5604+ return GGML_BACKEND_DEVICE_TYPE_GPU;
5605+ }
55935606 if (HEXAGON_BACKEND_QNNCPU == ctx->device )
55945607 return GGML_BACKEND_DEVICE_TYPE_ACCEL;
55955608 else if (HEXAGON_BACKEND_QNNGPU == ctx->device )
@@ -5608,10 +5621,13 @@ static void ggml_backend_hexagon_device_get_props(ggml_backend_dev_t dev,
56085621 ggml_backend_hexagon_device_get_memory (dev, &props->memory_free , &props->memory_total );
56095622 props->caps = {
56105623 /* .async = */ false ,
5611- /* .host_buffer = */ false ,
5612- /* .buffer_from_host_ptr = */ true ,
5624+ /* .host_buffer = */ true ,
5625+ /* .buffer_from_host_ptr = */ false ,
56135626 /* .events = */ false ,
56145627 };
5628+ if ((HWACCEL_CDSP == g_hexagon_appcfg.hwaccel_approach ) && (1 == g_hexagon_appcfg.enable_rpc_ion_mempool )) {
5629+ props->caps .host_buffer = false ;
5630+ }
56155631}
56165632
56175633static ggml_backend_t ggml_backend_hexagon_device_init_backend (ggml_backend_dev_t dev, const char * params) {
@@ -5670,7 +5686,7 @@ static ggml_backend_buffer_type_t ggml_backend_hexagon_buffer_type(size_t device
56705686 /* .context = */ &g_hexagon_mgr[device_index],
56715687 };
56725688
5673- if (g_hexagon_appcfg. hwaccel_approach == HWACCEL_CDSP ) {
5689+ if (HWACCEL_CDSP == g_hexagon_appcfg. hwaccel_approach ) {
56745690 // here is the trick:
56755691 // there only 1 backend_device when g_hexagon_appcfg.hwaccel_approach == HWACCEL_CDSP
56765692 // and we need to re-use the g_hexagon_mgr
@@ -5681,6 +5697,60 @@ static ggml_backend_buffer_type_t ggml_backend_hexagon_buffer_type(size_t device
56815697 return &ggml_backend_buffer_type_hexagon;
56825698}
56835699
5700+ static const char * ggml_backend_hexagon_host_buffer_type_name (ggml_backend_buffer_type_t buft) {
5701+ GGML_UNUSED (buft);
5702+ return " Hexagon_Host" ;
5703+ }
5704+
5705+ static const char * ggml_backend_hexagon_host_buffer_name (ggml_backend_buffer_t buffer) {
5706+ GGML_UNUSED (buffer);
5707+ return " Hexagon_Host" ;
5708+ }
5709+
5710+ static void ggml_backend_hexagon_host_buffer_free (ggml_backend_buffer_t buffer) {
5711+ ggml_aligned_free (buffer->context , 0 );
5712+ }
5713+
5714+ static void * ggml_hexagon_host_malloc (ggml_backend_buffer_type_t buft, size_t size) {
5715+ return ggml_aligned_malloc (size);
5716+ }
5717+
5718+ static ggml_backend_buffer_t ggml_backend_hexagon_host_buffer_type_alloc_buffer (ggml_backend_buffer_type_t buft, size_t size) {
5719+ void * host_ptr = ggml_hexagon_host_malloc (buft, size);
5720+
5721+ if (nullptr == host_ptr) {
5722+ return ggml_backend_buft_alloc_buffer (ggml_backend_cpu_buffer_type (), size);
5723+ }
5724+
5725+ ggml_backend_buffer_t buffer = ggml_backend_cpu_buffer_from_ptr (host_ptr, size);
5726+ buffer->buft = buft;
5727+ buffer->iface .free_buffer = ggml_backend_hexagon_host_buffer_free;
5728+
5729+ return buffer;
5730+ }
5731+
5732+ static ggml_backend_buffer_type_t ggml_backend_hexagon_host_buffer_type () {
5733+ static struct ggml_backend_buffer_type ggml_backend_hexagon_buffer_type_host = {
5734+ /* .iface = */ {
5735+ /* .get_name = */ ggml_backend_hexagon_host_buffer_type_name,
5736+ /* .alloc_buffer = */ ggml_backend_hexagon_host_buffer_type_alloc_buffer,
5737+ /* .get_alignment = */ ggml_backend_cpu_buffer_type ()->iface .get_alignment ,
5738+ /* .get_max_size = */ nullptr ,
5739+ /* .get_alloc_size = */ ggml_backend_cpu_buffer_type ()->iface .get_alloc_size ,
5740+ /* .is_host = */ ggml_backend_cpu_buffer_type ()->iface .is_host ,
5741+ },
5742+ /* .device = */ ggml_backend_reg_dev_get (ggml_backend_hexagon_reg (), 0 ),
5743+ /* .context = */ nullptr ,
5744+ };
5745+
5746+ return &ggml_backend_hexagon_buffer_type_host;
5747+ }
5748+
5749+ static ggml_backend_buffer_type_t ggml_backend_hexagon_device_get_host_buffer_type (ggml_backend_dev_t dev) {
5750+ GGML_UNUSED (dev);
5751+ return ggml_backend_hexagon_host_buffer_type ();
5752+ }
5753+
56845754static ggml_backend_buffer_type_t ggml_backend_hexagon_device_get_buffer_type (ggml_backend_dev_t dev) {
56855755 ggml_backend_hexagon_context * ctx = (ggml_backend_hexagon_context *)dev->context ;
56865756 return ggml_backend_hexagon_buffer_type (ctx->device );
@@ -5695,7 +5765,14 @@ static ggml_backend_buffer_t ggml_backend_hexagon_device_buffer_from_host_ptr(gg
56955765}
56965766
56975767static bool ggml_backend_hexagon_device_supports_buft (ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
5698- GGML_UNUSED (dev);
5768+ if ((HWACCEL_CDSP == g_hexagon_appcfg.hwaccel_approach ) && (1 == g_hexagon_appcfg.enable_rpc_ion_mempool )) {
5769+ if (ggml_backend_buft_is_hexagon (buft)) {
5770+ ggml_backend_hexagon_context * dev_ctx = (ggml_backend_hexagon_context *)dev->context ;
5771+ ggml_backend_hexagon_context * buft_ctx = (ggml_backend_hexagon_context *)buft->context ;
5772+ return buft_ctx->device == dev_ctx->device ;
5773+ }
5774+ }
5775+
56995776 return ggml_backend_buft_is_host (buft);
57005777}
57015778
@@ -5707,7 +5784,7 @@ static struct ggml_backend_device_i ggml_backend_hexagon_device_interface = {
57075784 /* .get_props = */ ggml_backend_hexagon_device_get_props,
57085785 /* .init_backend = */ ggml_backend_hexagon_device_init_backend,
57095786 /* .get_buffer_type = */ ggml_backend_hexagon_device_get_buffer_type,
5710- /* .get_host_buffer_type = */ nullptr ,
5787+ /* .get_host_buffer_type = */ ggml_backend_hexagon_device_get_host_buffer_type ,
57115788 /* .buffer_from_host_ptr = */ ggml_backend_hexagon_device_buffer_from_host_ptr,
57125789 /* .supports_op = */ nullptr ,
57135790 /* .supports_buft = */ ggml_backend_hexagon_device_supports_buft,
@@ -5849,6 +5926,11 @@ ggml_backend_reg_t ggml_backend_hexagon_reg() {
58495926 } else {
58505927 ggml_backend_hexagon_device_interface.supports_op = ggmlhexagon_can_handle_op_through_qnn;
58515928 }
5929+ if (g_hexagon_appcfg.hwaccel_approach == HWACCEL_CDSP && 1 == g_hexagon_appcfg.enable_rpc_ion_mempool ) {
5930+ // don't use system memory in this scenario
5931+ ggml_backend_hexagon_device_interface.get_host_buffer_type = nullptr ;
5932+ }
5933+
58525934 GGMLHEXAGON_LOG_DEBUG (" create backend device for device %d" , i);
58535935 ggml_backend_dev_t dev = new ggml_backend_device{
58545936 /* .iface = */ ggml_backend_hexagon_device_interface,
0 commit comments