@@ -127,11 +127,6 @@ static void ggml_zdnn_mul_mat_op(ggml_backend_zdnn_context * ctx, const ggml_ten
127127 const int64_t output_rows = ne1;
128128 const int64_t output_cols = ne0;
129129
130- // TODO: Weights are somehow not going through `ggml_backend_zdnn_buffer_set_tensor` during model loading.
131- // So we need to load the weights here. Remove this when the issue is fixed.
132- // Problem might be residing in `ggml_backend_zdnn_device_supports_buft`.
133- if (weights_extra->ztensor .is_transformed == false ) ggml_zdnn_load_tensor (weights_extra->ztensor , weights->data );
134-
135130 // GGML_LOG_INFO("%s: tensor '%s' tensor dimensions: [%ld, %ld, %ld, %ld] pre_tfm_desc dimensions: [%ld, %ld, %ld, %ld]\n",
136131 // __func__, weights_extra->name,
137132 // weights->ne[3], weights->ne[2], weights->ne[1], weights->ne[0],
@@ -355,6 +350,9 @@ static void ggml_backend_zdnn_buffer_free_buffer(ggml_backend_buffer_t buffer) {
355350
356351 for (const auto & buf_ptr : ctx->buffers ) {
357352 ggml_backend_zdnn_buffer * buf = buf_ptr.get ();
353+
354+ // Free any extra buffer allocated for the tensor. E.g., bias for GGML_OP_MUL_MAT
355+ if (buf->extra != nullptr ) free (buf->extra ->data );
358356 if (buf->ztensor .buffer_size > 0 ) ZDNN_CHECK (zdnn_free_ztensor_buffer (&buf->ztensor ));
359357 }
360358
@@ -432,8 +430,11 @@ static void ggml_backend_zdnn_buffer_set_tensor(ggml_backend_buffer_t buffer, gg
432430 memcpy ((char *)tensor->data + offset, data, size);
433431
434432 ggml_backend_zdnn_buffer * extra = (ggml_backend_zdnn_buffer *)tensor->extra ;
435- if (extra->ztensor .is_transformed ) zdnn_reset_ztensor (&extra->ztensor );
436- ggml_zdnn_load_tensor (extra->ztensor , tensor->data );
433+
434+ // Fixes the LLAMA_SET_ROWS bug
435+ // see: https://github.com/ggml-org/llama.cpp/issues/15414
436+ if (tensor->buffer ->usage == GGML_BACKEND_BUFFER_USAGE_COMPUTE && extra->ztensor .is_transformed ) zdnn_reset_ztensor (&extra->ztensor );
437+ if (extra->ztensor .is_transformed == false ) ggml_zdnn_load_tensor (extra->ztensor , tensor->data );
437438
438439 GGML_UNUSED (buffer);
439440}
@@ -538,29 +539,6 @@ ggml_backend_buffer_type_t ggml_backend_zdnn_buffer_type(void) {
538539 return &ggml_backend_buffer_type_zdnn;
539540}
540541
541- static const char * ggml_backend_zdnn_buffer_from_ptr_type_get_name (ggml_backend_buffer_type_t buft) {
542- return GGML_ZDNN_NAME " _Mapped" ;
543-
544- GGML_UNUSED (buft);
545- }
546-
547- static ggml_backend_buffer_type_t ggml_backend_zdnn_buffer_from_ptr_type (void ) {
548- static ggml_backend_buffer_type ggml_backend_buffer_from_ptr_type_zdnn = {
549- /* .iface = */ {
550- /* .get_name = */ ggml_backend_zdnn_buffer_from_ptr_type_get_name,
551- /* .alloc_buffer = */ ggml_backend_zdnn_buffer_type_alloc_buffer,
552- /* .get_alignment = */ ggml_backend_zdnn_buffer_type_get_alignment,
553- /* .get_max_size = */ NULL ,
554- /* .get_alloc_size = */ NULL , // defaults to ggml_nbytes
555- /* .is_host = */ ggml_backend_zdnn_buffer_type_is_host,
556- },
557- /* .device = */ &g_ggml_backend_zdnn_device,
558- /* .context = */ NULL ,
559- };
560-
561- return &ggml_backend_buffer_from_ptr_type_zdnn;
562- }
563-
564542//
565543// backend
566544//
@@ -648,7 +626,7 @@ static void ggml_backend_zdnn_device_get_props(ggml_backend_dev_t dev, ggml_back
648626 props->caps = (ggml_backend_dev_caps) {
649627 /* .async = */ false ,
650628 /* .host_buffer = */ false ,
651- /* .buffer_from_host_ptr = */ true ,
629+ /* .buffer_from_host_ptr = */ false ,
652630 /* .events = */ false
653631 };
654632}
@@ -679,48 +657,6 @@ static ggml_backend_buffer_type_t ggml_backend_zdnn_device_get_buffer_type(ggml_
679657 GGML_UNUSED (dev);
680658}
681659
682- static ggml_backend_buffer_t ggml_backend_zdnn_device_buffer_from_ptr (ggml_backend_dev_t dev, void * ptr, size_t size, size_t max_tensor_size) {
683- ggml_backend_zdnn_buffer_context * ctx = new ggml_backend_zdnn_buffer_context ();
684-
685- ctx->all_data = ptr;
686- ctx->all_size = size;
687- ctx->owned = false ;
688- ctx->n_buffers = 0 ;
689-
690- const size_t size_page = sysconf (_SC_PAGESIZE);
691-
692- // page-align the data ptr
693- {
694- const uintptr_t offs = (uintptr_t ) ptr % size_page;
695- ptr = (void *)((char *)ptr - offs);
696- size += offs;
697- }
698-
699- size_t size_aligned = size;
700- if ((size_aligned % size_page) != 0 ) {
701- size_aligned += size_page - (size_aligned % size_page);
702- }
703-
704- ggml_backend_zdnn_device_context * ctx_dev = (ggml_backend_zdnn_device_context *)dev->context ;
705-
706- GGML_ASSERT (ctx_dev->zdnn_device >= 0 );
707- int device = ctx_dev->zdnn_device ; GGML_UNUSED (device);
708-
709- std::unique_ptr<ggml_backend_zdnn_buffer> zdnn_buffer = std::make_unique<ggml_backend_zdnn_buffer>();
710- zdnn_buffer->data = ptr;
711- zdnn_buffer->size = size;
712- ctx->buffers .push_back (std::move (zdnn_buffer));
713-
714- GGML_LOG_INFO (" %s: allocated buffer, size = %8.2f MiB\n " ,
715- __func__, size_aligned / 1024.0 / 1024.0 );
716-
717- ++ctx->n_buffers ;
718-
719- return ggml_backend_buffer_init (ggml_backend_zdnn_buffer_from_ptr_type (), ggml_backend_zdnn_buffer_i, ctx, size);
720-
721- GGML_UNUSED (max_tensor_size);
722- }
723-
724660static bool ggml_backend_zdnn_device_supports_op (ggml_backend_dev_t dev, const ggml_tensor * op) {
725661 ggml_backend_zdnn_device_context * ctx_dev = (ggml_backend_zdnn_device_context *) dev->context ;
726662
@@ -729,8 +665,7 @@ static bool ggml_backend_zdnn_device_supports_op(ggml_backend_dev_t dev, const g
729665
730666static bool ggml_backend_zdnn_device_supports_buft (ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
731667 return
732- buft->iface .get_name == ggml_backend_zdnn_buffer_type_get_name ||
733- buft->iface .get_name == ggml_backend_zdnn_buffer_from_ptr_type_get_name;
668+ buft->iface .get_name == ggml_backend_zdnn_buffer_type_get_name;
734669
735670 GGML_UNUSED (dev);
736671}
@@ -744,7 +679,7 @@ static ggml_backend_device_i ggml_backend_zdnn_device_i = {
744679 /* .init_backend = */ ggml_backend_zdnn_device_init,
745680 /* .get_buffer_type = */ ggml_backend_zdnn_device_get_buffer_type,
746681 /* .get_host_buffer_type = */ NULL ,
747- /* .buffer_from_host_ptr = */ ggml_backend_zdnn_device_buffer_from_ptr ,
682+ /* .buffer_from_host_ptr = */ NULL ,
748683 /* .supports_op = */ ggml_backend_zdnn_device_supports_op,
749684 /* .supports_buft = */ ggml_backend_zdnn_device_supports_buft,
750685 /* .offload_op = */ NULL ,