Skip to content

Commit 6380d6a

Browse files
authored
ggml-zdnn: rm user mapped buffers (ggml-org#15965)
* ggml-zdnn: rm user mapped buffers
  Signed-off-by: Aaron Teo <[email protected]>
* ggml-zdnn: rm dead code
  Signed-off-by: Aaron Teo <[email protected]>
* ggml-zdnn: attempt to fix missing extra data buffer free
  Signed-off-by: Aaron Teo <[email protected]>
---------
Signed-off-by: Aaron Teo <[email protected]>
1 parent aa0c461 commit 6380d6a

File tree

1 file changed

+11
-76
lines changed

1 file changed

+11
-76
lines changed

ggml/src/ggml-zdnn/ggml-zdnn.cpp

Lines changed: 11 additions & 76 deletions
Original file line number | Diff line number | Diff line change
@@ -127,11 +127,6 @@ static void ggml_zdnn_mul_mat_op(ggml_backend_zdnn_context * ctx, const ggml_ten
127127
const int64_t output_rows = ne1;
128128
const int64_t output_cols = ne0;
129129

130-
// TODO: Weights are somehow not going through `ggml_backend_zdnn_buffer_set_tensor` during model loading.
131-
// So we need to load the weights here. Remove this when the issue is fixed.
132-
// Problem might be residing in `ggml_backend_zdnn_device_supports_buft`.
133-
if (weights_extra->ztensor.is_transformed == false) ggml_zdnn_load_tensor(weights_extra->ztensor, weights->data);
134-
135130
// GGML_LOG_INFO("%s: tensor '%s' tensor dimensions: [%ld, %ld, %ld, %ld] pre_tfm_desc dimensions: [%ld, %ld, %ld, %ld]\n",
136131
// __func__, weights_extra->name,
137132
// weights->ne[3], weights->ne[2], weights->ne[1], weights->ne[0],
@@ -355,6 +350,9 @@ static void ggml_backend_zdnn_buffer_free_buffer(ggml_backend_buffer_t buffer) {
355350

356351
for (const auto & buf_ptr : ctx->buffers) {
357352
ggml_backend_zdnn_buffer * buf = buf_ptr.get();
353+
354+
// Free any extra buffer allocated for the tensor. E.g., bias for GGML_OP_MUL_MAT
355+
if (buf->extra != nullptr) free(buf->extra->data);
358356
if (buf->ztensor.buffer_size > 0) ZDNN_CHECK(zdnn_free_ztensor_buffer(&buf->ztensor));
359357
}
360358

@@ -432,8 +430,11 @@ static void ggml_backend_zdnn_buffer_set_tensor(ggml_backend_buffer_t buffer, gg
432430
memcpy((char *)tensor->data + offset, data, size);
433431

434432
ggml_backend_zdnn_buffer * extra = (ggml_backend_zdnn_buffer *)tensor->extra;
435-
if (extra->ztensor.is_transformed) zdnn_reset_ztensor(&extra->ztensor);
436-
ggml_zdnn_load_tensor(extra->ztensor, tensor->data);
433+
434+
// Fixes the LLAMA_SET_ROWS bug
435+
// see: https://github.com/ggml-org/llama.cpp/issues/15414
436+
if (tensor->buffer->usage == GGML_BACKEND_BUFFER_USAGE_COMPUTE && extra->ztensor.is_transformed) zdnn_reset_ztensor(&extra->ztensor);
437+
if (extra->ztensor.is_transformed == false) ggml_zdnn_load_tensor(extra->ztensor, tensor->data);
437438

438439
GGML_UNUSED(buffer);
439440
}
@@ -538,29 +539,6 @@ ggml_backend_buffer_type_t ggml_backend_zdnn_buffer_type(void) {
538539
return &ggml_backend_buffer_type_zdnn;
539540
}
540541

541-
static const char * ggml_backend_zdnn_buffer_from_ptr_type_get_name(ggml_backend_buffer_type_t buft) {
542-
return GGML_ZDNN_NAME "_Mapped";
543-
544-
GGML_UNUSED(buft);
545-
}
546-
547-
static ggml_backend_buffer_type_t ggml_backend_zdnn_buffer_from_ptr_type(void) {
548-
static ggml_backend_buffer_type ggml_backend_buffer_from_ptr_type_zdnn = {
549-
/* .iface = */ {
550-
/* .get_name = */ ggml_backend_zdnn_buffer_from_ptr_type_get_name,
551-
/* .alloc_buffer = */ ggml_backend_zdnn_buffer_type_alloc_buffer,
552-
/* .get_alignment = */ ggml_backend_zdnn_buffer_type_get_alignment,
553-
/* .get_max_size = */ NULL,
554-
/* .get_alloc_size = */ NULL, // defaults to ggml_nbytes
555-
/* .is_host = */ ggml_backend_zdnn_buffer_type_is_host,
556-
},
557-
/* .device = */ &g_ggml_backend_zdnn_device,
558-
/* .context = */ NULL,
559-
};
560-
561-
return &ggml_backend_buffer_from_ptr_type_zdnn;
562-
}
563-
564542
//
565543
// backend
566544
//
@@ -648,7 +626,7 @@ static void ggml_backend_zdnn_device_get_props(ggml_backend_dev_t dev, ggml_back
648626
props->caps = (ggml_backend_dev_caps) {
649627
/* .async = */ false,
650628
/* .host_buffer = */ false,
651-
/* .buffer_from_host_ptr = */ true,
629+
/* .buffer_from_host_ptr = */ false,
652630
/* .events = */ false
653631
};
654632
}
@@ -679,48 +657,6 @@ static ggml_backend_buffer_type_t ggml_backend_zdnn_device_get_buffer_type(ggml_
679657
GGML_UNUSED(dev);
680658
}
681659

682-
static ggml_backend_buffer_t ggml_backend_zdnn_device_buffer_from_ptr(ggml_backend_dev_t dev, void * ptr, size_t size, size_t max_tensor_size) {
683-
ggml_backend_zdnn_buffer_context * ctx = new ggml_backend_zdnn_buffer_context();
684-
685-
ctx->all_data = ptr;
686-
ctx->all_size = size;
687-
ctx->owned = false;
688-
ctx->n_buffers = 0;
689-
690-
const size_t size_page = sysconf(_SC_PAGESIZE);
691-
692-
// page-align the data ptr
693-
{
694-
const uintptr_t offs = (uintptr_t) ptr % size_page;
695-
ptr = (void *)((char *)ptr - offs);
696-
size += offs;
697-
}
698-
699-
size_t size_aligned = size;
700-
if ((size_aligned % size_page) != 0) {
701-
size_aligned += size_page - (size_aligned % size_page);
702-
}
703-
704-
ggml_backend_zdnn_device_context * ctx_dev = (ggml_backend_zdnn_device_context *)dev->context;
705-
706-
GGML_ASSERT(ctx_dev->zdnn_device >= 0);
707-
int device = ctx_dev->zdnn_device; GGML_UNUSED(device);
708-
709-
std::unique_ptr<ggml_backend_zdnn_buffer> zdnn_buffer = std::make_unique<ggml_backend_zdnn_buffer>();
710-
zdnn_buffer->data = ptr;
711-
zdnn_buffer->size = size;
712-
ctx->buffers.push_back(std::move(zdnn_buffer));
713-
714-
GGML_LOG_INFO("%s: allocated buffer, size = %8.2f MiB\n",
715-
__func__, size_aligned / 1024.0 / 1024.0);
716-
717-
++ctx->n_buffers;
718-
719-
return ggml_backend_buffer_init(ggml_backend_zdnn_buffer_from_ptr_type(), ggml_backend_zdnn_buffer_i, ctx, size);
720-
721-
GGML_UNUSED(max_tensor_size);
722-
}
723-
724660
static bool ggml_backend_zdnn_device_supports_op(ggml_backend_dev_t dev, const ggml_tensor * op) {
725661
ggml_backend_zdnn_device_context * ctx_dev = (ggml_backend_zdnn_device_context *) dev->context;
726662

@@ -729,8 +665,7 @@ static bool ggml_backend_zdnn_device_supports_op(ggml_backend_dev_t dev, const g
729665

730666
static bool ggml_backend_zdnn_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) {
731667
return
732-
buft->iface.get_name == ggml_backend_zdnn_buffer_type_get_name ||
733-
buft->iface.get_name == ggml_backend_zdnn_buffer_from_ptr_type_get_name;
668+
buft->iface.get_name == ggml_backend_zdnn_buffer_type_get_name;
734669

735670
GGML_UNUSED(dev);
736671
}
@@ -744,7 +679,7 @@ static ggml_backend_device_i ggml_backend_zdnn_device_i = {
744679
/* .init_backend = */ ggml_backend_zdnn_device_init,
745680
/* .get_buffer_type = */ ggml_backend_zdnn_device_get_buffer_type,
746681
/* .get_host_buffer_type = */ NULL,
747-
/* .buffer_from_host_ptr = */ ggml_backend_zdnn_device_buffer_from_ptr,
682+
/* .buffer_from_host_ptr = */ NULL,
748683
/* .supports_op = */ ggml_backend_zdnn_device_supports_op,
749684
/* .supports_buft = */ ggml_backend_zdnn_device_supports_buft,
750685
/* .offload_op = */ NULL,

0 commit comments

Comments (0)