diff --git a/ggml/src/ggml-metal/ggml-metal-device.m b/ggml/src/ggml-metal/ggml-metal-device.m index 9983640b43eb8..1623b17de6b04 100644 --- a/ggml/src/ggml-metal/ggml-metal-device.m +++ b/ggml/src/ggml-metal/ggml-metal-device.m @@ -817,6 +817,7 @@ bool ggml_metal_device_supports_op(ggml_metal_device_t dev, const struct ggml_te // if false, the Metal buffer data is allocated in private GPU memory and is not shared with the host bool is_shared; + bool owned; // multiple buffers are used only to avoid the maximum buffer size limitation when using mmap int n_buffers; @@ -949,6 +950,7 @@ ggml_metal_buffer_t ggml_metal_buffer_init(ggml_metal_device_t dev, size_t size, if (shared) { res->all_data = ggml_metal_host_malloc(size_aligned); res->is_shared = true; + res->owned = true; } else { // dummy, non-NULL value - we'll populate this after creating the Metal buffer below res->all_data = (void *) 0x000000400ULL; @@ -1007,6 +1009,7 @@ ggml_metal_buffer_t ggml_metal_buffer_map(ggml_metal_device_t dev, void * ptr, s res->all_size = size; res->is_shared = true; + res->owned = false; res->n_buffers = 0; @@ -1100,7 +1103,7 @@ void ggml_metal_buffer_free(ggml_metal_buffer_t buf) { ggml_metal_buffer_rset_free(buf); - if (buf->is_shared) { + if (buf->is_shared && buf->owned) { #if TARGET_OS_OSX vm_deallocate((vm_map_t)mach_task_self(), (vm_address_t)buf->all_data, buf->all_size); #else