Skip to content

Commit 910c625

Browse files
committed
metal : take into account the actual allocated memory of the tensor
ggml-ci
1 parent 86a1951 commit 910c625

File tree

1 file changed

+8
-1
lines changed

1 file changed

+8
-1
lines changed

ggml/src/ggml-metal/ggml-metal-common.cpp

Lines changed: 8 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
#include "ggml-metal-common.h"
22

33
#include "ggml-impl.h"
4+
#include "ggml-backend-impl.h"
45

56
#include <vector>
67

@@ -51,11 +52,17 @@ static ggml_mem_range ggml_mem_range_from_tensor(const ggml_tensor * tensor, ggm
5152
ggml_mem_range mrp;
5253

5354
if (tensor->buffer) {
55+
// take the actual allocated size
56+
// this can be larger than the tensor size if the buffer type allocates extra memory
57+
// ref: https://github.com/ggml-org/llama.cpp/pull/15966
58+
ggml_backend_buffer_type_t buft = tensor->buffer->buft;
59+
const size_t alloc_size = buft->iface.get_alloc_size ? buft->iface.get_alloc_size(buft, tensor) : ggml_nbytes(tensor);
60+
5461
// when the tensor is allocated, use the actual memory address range of the buffer
5562
mrp = {
5663
/*.pb =*/ (uint64_t) tensor->buffer,
5764
/*.p0 =*/ (uint64_t) tensor->data,
58-
/*.p1 =*/ (uint64_t) tensor->data + ggml_nbytes(tensor),
65+
/*.p1 =*/ (uint64_t) tensor->data + alloc_size,
5966
/*.pt =*/ pt,
6067
};
6168
} else {

0 commit comments

Comments
 (0)