Skip to content

Commit 7b0d76b

Browse files
committed
fix memory leak when buffers are reused due to same buffer type appearing multiple times
* make vbuffer allocation follow the same logic as backend_buffer did before
1 parent 059afdb commit 7b0d76b

File tree

1 file changed

+19
-25
lines changed

1 file changed

+19
-25
lines changed

ggml/src/ggml-alloc.c

Lines changed: 19 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -374,28 +374,16 @@ static size_t ggml_dyn_tallocr_max_size(struct ggml_dyn_tallocr * alloc) {
374374
// virtual buffer with contiguous memory range, split into multiple backend buffers (chunks)
375375

376376
struct vbuffer {
377-
ggml_backend_buffer_type_t buft;
378377
ggml_backend_buffer_t chunks[GGML_VBUFFER_MAX_CHUNKS];
379378
};
380379

381-
static struct vbuffer * ggml_vbuffer_new(ggml_backend_buffer_type_t buft) {
382-
struct vbuffer * buf = calloc(1, sizeof(struct vbuffer));
383-
buf->buft = buft;
384-
return buf;
385-
}
386-
387-
static void ggml_vbuffer_free_chunks(struct vbuffer * buf) {
388-
for (int i = 0; i < GGML_VBUFFER_MAX_CHUNKS; ++i) {
389-
ggml_backend_buffer_free(buf->chunks[i]);
390-
buf->chunks[i] = NULL;
391-
}
392-
}
393-
394380
static void ggml_vbuffer_free(struct vbuffer * buf) {
395381
if (buf == NULL) {
396382
return;
397383
}
398-
ggml_vbuffer_free_chunks(buf);
384+
for (int i = 0; i < GGML_VBUFFER_MAX_CHUNKS; ++i) {
385+
ggml_backend_buffer_free(buf->chunks[i]);
386+
}
399387
free(buf);
400388
}
401389

@@ -413,17 +401,22 @@ static size_t ggml_vbuffer_size(struct vbuffer * buf) {
413401
return size;
414402
}
415403

416-
static bool ggml_vbuffer_alloc(struct vbuffer * buf, const struct ggml_dyn_tallocr * talloc, enum ggml_backend_buffer_usage usage) {
404+
static struct vbuffer * ggml_vbuffer_alloc(ggml_backend_buffer_type_t buft, const struct ggml_dyn_tallocr * talloc, enum ggml_backend_buffer_usage usage) {
405+
struct vbuffer * buf = (struct vbuffer *)calloc(1, sizeof(struct vbuffer));
406+
if (buf == NULL) {
407+
return NULL;
408+
}
409+
417410
for (int n = 0; n < talloc->n_chunks; n++) {
418411
size_t chunk_size = talloc->max_size[n];
419-
buf->chunks[n] = ggml_backend_buft_alloc_buffer(buf->buft, chunk_size);
412+
buf->chunks[n] = ggml_backend_buft_alloc_buffer(buft, chunk_size);
420413
if (buf->chunks[n] == NULL) {
421-
ggml_vbuffer_free_chunks(buf);
422-
return false;
414+
ggml_vbuffer_free(buf);
415+
return NULL;
423416
}
424417
ggml_backend_buffer_set_usage(buf->chunks[n], usage);
425418
}
426-
return true;
419+
return buf;
427420
}
428421

429422
static void ggml_vbuffer_tensor_alloc(struct vbuffer * buf, struct ggml_tensor * tensor, struct buffer_address buf_addr) {
@@ -497,7 +490,7 @@ ggml_gallocr_t ggml_gallocr_new_n(ggml_backend_buffer_type_t * bufts, int n_bufs
497490

498491
for (int i = 0; i < n_bufs; i++) {
499492
galloc->bufts[i] = bufts[i];
500-
galloc->buffers[i] = ggml_vbuffer_new(bufts[i]);
493+
galloc->buffers[i] = NULL;
501494

502495
// check if the same buffer type is used multiple times and reuse the same allocator
503496
for (int j = 0; j < i; j++) {
@@ -862,17 +855,18 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
862855
}
863856
}
864857

865-
size_t cur_size = ggml_vbuffer_size(galloc->buffers[i]);
858+
size_t cur_size = galloc->buffers[i] ? ggml_vbuffer_size(galloc->buffers[i]) : 0;
866859
size_t new_size = ggml_dyn_tallocr_max_size(galloc->buf_tallocs[i]);
867860

868861
// even if there are no tensors allocated in this buffer, we still need to allocate it to initialize views
869-
if (new_size > cur_size || ggml_vbuffer_n_chunks(galloc->buffers[i]) == 0) {
862+
if (new_size > cur_size || galloc->buffers[i] == NULL) {
870863
#ifndef NDEBUG
871864
GGML_LOG_DEBUG("%s: reallocating %s buffer from size %.02f MiB to %.02f MiB\n", __func__, ggml_backend_buft_name(galloc->bufts[i]), cur_size / 1024.0 / 1024.0, new_size / 1024.0 / 1024.0);
872865
#endif
873866

874-
ggml_vbuffer_free_chunks(galloc->buffers[i]);
875-
if (!ggml_vbuffer_alloc(galloc->buffers[i], galloc->buf_tallocs[i], GGML_BACKEND_BUFFER_USAGE_COMPUTE)) {
867+
ggml_vbuffer_free(galloc->buffers[i]);
868+
galloc->buffers[i] = ggml_vbuffer_alloc(galloc->bufts[i], galloc->buf_tallocs[i], GGML_BACKEND_BUFFER_USAGE_COMPUTE);
869+
if (galloc->buffers[i] == NULL) {
876870
GGML_LOG_ERROR("%s: failed to allocate %s buffer of size %zu\n", __func__, ggml_backend_buft_name(galloc->bufts[i]), new_size);
877871
return false;
878872
}

0 commit comments

Comments
 (0)