Skip to content

Commit 0b3ed04

Browse files
Acly authored and firecoperana committed
ggml : fix graph reallocation with multiple chunks (#16396)
Reallocation is needed if a single chunk grows in size, even if the total allocation size stays the same or is lower.

# Conflicts:
#   ggml/src/ggml-alloc.c
#   tests/test-alloc.cpp
1 parent 2bc71fe commit 0b3ed04

File tree

1 file changed

+16
-14
lines changed

1 file changed

+16
-14
lines changed

ggml/src/ggml-alloc.c

Lines changed: 16 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -396,12 +396,8 @@ static void ggml_dyn_tallocr_free(struct ggml_dyn_tallocr * alloc) {
396396
free(alloc);
397397
}
398398

399-
static size_t ggml_dyn_tallocr_max_size(struct ggml_dyn_tallocr * alloc) {
400-
size_t max_size = 0;
401-
for (int i = 0; i < alloc->n_chunks; i++) {
402-
max_size += alloc->chunks[i]->max_size;
403-
}
404-
return max_size;
399+
static size_t ggml_dyn_tallocr_max_size(struct ggml_dyn_tallocr * alloc, int chunk) {
400+
return chunk < alloc->n_chunks ? alloc->chunks[chunk]->max_size : 0;
405401
}
406402

407403

@@ -421,10 +417,8 @@ static void ggml_vbuffer_free(struct vbuffer * buf) {
421417
free(buf);
422418
}
423419

424-
static int ggml_vbuffer_n_chunks(struct vbuffer * buf) {
425-
int n = 0;
426-
while (n < GGML_VBUFFER_MAX_CHUNKS && buf->chunks[n]) n++;
427-
return n;
420+
static size_t ggml_vbuffer_chunk_size(struct vbuffer * buf, int chunk) {
421+
return buf->chunks[chunk] ? ggml_backend_buffer_get_size(buf->chunks[chunk]) : 0;
428422
}
429423

430424
static size_t ggml_vbuffer_size(struct vbuffer * buf) {
@@ -898,12 +892,20 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
898892
}
899893
}
900894

901-
size_t cur_size = galloc->buffers[i] ? ggml_vbuffer_size(galloc->buffers[i]) : 0;
902-
size_t new_size = ggml_dyn_tallocr_max_size(galloc->buf_tallocs[i]);
903-
904895
// even if there are no tensors allocated in this buffer, we still need to allocate it to initialize views
905-
if (new_size > cur_size || galloc->buffers[i] == NULL) {
896+
bool realloc = galloc->buffers[i] == NULL;
897+
size_t new_size = 0;
898+
for (int c = 0; c < galloc->buf_tallocs[i]->n_chunks; c++) {
899+
size_t cur_chunk_size = galloc->buffers[i] ? ggml_vbuffer_chunk_size(galloc->buffers[i], c) : 0;
900+
size_t new_chunk_size = ggml_dyn_tallocr_max_size(galloc->buf_tallocs[i], c);
901+
new_size += new_chunk_size;
902+
if (new_chunk_size > cur_chunk_size) {
903+
realloc = true;
904+
}
905+
}
906+
if (realloc) {
906907
#ifndef NDEBUG
908+
size_t cur_size = galloc->buffers[i] ? ggml_vbuffer_size(galloc->buffers[i]) : 0;
907909
fprintf(stderr, "%s: reallocating %s buffer from size %.02f MiB to %.02f MiB\n", __func__, ggml_backend_buft_name(galloc->bufts[i]), cur_size / 1024.0 / 1024.0, new_size / 1024.0 / 1024.0);
908910
#endif
909911

0 commit comments

Comments (0)