Skip to content

Commit 57381c5

Browse files
committed
Continue to use the leftover unallocated space of previous chunks after a new chunk has been created
1 parent 7b0d76b commit 57381c5

File tree

2 files changed

+26
-4
lines changed

2 files changed

+26
-4
lines changed

ggml/src/ggml-alloc.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -185,11 +185,13 @@ static struct buffer_address ggml_dyn_tallocr_alloc(struct ggml_dyn_tallocr * al
185185
// the last block represents memory still available in an existing chunk
186186
struct free_block * block = &alloc->free_blocks[alloc->n_free_blocks - 1];
187187
max_avail = MAX(max_avail, block->size);
188-
best_fit_block = alloc->n_free_blocks - 1;
189188
if (block->size < size) {
190189
// not enough space in existing chunk, start the next one
191-
ggml_dyn_tallocr_new_chunk(alloc, &alloc->free_blocks[best_fit_block], size);
190+
GGML_ASSERT(alloc->n_free_blocks < MAX_FREE_BLOCKS && "out of free blocks");
191+
ggml_dyn_tallocr_new_chunk(alloc, &alloc->free_blocks[alloc->n_free_blocks], size);
192+
alloc->n_free_blocks++;
192193
}
194+
best_fit_block = alloc->n_free_blocks - 1;
193195
}
194196

195197
struct free_block * block = &alloc->free_blocks[best_fit_block];

tests/test-alloc.cpp

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -319,6 +319,25 @@ static void test_tensor_larger_than_max_size() {
319319
GGML_ASSERT(backend.context->allocated_total() == 24);
320320
}
321321

322+
// Fill up leftover unallocated space of a chunk after allocating a large tensor that
323+
// requires a new chunk.
324+
static void test_fill_leftover_space() {
325+
dummy_backend backend = dummy_backend_init(16);
326+
auto [ctx, graph, ctx_ptr] = make_context();
327+
328+
ggml_tensor * x[4];
329+
x[0] = make_input_with_size(ctx, 8);
330+
x[1] = ggml_pad(ctx, x[0], 2, 0, 0, 0);
331+
x[3] = ggml_mean(ctx, x[1]);
332+
assign_names(ctx);
333+
334+
ggml_gallocr_ptr galloc = allocate_graph(graph, x[3], &backend.buffer_type);
335+
check_all_allocated(graph);
336+
check_no_overlap(graph);
337+
check_max_size(ctx);
338+
GGML_ASSERT(backend.context->allocated_total() <= 12 + 16);
339+
}
340+
322341
// Check that views don't require any extra memory
323342
static void test_view_inplace() {
324343
dummy_backend backend = dummy_backend_init(32);
@@ -473,8 +492,8 @@ static void test_buffer_size_zero() {
473492

474493
ggml_backend_buffer_type_t bufts[2] = { &backend_a.buffer_type, &backend_b.buffer_type };
475494
ggml_gallocr_ptr galloc = ggml_gallocr_ptr(ggml_gallocr_new_n(bufts, 2));
476-
bool res1 = ggml_gallocr_reserve_n(galloc.get(), graph, node_buffer_ids, leaf_buffer_ids);
477-
bool res2 = ggml_gallocr_alloc_graph(galloc.get(), graph);
495+
bool res1 = ggml_gallocr_reserve_n(galloc.get(), graph, node_buffer_ids, leaf_buffer_ids);
496+
bool res2 = ggml_gallocr_alloc_graph(galloc.get(), graph);
478497
GGML_ASSERT(res1 && res2);
479498

480499
check_all_allocated(graph);
@@ -493,6 +512,7 @@ int main() {
493512
run("test_max_size_too_many_tensors", test_max_size_too_many_tensors);
494513
run("test_max_size_tensor_too_large", test_max_size_tensor_too_large);
495514
run("test_tensor_larger_than_max_size", test_tensor_larger_than_max_size);
515+
run("test_fill_leftover_space", test_fill_leftover_space);
496516
run("test_view_inplace", test_view_inplace);
497517
run("test_reuse_and_free", test_reuse_and_free);
498518
run("test_merge_free_block(32)", []() { test_merge_free_block(32); });

0 commit comments

Comments
 (0)