Skip to content

Commit 5a916c7

Browse files
committed
refactor: move adding new free block and new chunk into separate functions
1 parent 29087f0 commit 5a916c7

File tree

1 file changed

+52
-41
lines changed

1 file changed

+52
-41
lines changed

ggml/src/ggml-alloc.c

Lines changed: 52 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,27 @@ static struct free_block_range ggml_dyn_tallocr_free_block_range(const struct gg
144144
return range;
145145
}
146146

147+
// Insert a free block into the allocator's per-chunk, address-sorted block list.
// Blocks inside each chunk are kept ordered by offset so that adjacent free
// blocks can be merged cheaply when tensors are freed.
void ggml_dyn_tallocr_insert_block(struct ggml_dyn_tallocr * alloc, struct buffer_address addr, size_t size) {
    int n_blocks = alloc->free_blocks_begin[alloc->n_chunks];
    GGML_ASSERT(n_blocks < MAX_FREE_BLOCKS && "out of free blocks");

    // locate the insertion point within this chunk's range so the array stays
    // sorted by address (makes merging blocks faster)
    int pos = alloc->free_blocks_begin[addr.chunk];
    const int chunk_end = alloc->free_blocks_begin[addr.chunk + 1];
    while (pos < chunk_end && alloc->free_blocks[pos].addr.offset < addr.offset) {
        pos++;
    }

    // open a gap at pos by shifting the tail of the array one slot to the right
    for (int i = n_blocks; i > pos; i--) {
        alloc->free_blocks[i] = alloc->free_blocks[i - 1];
    }

    // place the new block into the gap
    alloc->free_blocks[pos].addr = addr;
    alloc->free_blocks[pos].size = size;

    // every chunk after this one now starts one slot further to the right
    for (int c = addr.chunk + 1; c <= alloc->n_chunks; ++c) {
        alloc->free_blocks_begin[c]++;
    }
}
167+
147168
void ggml_dyn_tallocr_remove_block(struct ggml_dyn_tallocr * alloc, int idx) {
148169
int chunk = alloc->free_blocks[idx].addr.chunk;
149170
// shift all elements after idx by 1 to the left, overwriting the element at idx
@@ -157,6 +178,27 @@ void ggml_dyn_tallocr_remove_block(struct ggml_dyn_tallocr * alloc, int idx) {
157178
}
158179
}
159180

181+
// add a new chunk by creating a block of unclaimed space after the last chunk
182+
int ggml_dyn_tallocr_new_chunk(struct ggml_dyn_tallocr * alloc, size_t min_size) {
183+
if (alloc->n_chunks >= GGML_VBUFFER_MAX_CHUNKS) {
184+
return -1;
185+
}
186+
int i = alloc->free_blocks_begin[alloc->n_chunks];
187+
alloc->free_blocks[i].addr.chunk = alloc->n_chunks;
188+
alloc->free_blocks[i].addr.offset = 0;
189+
// available space in a chunk is limited to max_chunk_size, but can be higher if:
190+
// 1. a single tensor exceeds the maximum, and cannot fit any other way
191+
// 2. we are running out of chunks
192+
// backends will either manage to allocate the larger size, or report an error.
193+
alloc->free_blocks[i].size = MAX(min_size, alloc->max_chunk_size);
194+
if (alloc->n_chunks == GGML_VBUFFER_MAX_CHUNKS - 1) {
195+
alloc->free_blocks[i].size = SIZE_MAX/2;
196+
}
197+
alloc->free_blocks_begin[alloc->n_chunks + 1] = i + 1;
198+
alloc->n_chunks++;
199+
return i;
200+
}
201+
160202
#ifdef GGML_ALLOCATOR_DEBUG
161203
static void add_allocated_tensor(struct ggml_dyn_tallocr * alloc, struct buffer_address addr, const struct ggml_tensor * tensor) {
162204
for (int i = 0; i < 1024; i++) {
@@ -187,7 +229,7 @@ static struct buffer_address ggml_dyn_tallocr_alloc(struct ggml_dyn_tallocr * al
187229
int best_fit_block = -1;
188230
size_t max_avail = 0;
189231

190-
// find the best fitting free block besides the last block
232+
// find the best fitting free block in any chunk besides the last block
191233
for (int c = 0; c < alloc->n_chunks; ++c) {
192234
struct free_block_range blocks = ggml_dyn_tallocr_free_block_range(alloc, c);
193235
size_t best_fit_size = SIZE_MAX;
@@ -202,7 +244,7 @@ static struct buffer_address ggml_dyn_tallocr_alloc(struct ggml_dyn_tallocr * al
202244
}
203245

204246
if (best_fit_block == -1) {
205-
// no suitable block found, try the last block (ie. growing a chunks size)
247+
// no suitable block found, try the last block (this will grow a chunks size)
206248
for (int c = 0; c < alloc->n_chunks; ++c) {
207249
struct free_block_range blocks = ggml_dyn_tallocr_free_block_range(alloc, c);
208250
if (blocks.size > 0) {
@@ -218,28 +260,13 @@ static struct buffer_address ggml_dyn_tallocr_alloc(struct ggml_dyn_tallocr * al
218260

219261
if (best_fit_block == -1) {
220262
// none of the existing chunks have enough space left
221-
if (alloc->n_chunks < GGML_VBUFFER_MAX_CHUNKS) {
222-
// add a new chunk by creating a block of unclaimed space after the last chunk
223-
int i = alloc->free_blocks_begin[alloc->n_chunks];
224-
alloc->free_blocks[i].addr.chunk = alloc->n_chunks;
225-
alloc->free_blocks[i].addr.offset = 0;
226-
// available space in a chunk is limited to max_chunk_size, but can be higher if:
227-
// 1. a single tensor exceeds the maximum, and cannot fit any other way
228-
// 2. we are running out of chunks
229-
// backends will either manage to allocate the larger size, or report an error.
230-
alloc->free_blocks[i].size = MAX(size, alloc->max_chunk_size);
231-
if (alloc->n_chunks == GGML_VBUFFER_MAX_CHUNKS - 1) {
232-
alloc->free_blocks[i].size = SIZE_MAX/2;
233-
}
234-
alloc->free_blocks_begin[alloc->n_chunks + 1] = i + 1;
235-
alloc->n_chunks++;
236-
best_fit_block = i;
237-
} else {
238-
// since the last chunk always has virtually endless memory, this should never happen
239-
GGML_LOG_ERROR("%s: not enough space in the buffer to allocate %zu bytes, largest block available %zu bytes\n",
240-
__func__, size, max_avail);
241-
GGML_ABORT("graph allocation: failed to reserve memory");
242-
}
263+
best_fit_block = ggml_dyn_tallocr_new_chunk(alloc, size);
264+
}
265+
if (best_fit_block == -1) {
266+
// since the last chunk always has virtually endless memory, this should never happen
267+
GGML_LOG_ERROR("%s: not enough space in the buffer to allocate %zu bytes, largest block available %zu bytes\n",
268+
__func__, size, max_avail);
269+
GGML_ABORT("graph allocation: failed to reserve memory");
243270
}
244271

245272
struct free_block * block = &alloc->free_blocks[best_fit_block];
@@ -336,23 +363,7 @@ static void ggml_dyn_tallocr_free_tensor(struct ggml_dyn_tallocr * alloc, struct
336363
}
337364
}
338365
// otherwise, add a new block
339-
int n_free_blocks = alloc->free_blocks_begin[alloc->n_chunks];
340-
GGML_ASSERT(n_free_blocks < MAX_FREE_BLOCKS && "out of free blocks");
341-
// insert the new block in the correct position to keep the array sorted by address (to make merging blocks faster)
342-
int insert_pos = blocks.begin;
343-
while (insert_pos < blocks.end && alloc->free_blocks[insert_pos].addr.offset < addr.offset) {
344-
insert_pos++;
345-
}
346-
// shift all blocks from insert_pos onward to make room for the new block
347-
for (int i = n_free_blocks; i > insert_pos; i--) {
348-
alloc->free_blocks[i] = alloc->free_blocks[i-1];
349-
}
350-
// insert the new block
351-
alloc->free_blocks[insert_pos].addr = addr;
352-
alloc->free_blocks[insert_pos].size = size;
353-
for (int c = addr.chunk + 1; c < alloc->n_chunks + 1; c++) {
354-
alloc->free_blocks_begin[c]++;
355-
}
366+
ggml_dyn_tallocr_insert_block(alloc, addr, size);
356367

357368
GGML_UNUSED(tensor);
358369
}

0 commit comments

Comments
 (0)