Commit 69964e0

allocate chunks individually with a separate free-blocks list for each one
* needs a bit more memory/allocations/indirections, but code is simpler
1 parent 5a916c7 commit 69964e0

File tree

1 file changed: +73 -88 lines changed

ggml/src/ggml-alloc.c

Lines changed: 73 additions & 88 deletions
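
A note on the shape of the change (editorial, not part of the commit): the old allocator kept the free blocks of all chunks packed into one shared array, indexed through free_blocks_begin; the new one gives every chunk its own heap-allocated tallocr_chunk with a private list. An illustrative C fragment of the two access patterns, using identifiers from the diff below (blk_before and blk_after are invented names):

    // before: blocks of chunk c live in one shared array, at indices
    // [free_blocks_begin[c], free_blocks_begin[c+1])
    struct free_block * blk_before = &alloc->free_blocks[alloc->free_blocks_begin[c] + i];

    // after: each chunk owns its blocks, at the cost of one extra indirection
    struct free_block * blk_after = &alloc->chunks[c]->free_blocks[i];

This trades a little memory and one more allocation per chunk for simpler index bookkeeping, as the commit message says.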
@@ -110,17 +110,21 @@ static bool ggml_buffer_address_less(struct buffer_address a, struct buffer_addr
 }
 
 struct free_block {
-    struct buffer_address addr;
+    size_t offset;
     size_t size;
 };
 
+struct tallocr_chunk {
+    struct free_block free_blocks[MAX_FREE_BLOCKS];
+    int n_free_blocks;
+    size_t max_size;
+};
+
 struct ggml_dyn_tallocr {
     size_t alignment;
-    int n_chunks;
-    int free_blocks_begin[GGML_VBUFFER_MAX_CHUNKS + 1]; // end[chunk] == begin[chunk+1]
-    struct free_block free_blocks[MAX_FREE_BLOCKS];
-    size_t max_size[GGML_VBUFFER_MAX_CHUNKS];
     size_t max_chunk_size;
+    struct tallocr_chunk * chunks[GGML_VBUFFER_MAX_CHUNKS];
+    int n_chunks;
 
 #ifdef GGML_ALLOCATOR_DEBUG
     struct {
@@ -130,73 +134,49 @@ struct ggml_dyn_tallocr {
 #endif
 };
 
-struct free_block_range {
-    int begin;
-    int end;
-    int size;
-};
-
-static struct free_block_range ggml_dyn_tallocr_free_block_range(const struct ggml_dyn_tallocr * alloc, int chunk) {
-    struct free_block_range range;
-    range.begin = alloc->free_blocks_begin[chunk];
-    range.end = alloc->free_blocks_begin[chunk + 1];
-    range.size = range.end - range.begin;
-    return range;
-}
-
-void ggml_dyn_tallocr_insert_block(struct ggml_dyn_tallocr * alloc, struct buffer_address addr, size_t size) {
-    int total_blocks = alloc->free_blocks_begin[alloc->n_chunks];
-    GGML_ASSERT(total_blocks < MAX_FREE_BLOCKS && "out of free blocks");
+void ggml_dyn_tallocr_insert_block(struct tallocr_chunk * chunk, size_t offset, size_t size) {
+    GGML_ASSERT(chunk->n_free_blocks < MAX_FREE_BLOCKS && "out of free blocks");
     // insert the new block in the correct position to keep the array sorted by address (to make merging blocks faster)
-    int insert_pos = alloc->free_blocks_begin[addr.chunk];
-    int blocks_end = alloc->free_blocks_begin[addr.chunk + 1];
-    while (insert_pos < blocks_end && alloc->free_blocks[insert_pos].addr.offset < addr.offset) {
+    int insert_pos = 0;
+    while (insert_pos < chunk->n_free_blocks && chunk->free_blocks[insert_pos].offset < offset) {
         insert_pos++;
     }
     // shift all blocks from insert_pos onward to make room for the new block
-    for (int i = total_blocks; i > insert_pos; i--) {
-        alloc->free_blocks[i] = alloc->free_blocks[i-1];
+    for (int i = chunk->n_free_blocks; i > insert_pos; i--) {
+        chunk->free_blocks[i] = chunk->free_blocks[i-1];
     }
     // insert the new block
-    alloc->free_blocks[insert_pos].addr = addr;
-    alloc->free_blocks[insert_pos].size = size;
-    for (int c = addr.chunk + 1; c < alloc->n_chunks + 1; ++c) {
-        alloc->free_blocks_begin[c]++;
-    }
+    chunk->free_blocks[insert_pos].offset = offset;
+    chunk->free_blocks[insert_pos].size = size;
+    chunk->n_free_blocks++;
 }
 
-void ggml_dyn_tallocr_remove_block(struct ggml_dyn_tallocr * alloc, int idx) {
-    int chunk = alloc->free_blocks[idx].addr.chunk;
+void ggml_dyn_tallocr_remove_block(struct tallocr_chunk * chunk, int idx) {
     // shift all elements after idx by 1 to the left, overwriting the element at idx
-    int n_free_blocks = alloc->free_blocks_begin[alloc->n_chunks];
-    for (int i = idx; i < n_free_blocks; i++) {
-        alloc->free_blocks[i] = alloc->free_blocks[i + 1];
-    }
-    // adjust first element index of all chunks after the current one
-    for (int c = chunk + 1; c < alloc->n_chunks + 1; c++) {
-        alloc->free_blocks_begin[c]--;
+    for (int i = idx; i < chunk->n_free_blocks; i++) {
+        chunk->free_blocks[i] = chunk->free_blocks[i+1];
     }
+    chunk->n_free_blocks--;
 }
 
-// add a new chunk by creating a block of unclaimed space after the last chunk
 int ggml_dyn_tallocr_new_chunk(struct ggml_dyn_tallocr * alloc, size_t min_size) {
     if (alloc->n_chunks >= GGML_VBUFFER_MAX_CHUNKS) {
         return -1;
     }
-    int i = alloc->free_blocks_begin[alloc->n_chunks];
-    alloc->free_blocks[i].addr.chunk = alloc->n_chunks;
-    alloc->free_blocks[i].addr.offset = 0;
+    struct tallocr_chunk * chunk = calloc(1, sizeof(struct tallocr_chunk));
+    chunk->n_free_blocks = 1;
+    chunk->free_blocks[0].offset = 0;
     // available space in a chunk is limited to max_chunk_size, but can be higher if:
     // 1. a single tensor exceeds the maximum, and cannot fit any other way
     // 2. we are running out of chunks
     // backends will either manage to allocate the larger size, or report an error.
-    alloc->free_blocks[i].size = MAX(min_size, alloc->max_chunk_size);
+    chunk->free_blocks[0].size = MAX(min_size, alloc->max_chunk_size);
     if (alloc->n_chunks == GGML_VBUFFER_MAX_CHUNKS - 1) {
-        alloc->free_blocks[i].size = SIZE_MAX/2;
+        chunk->free_blocks[0].size = SIZE_MAX/2;
    }
-    alloc->free_blocks_begin[alloc->n_chunks + 1] = i + 1;
+    alloc->chunks[alloc->n_chunks] = chunk;
     alloc->n_chunks++;
-    return i;
+    return alloc->n_chunks - 1;
 }
 
 #ifdef GGML_ALLOCATOR_DEBUG
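
An editorial usage note on ggml_dyn_tallocr_new_chunk above: it used to return an index into the shared free-blocks array; it now returns the index of the freshly created chunk, whose single free block always starts at offset 0. A minimal sketch of the new calling convention, mirroring what the allocation path further down does:

    // sketch, assuming alloc and size are in scope as in the diff
    int c = ggml_dyn_tallocr_new_chunk(alloc, size); // -1 if all chunk slots are in use
    if (c != -1) {
        struct free_block * b = &alloc->chunks[c]->free_blocks[0];
        // b->offset == 0 and b->size >= size, so block 0 can serve the request
    }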
@@ -226,17 +206,19 @@ static struct buffer_address ggml_dyn_tallocr_alloc(struct ggml_dyn_tallocr * al
 
     AT_PRINTF("%s: allocating %s (%zu bytes) - ", __func__, tensor->name, size);
 
+    int best_fit_chunk = -1;
     int best_fit_block = -1;
     size_t max_avail = 0;
 
     // find the best fitting free block in any chunk besides the last block
     for (int c = 0; c < alloc->n_chunks; ++c) {
-        struct free_block_range blocks = ggml_dyn_tallocr_free_block_range(alloc, c);
+        struct tallocr_chunk * chunk = alloc->chunks[c];
         size_t best_fit_size = SIZE_MAX;
-        for (int i = blocks.begin; i < blocks.end - 1; i++) {
-            struct free_block * block = &alloc->free_blocks[i];
+        for (int i = 0; i < chunk->n_free_blocks - 1; i++) {
+            struct free_block * block = &chunk->free_blocks[i];
             max_avail = MAX(max_avail, block->size);
             if (block->size >= size && block->size <= best_fit_size) {
+                best_fit_chunk = c;
                 best_fit_block = i;
                 best_fit_size = block->size;
             }
@@ -246,12 +228,13 @@ static struct buffer_address ggml_dyn_tallocr_alloc(struct ggml_dyn_tallocr * al
     if (best_fit_block == -1) {
         // no suitable block found, try the last block (this will grow a chunks size)
         for (int c = 0; c < alloc->n_chunks; ++c) {
-            struct free_block_range blocks = ggml_dyn_tallocr_free_block_range(alloc, c);
-            if (blocks.size > 0) {
-                struct free_block * block = &alloc->free_blocks[blocks.end - 1];
+            struct tallocr_chunk * chunk = alloc->chunks[c];
+            if (chunk->n_free_blocks > 0) {
+                struct free_block * block = &chunk->free_blocks[chunk->n_free_blocks - 1];
                 max_avail = MAX(max_avail, block->size);
                 if (block->size >= size) {
-                    best_fit_block = blocks.end - 1;
+                    best_fit_chunk = c;
+                    best_fit_block = chunk->n_free_blocks - 1;
                     break;
                 }
             }
@@ -260,7 +243,8 @@ static struct buffer_address ggml_dyn_tallocr_alloc(struct ggml_dyn_tallocr * al
 
     if (best_fit_block == -1) {
         // none of the existing chunks have enough space left
-        best_fit_block = ggml_dyn_tallocr_new_chunk(alloc, size);
+        best_fit_chunk = ggml_dyn_tallocr_new_chunk(alloc, size);
+        best_fit_block = 0;
     }
     if (best_fit_block == -1) {
         // since the last chunk always has virtually endless memory, this should never happen
@@ -269,13 +253,14 @@ static struct buffer_address ggml_dyn_tallocr_alloc(struct ggml_dyn_tallocr * al
         GGML_ABORT("graph allocation: failed to reserve memory");
     }
 
-    struct free_block * block = &alloc->free_blocks[best_fit_block];
-    struct buffer_address addr = block->addr;
-    block->addr.offset += size;
+    struct tallocr_chunk * chunk = alloc->chunks[best_fit_chunk];
+    struct free_block * block = &chunk->free_blocks[best_fit_block];
+    struct buffer_address addr = {.chunk = best_fit_chunk, .offset = block->offset };
+    block->offset += size;
     block->size -= size;
     if (block->size == 0) {
         // remove block if empty
-        ggml_dyn_tallocr_remove_block(alloc, best_fit_block);
+        ggml_dyn_tallocr_remove_block(chunk, best_fit_block);
     }
 
     AT_PRINTF("block %d, offset %zu, chunk %d\n", best_fit_block, addr.offset, addr.chunk);
@@ -311,7 +296,7 @@ static struct buffer_address ggml_dyn_tallocr_alloc(struct ggml_dyn_tallocr * al
     }
 #endif
 
-    alloc->max_size[addr.chunk] = MAX(alloc->max_size[addr.chunk], addr.offset + size);
+    chunk->max_size = MAX(chunk->max_size, addr.offset + size);
 
     return addr;
 }
@@ -329,51 +314,50 @@ static void ggml_dyn_tallocr_free_tensor(struct ggml_dyn_tallocr * alloc, struct
     remove_allocated_tensor(alloc, addr, tensor);
 #endif
 
-    struct free_block_range blocks = ggml_dyn_tallocr_free_block_range(alloc, addr.chunk);
+    struct tallocr_chunk * chunk = alloc->chunks[addr.chunk];
 
     // see if we can merge with an existing block
-    for (int i = blocks.begin; i < blocks.end; i++) {
-        struct free_block * block = &alloc->free_blocks[i];
+    for (int i = 0; i < chunk->n_free_blocks; i++) {
+        struct free_block * block = &chunk->free_blocks[i];
         // check if ptr is at the end of the block
-        if (block->addr.offset + block->size == addr.offset) {
+        if (block->offset + block->size == addr.offset) {
             block->size += size;
             // check if we can merge with the next block
-            if (i < blocks.end - 1) {
-                struct free_block * next = &alloc->free_blocks[i+1];
-                if (block->addr.offset + block->size == next->addr.offset) {
+            if (i < chunk->n_free_blocks - 1) {
+                struct free_block * next = &chunk->free_blocks[i+1];
+                if (block->offset + block->size == next->offset) {
                     block->size += next->size;
-                    ggml_dyn_tallocr_remove_block(alloc, i+1);
+                    ggml_dyn_tallocr_remove_block(chunk, i+1);
                 }
             }
             return;
         }
         // check if ptr is at the beginning of the block
-        if (addr.offset + size == block->addr.offset) {
-            block->addr.offset = addr.offset;
+        if (addr.offset + size == block->offset) {
+            block->offset = addr.offset;
             block->size += size;
             // check if we can merge with the previous block
-            if (i > blocks.begin) {
-                struct free_block * prev = &alloc->free_blocks[i-1];
-                if (prev->addr.offset + prev->size == block->addr.offset) {
+            if (i > 0) {
+                struct free_block * prev = &chunk->free_blocks[i-1];
+                if (prev->offset + prev->size == block->offset) {
                     prev->size += block->size;
-                    ggml_dyn_tallocr_remove_block(alloc, i);
+                    ggml_dyn_tallocr_remove_block(chunk, i);
                 }
             }
             return;
         }
     }
     // otherwise, add a new block
-    ggml_dyn_tallocr_insert_block(alloc, addr, size);
+    ggml_dyn_tallocr_insert_block(chunk, addr.offset, size);
 
     GGML_UNUSED(tensor);
 }
 
 static void ggml_dyn_tallocr_reset(struct ggml_dyn_tallocr * alloc) {
     for (int i = 0; i < GGML_VBUFFER_MAX_CHUNKS; i++) {
-        alloc->free_blocks_begin[i] = 0;
-        alloc->max_size[i] = 0;
+        free(alloc->chunks[i]);
+        alloc->chunks[i] = NULL;
     }
-    alloc->free_blocks_begin[GGML_VBUFFER_MAX_CHUNKS] = 0;
     alloc->n_chunks = 0;
 
 #ifdef GGML_ALLOCATOR_DEBUG
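
The free/merge logic above is easiest to see in isolation. Below is a small standalone toy of the technique (editorial illustration, not ggml code; all toy_* names are invented): one chunk owns an array of free blocks kept sorted by offset, and freeing a range adjacent to an existing block extends that block instead of inserting a new one. The real code additionally chains merges across both neighbors.

    // toy per-chunk free list: sorted insert with coalescing on free
    #include <assert.h>
    #include <stddef.h>
    #include <stdio.h>

    #define TOY_MAX_FREE_BLOCKS 8

    struct toy_block { size_t offset, size; };

    struct toy_chunk {
        struct toy_block free_blocks[TOY_MAX_FREE_BLOCKS];
        int n_free_blocks;
    };

    // return a freed range [offset, offset+size) to the chunk
    static void toy_free(struct toy_chunk * c, size_t offset, size_t size) {
        for (int i = 0; i < c->n_free_blocks; i++) {
            struct toy_block * b = &c->free_blocks[i];
            if (b->offset + b->size == offset) { // freed range starts at block end
                b->size += size;
                return;
            }
            if (offset + size == b->offset) {   // freed range ends at block start
                b->offset = offset;
                b->size += size;
                return;
            }
        }
        // no adjacent block: insert at the position that keeps the array sorted
        assert(c->n_free_blocks < TOY_MAX_FREE_BLOCKS && "out of free blocks");
        int pos = 0;
        while (pos < c->n_free_blocks && c->free_blocks[pos].offset < offset) {
            pos++;
        }
        for (int i = c->n_free_blocks; i > pos; i--) {
            c->free_blocks[i] = c->free_blocks[i-1];
        }
        c->free_blocks[pos] = (struct toy_block){ offset, size };
        c->n_free_blocks++;
    }

    int main(void) {
        // one chunk whose free space is [256, 1024), as if [0, 256)
        // had just been handed out to a tensor
        struct toy_chunk c = { .free_blocks = {{ 256, 768 }}, .n_free_blocks = 1 };
        toy_free(&c, 0, 256); // returning [0, 256) merges back into [0, 1024)
        printf("%d block(s): offset=%zu size=%zu\n",
               c.n_free_blocks, c.free_blocks[0].offset, c.free_blocks[0].size);
        return 0;
    }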
@@ -387,12 +371,10 @@ static struct ggml_dyn_tallocr * ggml_dyn_tallocr_new(size_t alignment, size_t m
     struct ggml_dyn_tallocr * alloc = (struct ggml_dyn_tallocr *)malloc(sizeof(struct ggml_dyn_tallocr));
 
     *alloc = (struct ggml_dyn_tallocr) {
-        /*.alignment         = */ alignment,
-        /*.n_chunks          = */ 0,
-        /*.free_blocks_begin = */ {0},
-        /*.free_blocks       = */ {{{0}, 0}},
-        /*.max_size          = */ {0},
-        /*.max_chunk_size    = */ MIN(max_buffer_size, SIZE_MAX/2), // clamp to avoid overflows
+        /*.alignment      = */ alignment,
+        /*.max_chunk_size = */ MIN(max_buffer_size, SIZE_MAX/2), // clamp to avoid overflows
+        /*.chunks         = */ {NULL},
+        /*.n_chunks       = */ 0,
 #ifdef GGML_ALLOCATOR_DEBUG
         /*.allocated_tensors = */ {{0}},
 #endif
@@ -404,13 +386,16 @@ static struct ggml_dyn_tallocr * ggml_dyn_tallocr_new(size_t alignment, size_t m
 }
 
 static void ggml_dyn_tallocr_free(struct ggml_dyn_tallocr * alloc) {
+    for (int i = 0; i < alloc->n_chunks; ++i) {
+        free(alloc->chunks[i]);
+    }
     free(alloc);
 }
 
 static size_t ggml_dyn_tallocr_max_size(struct ggml_dyn_tallocr * alloc) {
     size_t max_size = 0;
     for (int i = 0; i < alloc->n_chunks; i++) {
-        max_size += alloc->max_size[i];
+        max_size += alloc->chunks[i]->max_size;
     }
     return max_size;
 }
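
A worked example for the high-water-mark accounting (hypothetical numbers): if chunk 0 peaked at 1 MiB and chunk 1 at 3 MiB, ggml_dyn_tallocr_max_size reports 4 MiB in total, while ggml_vbuffer_alloc below requests one backend buffer per chunk, sized 1 MiB and 3 MiB respectively, rather than a single contiguous 4 MiB buffer.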
@@ -453,7 +438,7 @@ static struct vbuffer * ggml_vbuffer_alloc(ggml_backend_buffer_type_t buft, cons
     }
 
     for (int n = 0; n < talloc->n_chunks; n++) {
-        size_t chunk_size = talloc->max_size[n];
+        size_t chunk_size = talloc->chunks[n]->max_size;
         buf->chunks[n] = ggml_backend_buft_alloc_buffer(buft, chunk_size);
         if (buf->chunks[n] == NULL) {
             ggml_vbuffer_free(buf);
