@@ -116,8 +116,8 @@ struct free_block {
 
 struct ggml_dyn_tallocr {
     size_t alignment;
-    int n_free_blocks;
     int n_chunks;
+    int free_blocks_begin[GGML_VBUFFER_MAX_CHUNKS + 1]; // end[chunk] == begin[chunk+1]
     struct free_block free_blocks[MAX_FREE_BLOCKS];
     size_t max_size[GGML_VBUFFER_MAX_CHUNKS];
     size_t max_chunk_size;
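Note: the new free_blocks_begin array replaces the old n_free_blocks counter with a CSR-style prefix index: the free blocks of chunk c occupy the index range [free_blocks_begin[c], free_blocks_begin[c + 1]) of the shared free_blocks array, so free_blocks_begin[n_chunks] is the total block count. A minimal standalone sketch of that invariant, with simplified types (check_layout is a hypothetical helper, not part of the patch):

#include <assert.h>

#define MAX_CHUNKS 16

struct layout {
    int n_chunks;
    int free_blocks_begin[MAX_CHUNKS + 1]; // end[chunk] == begin[chunk + 1]
};

// begin[] must be non-decreasing: chunk c owns the block index range
// [begin[c], begin[c + 1]), and begin[n_chunks] is the total block count.
static void check_layout(const struct layout * l) {
    for (int c = 0; c < l->n_chunks; c++) {
        assert(l->free_blocks_begin[c] <= l->free_blocks_begin[c + 1]);
    }
}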
@@ -130,14 +130,31 @@ struct ggml_dyn_tallocr {
 #endif
 };
 
-// allocations are split into n chunks of size max_size[i]. tensor allocations may not cross chunk boundaries.
-static void ggml_dyn_tallocr_new_chunk(struct ggml_dyn_tallocr * alloc, struct free_block * block, size_t min_size) {
-    GGML_ASSERT(alloc->n_chunks >= 1);
-    block->addr.chunk = alloc->n_chunks;
-    block->addr.offset = 0;
-    block->size = MAX(min_size, alloc->max_chunk_size);
-    alloc->n_chunks++;
-    GGML_ASSERT(alloc->n_chunks <= GGML_VBUFFER_MAX_CHUNKS);
+struct free_block_range {
+    int begin;
+    int end;
+    int size;
+};
+
+static struct free_block_range ggml_dyn_tallocr_free_block_range(const struct ggml_dyn_tallocr * alloc, int chunk) {
+    struct free_block_range range;
+    range.begin = alloc->free_blocks_begin[chunk];
+    range.end   = alloc->free_blocks_begin[chunk + 1];
+    range.size  = range.end - range.begin;
+    return range;
+}
+
+void ggml_dyn_tallocr_remove_block(struct ggml_dyn_tallocr * alloc, int idx) {
+    int chunk = alloc->free_blocks[idx].addr.chunk;
+    // shift all elements after idx by 1 to the left, overwriting the element at idx
+    int n_free_blocks = alloc->free_blocks_begin[alloc->n_chunks];
+    for (int i = idx; i < n_free_blocks; i++) {
+        alloc->free_blocks[i] = alloc->free_blocks[i + 1];
+    }
+    // adjust the first-element index of all chunks after the current one
+    for (int c = chunk + 1; c < alloc->n_chunks + 1; c++) {
+        alloc->free_blocks_begin[c]--;
+    }
 }
 
 #ifdef GGML_ALLOCATOR_DEBUG
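To see what ggml_dyn_tallocr_remove_block does to the index array, here is a toy, self-contained re-implementation of the same shift-and-decrement logic (the numbers are made up for the example; this is not the patch code):

#include <stdio.h>

int main(void) {
    int n_chunks = 2;
    int begin[3]    = {0, 2, 5};               // chunk 0: blocks [0,2), chunk 1: [2,5)
    int block_id[5] = {10, 11, 20, 21, 22};

    int idx = 1, chunk = 0;                    // remove block 1, which lives in chunk 0
    int n_free = begin[n_chunks];
    for (int i = idx; i < n_free - 1; i++) {   // toy array has no spare slot, hence n_free - 1
        block_id[i] = block_id[i + 1];         // shift the tail left by one
    }
    for (int c = chunk + 1; c <= n_chunks; c++) {
        begin[c]--;                            // later chunks start one slot earlier
    }

    printf("begin = {%d, %d, %d}\n", begin[0], begin[1], begin[2]); // {0, 1, 4}
    return 0;
}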
@@ -167,31 +184,62 @@ static struct buffer_address ggml_dyn_tallocr_alloc(struct ggml_dyn_tallocr * al
 
     AT_PRINTF("%s: allocating %s (%zu bytes) - ", __func__, tensor->name, size);
 
+    int best_fit_block = -1;
     size_t max_avail = 0;
 
     // find the best fitting free block besides the last block
-    int best_fit_block = -1;
-    size_t best_fit_size = SIZE_MAX;
-    for (int i = 0; i < alloc->n_free_blocks - 1; i++) {
-        struct free_block * block = &alloc->free_blocks[i];
-        max_avail = MAX(max_avail, block->size);
-        if (block->size >= size && block->size <= best_fit_size) {
-            best_fit_block = i;
-            best_fit_size = block->size;
+    for (int c = 0; c < alloc->n_chunks; ++c) {
+        struct free_block_range blocks = ggml_dyn_tallocr_free_block_range(alloc, c);
+        size_t best_fit_size = SIZE_MAX;
+        for (int i = blocks.begin; i < blocks.end - 1; i++) {
+            struct free_block * block = &alloc->free_blocks[i];
+            max_avail = MAX(max_avail, block->size);
+            if (block->size >= size && block->size <= best_fit_size) {
+                best_fit_block = i;
+                best_fit_size = block->size;
+            }
+        }
+    }
+
+    if (best_fit_block == -1) {
+        // no suitable block found, try the last block (i.e. growing a chunk's size)
+        for (int c = 0; c < alloc->n_chunks; ++c) {
+            struct free_block_range blocks = ggml_dyn_tallocr_free_block_range(alloc, c);
+            if (blocks.size > 0) {
+                struct free_block * block = &alloc->free_blocks[blocks.end - 1];
+                max_avail = MAX(max_avail, block->size);
+                if (block->size >= size) {
+                    best_fit_block = blocks.end - 1;
+                    break;
+                }
+            }
         }
     }
 
     if (best_fit_block == -1) {
-        // the last block represents memory still available in an existing chunk
-        struct free_block * block = &alloc->free_blocks[alloc->n_free_blocks - 1];
-        max_avail = MAX(max_avail, block->size);
-        if (block->size < size) {
-            // not enough space in existing chunk, start the next one
-            GGML_ASSERT(alloc->n_free_blocks < MAX_FREE_BLOCKS && "out of free blocks");
-            ggml_dyn_tallocr_new_chunk(alloc, &alloc->free_blocks[alloc->n_free_blocks], size);
-            alloc->n_free_blocks++;
+        // none of the existing chunks have enough space left
+        if (alloc->n_chunks < GGML_VBUFFER_MAX_CHUNKS) {
+            // add a new chunk by creating a block of unclaimed space after the last chunk
+            int i = alloc->free_blocks_begin[alloc->n_chunks];
+            alloc->free_blocks[i].addr.chunk = alloc->n_chunks;
+            alloc->free_blocks[i].addr.offset = 0;
+            // available space in a chunk is limited to max_chunk_size, but can be higher if:
+            // 1. a single tensor exceeds the maximum, and cannot fit any other way
+            // 2. we are running out of chunks
+            // backends will either manage to allocate the larger size, or report an error.
+            alloc->free_blocks[i].size = MAX(size, alloc->max_chunk_size);
+            if (alloc->n_chunks == GGML_VBUFFER_MAX_CHUNKS - 1) {
+                alloc->free_blocks[i].size = SIZE_MAX/2;
+            }
+            alloc->free_blocks_begin[alloc->n_chunks + 1] = i + 1;
+            alloc->n_chunks++;
+            best_fit_block = i;
+        } else {
+            // since the last chunk always has virtually endless memory, this should never happen
+            GGML_LOG_ERROR("%s: not enough space in the buffer to allocate %zu bytes, largest block available %zu bytes\n",
+                __func__, size, max_avail);
+            GGML_ABORT("graph allocation: failed to reserve memory");
         }
-        best_fit_block = alloc->n_free_blocks - 1;
     }
 
     struct free_block * block = &alloc->free_blocks[best_fit_block];
@@ -200,15 +248,7 @@ static struct buffer_address ggml_dyn_tallocr_alloc(struct ggml_dyn_tallocr * al
     block->size -= size;
     if (block->size == 0) {
         // remove block if empty
-        alloc->n_free_blocks--;
-        for (int j = best_fit_block; j < alloc->n_free_blocks; j++) {
-            alloc->free_blocks[j] = alloc->free_blocks[j + 1];
-        }
-        // if there are no remaining blocks all memory in current chunk was used up -> start the next one
-        if (alloc->n_free_blocks == 0) {
-            alloc->n_free_blocks = 1;
-            ggml_dyn_tallocr_new_chunk(alloc, &alloc->free_blocks[0], 0);
-        }
+        ggml_dyn_tallocr_remove_block(alloc, best_fit_block);
    }
 
     AT_PRINTF("block %d, offset %zu, chunk %d\n", best_fit_block, addr.offset, addr.chunk);
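The allocation path above now searches in two passes: a best-fit pass over every chunk's interior blocks (skipping each chunk's trailing block, which represents the chunk's still-growable tail), then a fallback pass that grows the first chunk whose tail is large enough. A condensed sketch of the same selection logic over a plain size array (simplified inputs assumed; not the patch code):

#include <stddef.h>
#include <stdint.h>

// sizes[] holds every free block's size; begin[] is the per-chunk prefix index.
static int pick_block(const size_t * sizes, const int * begin, int n_chunks, size_t want) {
    int best = -1;
    // pass 1: best fit among interior blocks, skipping each chunk's tail
    for (int c = 0; c < n_chunks; c++) {
        size_t best_size = SIZE_MAX;
        for (int i = begin[c]; i < begin[c + 1] - 1; i++) {
            if (sizes[i] >= want && sizes[i] <= best_size) {
                best = i;
                best_size = sizes[i];
            }
        }
    }
    if (best != -1) {
        return best;
    }
    // pass 2: first chunk whose trailing (growable) block is large enough
    for (int c = 0; c < n_chunks; c++) {
        if (begin[c + 1] > begin[c] && sizes[begin[c + 1] - 1] >= want) {
            return begin[c + 1] - 1;
        }
    }
    return -1; // caller starts a new chunk, as in the branch above
}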
@@ -255,31 +295,27 @@ static struct buffer_address ggml_dyn_tallocr_alloc(struct ggml_dyn_tallocr * al
 static void ggml_dyn_tallocr_free_tensor(struct ggml_dyn_tallocr * alloc, struct buffer_address addr, size_t size, const struct ggml_tensor * tensor) {
     size = aligned_offset(NULL, size, alloc->alignment);
 
-    AT_PRINTF("%s: freeing %s at {chunk=%d, offset=%zu} (%zu bytes) - n_free_blocks = %d\n", __func__, tensor->name, addr.chunk, addr.offset, size, alloc->n_free_blocks);
+    AT_PRINTF("%s: freeing %s at {chunk=%d, offset=%zu} (%zu bytes) - n_free_blocks = %d\n",
+        __func__, tensor->name, addr.chunk, addr.offset, size, alloc->free_blocks_begin[alloc->n_chunks]);
 
 #ifdef GGML_ALLOCATOR_DEBUG
     remove_allocated_tensor(alloc, addr, tensor);
 #endif
 
+    struct free_block_range blocks = ggml_dyn_tallocr_free_block_range(alloc, addr.chunk);
+
     // see if we can merge with an existing block
-    for (int i = 0; i < alloc->n_free_blocks; i++) {
+    for (int i = blocks.begin; i < blocks.end; i++) {
         struct free_block * block = &alloc->free_blocks[i];
-        // can only merge with blocks within the same chunk
-        if (addr.chunk != block->addr.chunk) {
-            continue;
-        }
         // check if ptr is at the end of the block
         if (block->addr.offset + block->size == addr.offset) {
             block->size += size;
-            // check if we can merge with the next block (within the same chunk)
-            if (i < alloc->n_free_blocks - 1) {
+            // check if we can merge with the next block
+            if (i < blocks.end - 1) {
                 struct free_block * next = &alloc->free_blocks[i + 1];
-                if (block->addr.offset + block->size == next->addr.offset && block->addr.chunk == next->addr.chunk) {
+                if (block->addr.offset + block->size == next->addr.offset) {
                     block->size += next->size;
-                    alloc->n_free_blocks--;
-                    for (int j = i + 1; j < alloc->n_free_blocks; j++) {
-                        alloc->free_blocks[j] = alloc->free_blocks[j + 1];
-                    }
+                    ggml_dyn_tallocr_remove_block(alloc, i + 1);
                 }
             }
             return;
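Because the merge scan is now confined to the freed address's own chunk range, the old per-block chunk checks disappear: within one chunk, offset adjacency alone implies the blocks can be coalesced. A tiny illustration of that rule (hypothetical helper, not part of the patch):

#include <stddef.h>

struct span { size_t offset, size; };

// Merge b into a when b starts exactly where a ends; returns 1 on success.
static int try_merge(struct span * a, const struct span * b) {
    if (a->offset + a->size == b->offset) {
        a->size += b->size;
        return 1;
    }
    return 0;
}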
@@ -288,50 +324,46 @@ static void ggml_dyn_tallocr_free_tensor(struct ggml_dyn_tallocr * alloc, struct
         if (addr.offset + size == block->addr.offset) {
             block->addr.offset = addr.offset;
             block->size += size;
-            // check if we can merge with the previous block (within the same chunk)
-            if (i > 0) {
+            // check if we can merge with the previous block
+            if (i > blocks.begin) {
                 struct free_block * prev = &alloc->free_blocks[i - 1];
-                if (prev->addr.offset + prev->size == block->addr.offset && prev->addr.chunk == block->addr.chunk) {
+                if (prev->addr.offset + prev->size == block->addr.offset) {
                     prev->size += block->size;
-                    alloc->n_free_blocks--;
-                    for (int j = i; j < alloc->n_free_blocks; j++) {
-                        alloc->free_blocks[j] = alloc->free_blocks[j + 1];
-                    }
+                    ggml_dyn_tallocr_remove_block(alloc, i);
                 }
             }
             return;
         }
     }
     // otherwise, add a new block
-    GGML_ASSERT(alloc->n_free_blocks < MAX_FREE_BLOCKS && "out of free blocks");
+    int n_free_blocks = alloc->free_blocks_begin[alloc->n_chunks];
+    GGML_ASSERT(n_free_blocks < MAX_FREE_BLOCKS && "out of free blocks");
     // insert the new block in the correct position to keep the array sorted by address (to make merging blocks faster)
-    int insert_pos = 0;
-    while (insert_pos < alloc->n_free_blocks && ggml_buffer_address_less(alloc->free_blocks[insert_pos].addr, addr)) {
+    int insert_pos = blocks.begin;
+    while (insert_pos < blocks.end && alloc->free_blocks[insert_pos].addr.offset < addr.offset) {
         insert_pos++;
     }
     // shift all blocks from insert_pos onward to make room for the new block
-    for (int i = alloc->n_free_blocks; i > insert_pos; i--) {
+    for (int i = n_free_blocks; i > insert_pos; i--) {
         alloc->free_blocks[i] = alloc->free_blocks[i - 1];
     }
     // insert the new block
     alloc->free_blocks[insert_pos].addr = addr;
     alloc->free_blocks[insert_pos].size = size;
-    alloc->n_free_blocks++;
+    for (int c = addr.chunk + 1; c < alloc->n_chunks + 1; c++) {
+        alloc->free_blocks_begin[c]++;
+    }
 
     GGML_UNUSED(tensor);
 }
 
 static void ggml_dyn_tallocr_reset(struct ggml_dyn_tallocr * alloc) {
-    alloc->n_free_blocks = 1;
-    alloc->n_chunks = 1;
-    alloc->free_blocks[0].addr.chunk = 0;
-    alloc->free_blocks[0].addr.offset = 0;
-    alloc->free_blocks[0].size = alloc->max_chunk_size;
-    memset(alloc->max_size, 0, sizeof(alloc->max_size));
-
-    if (alloc->free_blocks[0].size == SIZE_MAX) {
-        alloc->free_blocks[0].size = SIZE_MAX/2; // avoid overflows
+    for (int i = 0; i < GGML_VBUFFER_MAX_CHUNKS; i++) {
+        alloc->free_blocks_begin[i] = 0;
+        alloc->max_size[i] = 0;
     }
+    alloc->free_blocks_begin[GGML_VBUFFER_MAX_CHUNKS] = 0;
+    alloc->n_chunks = 0;
 
 #ifdef GGML_ALLOCATOR_DEBUG
     for (int i = 0; i < 1024; i++) {
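The insertion path keeps each chunk's range sorted by offset (which is why ggml_buffer_address_less could be replaced by a plain offset comparison) and bumps the begin[] entry of every later chunk. A toy end-to-end version of that bookkeeping (illustrative values; not the patch code):

#include <stdio.h>

#define MAX_BLOCKS 8

int main(void) {
    int n_chunks = 2;
    int begin[3] = {0, 1, 3};                  // chunk 0: [0,1), chunk 1: [1,3)
    size_t offset[MAX_BLOCKS] = {64, 0, 128};

    size_t addr = 32;                          // free a block at offset 32 in chunk 0
    int chunk = 0;
    int pos = begin[chunk];
    while (pos < begin[chunk + 1] && offset[pos] < addr) {
        pos++;                                 // find the sorted insert position
    }
    for (int i = begin[n_chunks]; i > pos; i--) {
        offset[i] = offset[i - 1];             // shift the tail right by one
    }
    offset[pos] = addr;
    for (int c = chunk + 1; c <= n_chunks; c++) {
        begin[c]++;                            // later chunks start one slot later
    }

    printf("begin = {%d, %d, %d}\n", begin[0], begin[1], begin[2]); // {0, 2, 4}
    return 0;
}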
@@ -344,12 +376,12 @@ static struct ggml_dyn_tallocr * ggml_dyn_tallocr_new(size_t alignment, size_t m
     struct ggml_dyn_tallocr * alloc = (struct ggml_dyn_tallocr *)malloc(sizeof(struct ggml_dyn_tallocr));
 
     *alloc = (struct ggml_dyn_tallocr) {
-        /*.alignment      = */ alignment,
-        /*.n_free_blocks  = */ 0,
-        /*.n_chunks       = */ 0,
-        /*.free_blocks    = */ {{{0}, 0}},
-        /*.max_size       = */ {0},
-        /*.max_chunk_size = */ max_buffer_size,
+        /*.alignment         = */ alignment,
+        /*.n_chunks          = */ 0,
+        /*.free_blocks_begin = */ {0},
+        /*.free_blocks       = */ {{{0}, 0}},
+        /*.max_size          = */ {0},
+        /*.max_chunk_size    = */ MIN(max_buffer_size, SIZE_MAX/2), // clamp to avoid overflows
 #ifdef GGML_ALLOCATOR_DEBUG
         /*.allocated_tensors = */ {{0}},
 #endif
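Finally, the initializer now clamps max_chunk_size to SIZE_MAX/2 up front, where the old code patched this up in reset instead. The point of the clamp is that offset + size expressions in the allocator must not wrap around; a standalone demonstration on a 64-bit system:

#include <stdint.h>
#include <stdio.h>

int main(void) {
    size_t offset = 64;
    size_t size   = SIZE_MAX;         // unclamped: offset + size wraps around
    printf("%zu\n", offset + size);   // prints 63 (modular arithmetic)
    size = SIZE_MAX / 2;              // clamped: sums stay far below SIZE_MAX
    printf("%zu\n", offset + size);   // prints 9223372036854775871
    return 0;
}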