@@ -144,6 +144,27 @@ static struct free_block_range ggml_dyn_tallocr_free_block_range(const struct gg
144144    return  range ;
145145}
146146
147+ void  ggml_dyn_tallocr_insert_block (struct  ggml_dyn_tallocr  *  alloc , struct  buffer_address  addr , size_t  size ) {
148+     int  total_blocks  =  alloc -> free_blocks_begin [alloc -> n_chunks ];
149+     GGML_ASSERT (total_blocks  <  MAX_FREE_BLOCKS  &&  "out of free blocks" );
150+     // insert the new block in the correct position to keep the array sorted by address (to make merging blocks faster) 
151+     int  insert_pos  =  alloc -> free_blocks_begin [addr .chunk ];
152+     int  blocks_end  =  alloc -> free_blocks_begin [addr .chunk  +  1 ];
153+     while  (insert_pos  <  blocks_end  &&  alloc -> free_blocks [insert_pos ].addr .offset  <  addr .offset ) {
154+         insert_pos ++ ;
155+     }
156+     // shift all blocks from insert_pos onward to make room for the new block 
157+     for  (int  i  =  total_blocks ; i  >  insert_pos ; i -- ) {
158+         alloc -> free_blocks [i ] =  alloc -> free_blocks [i - 1 ];
159+     }
160+     // insert the new block 
161+     alloc -> free_blocks [insert_pos ].addr  =  addr ;
162+     alloc -> free_blocks [insert_pos ].size  =  size ;
163+     for  (int  c  =  addr .chunk  +  1 ; c  <  alloc -> n_chunks  +  1 ; ++ c ) {
164+         alloc -> free_blocks_begin [c ]++ ;
165+     }
166+ }
167+ 
147168void  ggml_dyn_tallocr_remove_block (struct  ggml_dyn_tallocr  *  alloc , int  idx ) {
148169    int  chunk  =  alloc -> free_blocks [idx ].addr .chunk ;
149170    // shift all elements after idx by 1 to the left, overwriting the element at idx 
@@ -157,6 +178,27 @@ void ggml_dyn_tallocr_remove_block(struct ggml_dyn_tallocr * alloc, int idx) {
157178    }
158179}
159180
181+ // add a new chunk by creating a block of unclaimed space after the last chunk 
182+ int  ggml_dyn_tallocr_new_chunk (struct  ggml_dyn_tallocr  *  alloc , size_t  min_size ) {
183+     if  (alloc -> n_chunks  >= GGML_VBUFFER_MAX_CHUNKS ) {
184+         return  -1 ;
185+     }
186+     int  i  =  alloc -> free_blocks_begin [alloc -> n_chunks ];
187+     alloc -> free_blocks [i ].addr .chunk  =  alloc -> n_chunks ;
188+     alloc -> free_blocks [i ].addr .offset  =  0 ;
189+     // available space in a chunk is limited to max_chunk_size, but can be higher if: 
190+     // 1. a single tensor exceeds the maximum, and cannot fit any other way 
191+     // 2. we are running out of chunks 
192+     // backends will either manage to allocate the larger size, or report an error. 
193+     alloc -> free_blocks [i ].size  =  MAX (min_size , alloc -> max_chunk_size );
194+     if  (alloc -> n_chunks  ==  GGML_VBUFFER_MAX_CHUNKS  -  1 ) {
195+         alloc -> free_blocks [i ].size  =  SIZE_MAX /2 ;
196+     }
197+     alloc -> free_blocks_begin [alloc -> n_chunks  +  1 ] =  i  +  1 ;
198+     alloc -> n_chunks ++ ;
199+     return  i ;
200+ }
201+ 
160202#ifdef  GGML_ALLOCATOR_DEBUG 
161203static  void  add_allocated_tensor (struct  ggml_dyn_tallocr  *  alloc , struct  buffer_address  addr , const  struct  ggml_tensor  *  tensor ) {
162204    for  (int  i  =  0 ; i  <  1024 ; i ++ ) {
@@ -187,7 +229,7 @@ static struct buffer_address ggml_dyn_tallocr_alloc(struct ggml_dyn_tallocr * al
187229    int  best_fit_block  =  -1 ;
188230    size_t  max_avail  =  0 ;
189231
190-     // find the best fitting free block besides the last block 
232+     // find the best fitting free block in any chunk  besides the last block 
191233    for  (int  c  =  0 ; c  <  alloc -> n_chunks ; ++ c ) {
192234        struct  free_block_range  blocks  =  ggml_dyn_tallocr_free_block_range (alloc , c );
193235        size_t  best_fit_size  =  SIZE_MAX ;
@@ -202,7 +244,7 @@ static struct buffer_address ggml_dyn_tallocr_alloc(struct ggml_dyn_tallocr * al
202244    }
203245
204246    if  (best_fit_block  ==  -1 ) {
205-         // no suitable block found, try the last block (ie. growing  a chunks size) 
247+         // no suitable block found, try the last block (this will grow a chunk's size)
206248        for  (int  c  =  0 ; c  <  alloc -> n_chunks ; ++ c ) {
207249            struct  free_block_range  blocks  =  ggml_dyn_tallocr_free_block_range (alloc , c );
208250            if  (blocks .size  >  0 ) {
@@ -218,28 +260,13 @@ static struct buffer_address ggml_dyn_tallocr_alloc(struct ggml_dyn_tallocr * al
218260
219261    if  (best_fit_block  ==  -1 ) {
220262        // none of the existing chunks have enough space left 
221-         if  (alloc -> n_chunks  <  GGML_VBUFFER_MAX_CHUNKS ) {
222-             // add a new chunk by creating a block of unclaimed space after the last chunk 
223-             int  i  =  alloc -> free_blocks_begin [alloc -> n_chunks ];
224-             alloc -> free_blocks [i ].addr .chunk  =  alloc -> n_chunks ;
225-             alloc -> free_blocks [i ].addr .offset  =  0 ;
226-             // available space in a chunk is limited to max_chunk_size, but can be higher if: 
227-             // 1. a single tensor exceeds the maximum, and cannot fit any other way 
228-             // 2. we are running out of chunks 
229-             // backends will either manage to allocate the larger size, or report an error. 
230-             alloc -> free_blocks [i ].size  =  MAX (size , alloc -> max_chunk_size );
231-             if  (alloc -> n_chunks  ==  GGML_VBUFFER_MAX_CHUNKS  -  1 ) {
232-                 alloc -> free_blocks [i ].size  =  SIZE_MAX /2 ;
233-             }
234-             alloc -> free_blocks_begin [alloc -> n_chunks  +  1 ] =  i  +  1 ;
235-             alloc -> n_chunks ++ ;
236-             best_fit_block  =  i ;
237-         } else  {
238-             // since the last chunk always has virtually endless memory, this should never happen 
239-             GGML_LOG_ERROR ("%s: not enough space in the buffer to allocate %zu bytes, largest block available %zu bytes\n" ,
240-                 __func__ , size , max_avail );
241-             GGML_ABORT ("graph allocation: failed to reserve memory" );
242-         }
263+         best_fit_block  =  ggml_dyn_tallocr_new_chunk (alloc , size );
264+     }
265+     if  (best_fit_block  ==  -1 ) {
266+         // since the last chunk always has virtually endless memory, this should never happen 
267+         GGML_LOG_ERROR ("%s: not enough space in the buffer to allocate %zu bytes, largest block available %zu bytes\n" ,
268+             __func__ , size , max_avail );
269+         GGML_ABORT ("graph allocation: failed to reserve memory" );
243270    }
244271
245272    struct  free_block  *  block  =  & alloc -> free_blocks [best_fit_block ];
@@ -336,23 +363,7 @@ static void ggml_dyn_tallocr_free_tensor(struct ggml_dyn_tallocr * alloc, struct
336363        }
337364    }
338365    // otherwise, add a new block 
339-     int  n_free_blocks  =  alloc -> free_blocks_begin [alloc -> n_chunks ];
340-     GGML_ASSERT (n_free_blocks  <  MAX_FREE_BLOCKS  &&  "out of free blocks" );
341-     // insert the new block in the correct position to keep the array sorted by address (to make merging blocks faster) 
342-     int  insert_pos  =  blocks .begin ;
343-     while  (insert_pos  <  blocks .end  &&  alloc -> free_blocks [insert_pos ].addr .offset  <  addr .offset ) {
344-         insert_pos ++ ;
345-     }
346-     // shift all blocks from insert_pos onward to make room for the new block 
347-     for  (int  i  =  n_free_blocks ; i  >  insert_pos ; i -- ) {
348-         alloc -> free_blocks [i ] =  alloc -> free_blocks [i - 1 ];
349-     }
350-     // insert the new block 
351-     alloc -> free_blocks [insert_pos ].addr  =  addr ;
352-     alloc -> free_blocks [insert_pos ].size  =  size ;
353-     for  (int  c  =  addr .chunk  +  1 ; c  <  alloc -> n_chunks  +  1 ; c ++ ) {
354-         alloc -> free_blocks_begin [c ]++ ;
355-     }
366+     ggml_dyn_tallocr_insert_block (alloc , addr , size );
356367
357368    GGML_UNUSED (tensor );
358369}
0 commit comments