@@ -110,17 +110,21 @@ static bool ggml_buffer_address_less(struct buffer_address a, struct buffer_addr
110110}
111111
// A contiguous region of unused memory inside a single chunk.
// Entries live in tallocr_chunk::free_blocks, kept sorted by offset.
struct free_block {
    size_t offset; // byte offset of the free region within its chunk
    size_t size;   // size of the free region in bytes
};
116116
// Per-chunk allocator state: a sorted free list plus a high-water mark.
struct tallocr_chunk {
    struct free_block free_blocks[MAX_FREE_BLOCKS]; // sorted by offset so adjacent blocks can be merged on free
    int n_free_blocks;                              // number of valid entries in free_blocks
    size_t max_size;                                // highest offset + size handed out; the backing buffer size this chunk needs
};
122+ 
117123struct  ggml_dyn_tallocr  {
118124    size_t  alignment ;
119-     int  n_chunks ;
120-     int  free_blocks_begin [GGML_VBUFFER_MAX_CHUNKS  +  1 ]; // end[chunk] == begin[chunk+1] 
121-     struct  free_block  free_blocks [MAX_FREE_BLOCKS ];
122-     size_t  max_size [GGML_VBUFFER_MAX_CHUNKS ];
123125    size_t  max_chunk_size ;
126+     struct  tallocr_chunk  *  chunks [GGML_VBUFFER_MAX_CHUNKS ];
127+     int  n_chunks ;
124128
125129#ifdef  GGML_ALLOCATOR_DEBUG 
126130    struct  {
@@ -130,73 +134,49 @@ struct ggml_dyn_tallocr {
130134#endif 
131135};
132136
133- struct  free_block_range  {
134-     int  begin ;
135-     int  end ;
136-     int  size ;
137- };
138- 
139- static  struct  free_block_range  ggml_dyn_tallocr_free_block_range (const  struct  ggml_dyn_tallocr  *  alloc , int  chunk ) {
140-     struct  free_block_range  range ;
141-     range .begin  =  alloc -> free_blocks_begin [chunk ];
142-     range .end    =  alloc -> free_blocks_begin [chunk  +  1 ];
143-     range .size   =  range .end  -  range .begin ;
144-     return  range ;
145- }
146- 
147- void  ggml_dyn_tallocr_insert_block (struct  ggml_dyn_tallocr  *  alloc , struct  buffer_address  addr , size_t  size ) {
148-     int  total_blocks  =  alloc -> free_blocks_begin [alloc -> n_chunks ];
149-     GGML_ASSERT (total_blocks  <  MAX_FREE_BLOCKS  &&  "out of free blocks" );
// Add the free region [offset, offset + size) to the chunk's free list.
// The list is kept sorted by offset so that neighboring blocks can be
// found and merged cheaply when tensors are freed.
// Aborts (via GGML_ASSERT) if the free list is already full.
void ggml_dyn_tallocr_insert_block(struct tallocr_chunk * chunk, size_t offset, size_t size) {
    GGML_ASSERT(chunk->n_free_blocks < MAX_FREE_BLOCKS && "out of free blocks");
    // insert the new block in the correct position to keep the array sorted by address (to make merging blocks faster)
    int insert_pos = 0;
    while (insert_pos < chunk->n_free_blocks && chunk->free_blocks[insert_pos].offset < offset) {
        insert_pos++;
    }
    // shift all blocks from insert_pos onward to make room for the new block
    // (writing up to index n_free_blocks is safe: the assert above guarantees n_free_blocks < MAX_FREE_BLOCKS)
    for (int i = chunk->n_free_blocks; i > insert_pos; i--) {
        chunk->free_blocks[i] = chunk->free_blocks[i - 1];
    }
    // insert the new block
    chunk->free_blocks[insert_pos].offset = offset;
    chunk->free_blocks[insert_pos].size   = size;
    chunk->n_free_blocks++;
}
167153
168- void  ggml_dyn_tallocr_remove_block (struct  ggml_dyn_tallocr  *  alloc , int  idx ) {
169-     int  chunk  =  alloc -> free_blocks [idx ].addr .chunk ;
154+ void  ggml_dyn_tallocr_remove_block (struct  tallocr_chunk  *  chunk , int  idx ) {
170155    // shift all elements after idx by 1 to the left, overwriting the element at idx 
171-     int  n_free_blocks  =  alloc -> free_blocks_begin [alloc -> n_chunks ];
172-     for  (int  i  =  idx ; i  <  n_free_blocks ; i ++ ) {
173-         alloc -> free_blocks [i ] =  alloc -> free_blocks [i  +  1 ];
174-     }
175-     // adjust first element index of all chunks after the current one 
176-     for  (int  c  =  chunk  +  1 ; c  <  alloc -> n_chunks  +  1 ; c ++ ) {
177-         alloc -> free_blocks_begin [c ]-- ;
156+     for  (int  i  =  idx ; i  <  chunk -> n_free_blocks ; i ++ ) {
157+         chunk -> free_blocks [i ] =  chunk -> free_blocks [i + 1 ];
178158    }
159+     chunk -> n_free_blocks -- ;
179160}
180161
181- // add a new chunk by creating a block of unclaimed space after the last chunk 
182162int  ggml_dyn_tallocr_new_chunk (struct  ggml_dyn_tallocr  *  alloc , size_t  min_size ) {
183163    if  (alloc -> n_chunks  >= GGML_VBUFFER_MAX_CHUNKS ) {
184164        return  -1 ;
185165    }
186-     int   i   =   alloc -> free_blocks_begin [ alloc -> n_chunks ] ;
187-     alloc -> free_blocks [ i ]. addr . chunk  =  alloc -> n_chunks ;
188-     alloc -> free_blocks [i ]. addr .offset  =  0 ;
166+     struct   tallocr_chunk   *   chunk   =   calloc ( 1 ,  sizeof ( struct   tallocr_chunk )) ;
167+     chunk -> n_free_blocks  =  1 ;
168+     chunk -> free_blocks [0 ] .offset  =  0 ;
189169    // available space in a chunk is limited to max_chunk_size, but can be higher if: 
190170    // 1. a single tensor exceeds the maximum, and cannot fit any other way 
191171    // 2. we are running out of chunks 
192172    // backends will either manage to allocate the larger size, or report an error. 
193-     alloc -> free_blocks [i ].size  =  MAX (min_size , alloc -> max_chunk_size );
173+     chunk -> free_blocks [0 ].size  =  MAX (min_size , alloc -> max_chunk_size );
194174    if  (alloc -> n_chunks  ==  GGML_VBUFFER_MAX_CHUNKS  -  1 ) {
195-         alloc -> free_blocks [i ].size  =  SIZE_MAX /2 ;
175+         chunk -> free_blocks [0 ].size  =  SIZE_MAX /2 ;
196176    }
197-     alloc -> free_blocks_begin [alloc -> n_chunks   +   1 ] =  i   +   1 ;
177+     alloc -> chunks [alloc -> n_chunks ] =  chunk ;
198178    alloc -> n_chunks ++ ;
199-     return  i ;
179+     return  alloc -> n_chunks   -   1 ;
200180}
201181
202182#ifdef  GGML_ALLOCATOR_DEBUG 
@@ -226,17 +206,19 @@ static struct buffer_address ggml_dyn_tallocr_alloc(struct ggml_dyn_tallocr * al
226206
227207    AT_PRINTF ("%s: allocating %s (%zu bytes) - " , __func__ , tensor -> name , size );
228208
209+     int  best_fit_chunk  =  -1 ;
229210    int  best_fit_block  =  -1 ;
230211    size_t  max_avail  =  0 ;
231212
232213    // find the best fitting free block in any chunk besides the last block 
233214    for  (int  c  =  0 ; c  <  alloc -> n_chunks ; ++ c ) {
234-         struct  free_block_range   blocks   =   ggml_dyn_tallocr_free_block_range ( alloc ,  c ) ;
215+         struct  tallocr_chunk   *   chunk   =   alloc -> chunks [ c ] ;
235216        size_t  best_fit_size  =  SIZE_MAX ;
236-         for  (int  i  =  blocks . begin ; i  <  blocks . end  -  1 ; i ++ ) {
237-             struct  free_block  *  block  =  & alloc -> free_blocks [i ];
217+         for  (int  i  =  0 ; i  <  chunk -> n_free_blocks  -  1 ; i ++ ) {
218+             struct  free_block  *  block  =  & chunk -> free_blocks [i ];
238219            max_avail  =  MAX (max_avail , block -> size );
239220            if  (block -> size  >= size  &&  block -> size  <= best_fit_size ) {
221+                 best_fit_chunk  =  c ;
240222                best_fit_block  =  i ;
241223                best_fit_size  =  block -> size ;
242224            }
@@ -246,12 +228,13 @@ static struct buffer_address ggml_dyn_tallocr_alloc(struct ggml_dyn_tallocr * al
246228    if  (best_fit_block  ==  -1 ) {
247229        // no suitable block found, try the last block (this will grow a chunks size) 
248230        for  (int  c  =  0 ; c  <  alloc -> n_chunks ; ++ c ) {
249-             struct  free_block_range   blocks   =   ggml_dyn_tallocr_free_block_range ( alloc ,  c ) ;
250-             if  (blocks . size  >  0 ) {
251-                 struct  free_block  *  block  =  & alloc -> free_blocks [blocks . end  -  1 ];
231+             struct  tallocr_chunk   *   chunk   =   alloc -> chunks [ c ] ;
232+             if  (chunk -> n_free_blocks  >  0 ) {
233+                 struct  free_block  *  block  =  & chunk -> free_blocks [chunk -> n_free_blocks  -  1 ];
252234                max_avail  =  MAX (max_avail , block -> size );
253235                if  (block -> size  >= size ) {
254-                     best_fit_block  =  blocks .end  -  1 ;
236+                     best_fit_chunk  =  c ;
237+                     best_fit_block  =  chunk -> n_free_blocks  -  1 ;
255238                    break ;
256239                }
257240            }
@@ -260,7 +243,8 @@ static struct buffer_address ggml_dyn_tallocr_alloc(struct ggml_dyn_tallocr * al
260243
261244    if  (best_fit_block  ==  -1 ) {
262245        // none of the existing chunks have enough space left 
263-         best_fit_block  =  ggml_dyn_tallocr_new_chunk (alloc , size );
246+         best_fit_chunk  =  ggml_dyn_tallocr_new_chunk (alloc , size );
247+         best_fit_block  =  0 ;
264248    }
265249    if  (best_fit_block  ==  -1 ) {
266250        // since the last chunk always has virtually endless memory, this should never happen 
@@ -269,13 +253,14 @@ static struct buffer_address ggml_dyn_tallocr_alloc(struct ggml_dyn_tallocr * al
269253        GGML_ABORT ("graph allocation: failed to reserve memory" );
270254    }
271255
272-     struct  free_block  *  block  =  & alloc -> free_blocks [best_fit_block ];
273-     struct  buffer_address  addr  =  block -> addr ;
274-     block -> addr .offset  +=  size ;
256+     struct  tallocr_chunk  *  chunk  =  alloc -> chunks [best_fit_chunk ];
257+     struct  free_block     *  block  =  & chunk -> free_blocks [best_fit_block ];
258+     struct  buffer_address   addr   =  {.chunk  =  best_fit_chunk , .offset  =  block -> offset  };
259+     block -> offset  +=  size ;
275260    block -> size  -=  size ;
276261    if  (block -> size  ==  0 ) {
277262        // remove block if empty 
278-         ggml_dyn_tallocr_remove_block (alloc , best_fit_block );
263+         ggml_dyn_tallocr_remove_block (chunk , best_fit_block );
279264    }
280265
281266    AT_PRINTF ("block %d, offset %zu, chunk %d\n" , best_fit_block , addr .offset , addr .chunk );
@@ -311,7 +296,7 @@ static struct buffer_address ggml_dyn_tallocr_alloc(struct ggml_dyn_tallocr * al
311296    }
312297#endif 
313298
314-     alloc -> max_size [ addr . chunk ]  =  MAX (alloc -> max_size [ addr . chunk ] , addr .offset  +  size );
299+     chunk -> max_size  =  MAX (chunk -> max_size , addr .offset  +  size );
315300
316301    return  addr ;
317302
@@ -329,51 +314,50 @@ static void ggml_dyn_tallocr_free_tensor(struct ggml_dyn_tallocr * alloc, struct
329314    remove_allocated_tensor (alloc , addr , tensor );
330315#endif 
331316
332-     struct  free_block_range   blocks   =   ggml_dyn_tallocr_free_block_range ( alloc ,  addr .chunk ) ;
317+     struct  tallocr_chunk   *   chunk   =   alloc -> chunks [ addr .chunk ] ;
333318
334319    // see if we can merge with an existing block 
335-     for  (int  i  =  blocks . begin ; i  <  blocks . end ; i ++ ) {
336-         struct  free_block  *  block  =  & alloc -> free_blocks [i ];
320+     for  (int  i  =  0 ; i  <  chunk -> n_free_blocks ; i ++ ) {
321+         struct  free_block  *  block  =  & chunk -> free_blocks [i ];
337322        // check if ptr is at the end of the block 
338-         if  (block -> addr . offset  +  block -> size  ==  addr .offset ) {
323+         if  (block -> offset  +  block -> size  ==  addr .offset ) {
339324            block -> size  +=  size ;
340325            // check if we can merge with the next block 
341-             if  (i  <  blocks . end  -  1 ) {
342-                 struct  free_block  *  next  =  & alloc -> free_blocks [i + 1 ];
343-                 if  (block -> addr . offset  +  block -> size  ==  next -> addr . offset ) {
326+             if  (i  <  chunk -> n_free_blocks  -  1 ) {
327+                 struct  free_block  *  next  =  & chunk -> free_blocks [i + 1 ];
328+                 if  (block -> offset  +  block -> size  ==  next -> offset ) {
344329                    block -> size  +=  next -> size ;
345-                     ggml_dyn_tallocr_remove_block (alloc , i + 1 );
330+                     ggml_dyn_tallocr_remove_block (chunk , i + 1 );
346331                }
347332            }
348333            return ;
349334        }
350335        // check if ptr is at the beginning of the block 
351-         if  (addr .offset  +  size  ==  block -> addr . offset ) {
352-             block -> addr . offset  =  addr .offset ;
336+         if  (addr .offset  +  size  ==  block -> offset ) {
337+             block -> offset  =  addr .offset ;
353338            block -> size  +=  size ;
354339            // check if we can merge with the previous block 
355-             if  (i  >  blocks . begin ) {
356-                 struct  free_block  *  prev  =  & alloc -> free_blocks [i - 1 ];
357-                 if  (prev -> addr . offset  +  prev -> size  ==  block -> addr . offset ) {
340+             if  (i  >  0 ) {
341+                 struct  free_block  *  prev  =  & chunk -> free_blocks [i - 1 ];
342+                 if  (prev -> offset  +  prev -> size  ==  block -> offset ) {
358343                    prev -> size  +=  block -> size ;
359-                     ggml_dyn_tallocr_remove_block (alloc , i );
344+                     ggml_dyn_tallocr_remove_block (chunk , i );
360345                }
361346            }
362347            return ;
363348        }
364349    }
365350    // otherwise, add a new block 
366-     ggml_dyn_tallocr_insert_block (alloc , addr , size );
351+     ggml_dyn_tallocr_insert_block (chunk , addr . offset , size );
367352
368353    GGML_UNUSED (tensor );
369354}
370355
371356static  void  ggml_dyn_tallocr_reset (struct  ggml_dyn_tallocr  *  alloc ) {
372357    for  (int  i  =  0 ; i  <  GGML_VBUFFER_MAX_CHUNKS ; i ++ ) {
373-         alloc -> free_blocks_begin [i ]  =   0 ;
374-         alloc -> max_size [i ] =  0 ;
358+         free ( alloc -> chunks [i ]) ;
359+         alloc -> chunks [i ] =  NULL ;
375360    }
376-     alloc -> free_blocks_begin [GGML_VBUFFER_MAX_CHUNKS ] =  0 ;
377361    alloc -> n_chunks  =  0 ;
378362
379363#ifdef  GGML_ALLOCATOR_DEBUG 
@@ -387,12 +371,10 @@ static struct ggml_dyn_tallocr * ggml_dyn_tallocr_new(size_t alignment, size_t m
387371    struct  ggml_dyn_tallocr  *  alloc  =  (struct  ggml_dyn_tallocr  * )malloc (sizeof (struct  ggml_dyn_tallocr ));
388372
389373    * alloc  =  (struct  ggml_dyn_tallocr ) {
390-         /*.alignment         = */  alignment ,
391-         /*.n_chunks          = */  0 ,
392-         /*.free_blocks_begin = */  {0 },
393-         /*.free_blocks       = */  {{{0 }, 0 }},
394-         /*.max_size          = */  {0 },
395-         /*.max_chunk_size    = */  MIN (max_buffer_size , SIZE_MAX /2 ), // clamp to avoid overflows 
374+         /*.alignment      = */  alignment ,
375+         /*.max_chunk_size = */  MIN (max_buffer_size , SIZE_MAX /2 ), // clamp to avoid overflows 
376+         /*.chunks         = */  {NULL },
377+         /*.n_chunks       = */  0 ,
396378#ifdef  GGML_ALLOCATOR_DEBUG 
397379        /*.allocated_tensors = */  {{0 }},
398380#endif 
@@ -404,13 +386,16 @@ static struct ggml_dyn_tallocr * ggml_dyn_tallocr_new(size_t alignment, size_t m
404386}
405387
406388static  void  ggml_dyn_tallocr_free (struct  ggml_dyn_tallocr  *  alloc ) {
389+     for  (int  i  =  0 ; i  <  alloc -> n_chunks ; ++ i ) {
390+         free (alloc -> chunks [i ]);
391+     }
407392    free (alloc );
408393}
409394
410395static  size_t  ggml_dyn_tallocr_max_size (struct  ggml_dyn_tallocr  *  alloc ) {
411396    size_t  max_size  =  0 ;
412397    for  (int  i  =  0 ; i  <  alloc -> n_chunks ; i ++ ) {
413-         max_size  +=  alloc -> max_size [i ];
398+         max_size  +=  alloc -> chunks [i ]-> max_size ;
414399    }
415400    return  max_size ;
416401}
@@ -453,7 +438,7 @@ static struct vbuffer * ggml_vbuffer_alloc(ggml_backend_buffer_type_t buft, cons
453438    }
454439
455440    for  (int  n  =  0 ; n  <  talloc -> n_chunks ; n ++ ) {
456-         size_t  chunk_size  =  talloc -> max_size [n ];
441+         size_t  chunk_size  =  talloc -> chunks [n ]-> max_size ;
457442        buf -> chunks [n ] =  ggml_backend_buft_alloc_buffer (buft , chunk_size );
458443        if  (buf -> chunks [n ] ==  NULL ) {
459444            ggml_vbuffer_free (buf );
0 commit comments