@@ -226,16 +226,23 @@ static struct buffer_address ggml_dyn_tallocr_alloc(struct ggml_dyn_tallocr * al
226226 }
227227
228228 if (best_fit_block == -1 ) {
229- // no suitable block found, try the last block (this will grow a chunks size)
229+ // no suitable block found, try the last block (this may grow a chunk's size)
230+ int64_t best_reuse = INT64_MIN ;
230231 for (int c = 0 ; c < alloc -> n_chunks ; ++ c ) {
231232 struct tallocr_chunk * chunk = alloc -> chunks [c ];
232233 if (chunk -> n_free_blocks > 0 ) {
233234 struct free_block * block = & chunk -> free_blocks [chunk -> n_free_blocks - 1 ];
234235 max_avail = MAX (max_avail , block -> size );
235- if (block -> size >= size ) {
236+ int64_t reuse_factor = chunk -> max_size - block -> offset - size ;
237+ // reuse_factor < 0 : amount of extra memory that needs to be allocated
238+ // reuse_factor = 0 : allocated free space exactly matches tensor size
239+ // reuse_factor > 0 : superfluous memory that will remain unused
240+ bool better_reuse = best_reuse < 0 && reuse_factor > best_reuse ;
241+ bool better_fit = reuse_factor >= 0 && reuse_factor < best_reuse ;
242+ if (block -> size >= size && (better_reuse || better_fit )) {
236243 best_fit_chunk = c ;
237244 best_fit_block = chunk -> n_free_blocks - 1 ;
238- break ;
245+ best_reuse = reuse_factor ;
239246 }
240247 }
241248 }
@@ -268,7 +275,7 @@ static struct buffer_address ggml_dyn_tallocr_alloc(struct ggml_dyn_tallocr * al
268275#ifdef GGML_ALLOCATOR_DEBUG
269276 add_allocated_tensor (alloc , addr , tensor );
270277 size_t cur_max = addr .offset + size ;
271- if (cur_max > alloc -> max_size [ addr . chunk ] ) {
278+ if (cur_max > chunk -> max_size ) {
272279 // sort allocated_tensors by chunk/offset
273280 for (int i = 0 ; i < 1024 ; i ++ ) {
274281 for (int j = i + 1 ; j < 1024 ; j ++ ) {
0 commit comments