@@ -598,6 +598,26 @@ static bool ggml_gallocr_is_allocated(ggml_gallocr_t galloc, struct ggml_tensor
598598 return t -> data != NULL || ggml_gallocr_hash_get (galloc , t )-> allocated ;
599599}
600600
601+ // free the extra space at the end if the new tensor is smaller
602+ static void ggml_gallocr_free_extra_space (ggml_gallocr_t galloc , struct ggml_tensor * node , struct ggml_tensor * parent ) {
603+ struct hash_node * hn = ggml_gallocr_hash_get (galloc , node );
604+ struct hash_node * p_hn = ggml_gallocr_hash_get (galloc , parent );
605+
606+ size_t parent_size = ggml_backend_buft_get_alloc_size (galloc -> bufts [p_hn -> buffer_id ], parent );
607+ size_t node_size = ggml_backend_buft_get_alloc_size (galloc -> bufts [hn -> buffer_id ], node );
608+
609+ GGML_ASSERT (parent_size >= node_size );
610+
611+ if (parent_size > node_size ) {
612+ struct ggml_dyn_tallocr * p_alloc = galloc -> buf_tallocs [p_hn -> buffer_id ];
613+ struct buffer_address p_addr = p_hn -> addr ;
614+ p_addr .offset += node_size ;
615+ size_t extra_size = parent_size - node_size ;
616+ AT_PRINTF ("freeing extra %zu bytes from parent %s for %s\n" , extra_size , parent -> name , node -> name );
617+ ggml_dyn_tallocr_free_tensor (p_alloc , p_addr , extra_size , parent );
618+ }
619+ }
620+
601621static void ggml_gallocr_allocate_node (ggml_gallocr_t galloc , struct ggml_tensor * node , int buffer_id ) {
602622 GGML_ASSERT (buffer_id >= 0 );
603623 struct hash_node * hn = ggml_gallocr_hash_get (galloc , node );
@@ -643,13 +663,15 @@ static void ggml_gallocr_allocate_node(ggml_gallocr_t galloc, struct ggml_tensor
643663 hn -> addr = p_hn -> addr ;
644664 p_hn -> allocated = false; // avoid freeing the parent
645665 view_src_hn -> allocated = false;
666+ ggml_gallocr_free_extra_space (galloc , node , view_src );
646667 return ;
647668 }
648669 } else {
649670 AT_PRINTF ("reusing parent %s for %s\n" , parent -> name , node -> name );
650671 hn -> buffer_id = p_hn -> buffer_id ;
651672 hn -> addr = p_hn -> addr ;
652673 p_hn -> allocated = false; // avoid freeing the parent
674+ ggml_gallocr_free_extra_space (galloc , node , parent );
653675 return ;
654676 }
655677 }
0 commit comments