Skip to content

Commit b617cfd

Browse files
authored
ggml-alloc : fix leak when reusing a tensor with a larger size (ggml-org#16679)
1 parent 7906850 commit b617cfd

File tree

1 file changed

+22
-0
lines changed

1 file changed

+22
-0
lines changed

ggml/src/ggml-alloc.c

Lines changed: 22 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -598,6 +598,26 @@ static bool ggml_gallocr_is_allocated(ggml_gallocr_t galloc, struct ggml_tensor
598598
return t->data != NULL || ggml_gallocr_hash_get(galloc, t)->allocated;
599599
}
600600

601+
// free the extra space at the end if the new tensor is smaller
602+
static void ggml_gallocr_free_extra_space(ggml_gallocr_t galloc, struct ggml_tensor * node, struct ggml_tensor * parent) {
603+
struct hash_node * hn = ggml_gallocr_hash_get(galloc, node);
604+
struct hash_node * p_hn = ggml_gallocr_hash_get(galloc, parent);
605+
606+
size_t parent_size = ggml_backend_buft_get_alloc_size(galloc->bufts[p_hn->buffer_id], parent);
607+
size_t node_size = ggml_backend_buft_get_alloc_size(galloc->bufts[hn->buffer_id], node);
608+
609+
GGML_ASSERT(parent_size >= node_size);
610+
611+
if (parent_size > node_size) {
612+
struct ggml_dyn_tallocr * p_alloc = galloc->buf_tallocs[p_hn->buffer_id];
613+
struct buffer_address p_addr = p_hn->addr;
614+
p_addr.offset += node_size;
615+
size_t extra_size = parent_size - node_size;
616+
AT_PRINTF("freeing extra %zu bytes from parent %s for %s\n", extra_size, parent->name, node->name);
617+
ggml_dyn_tallocr_free_tensor(p_alloc, p_addr, extra_size, parent);
618+
}
619+
}
620+
601621
static void ggml_gallocr_allocate_node(ggml_gallocr_t galloc, struct ggml_tensor * node, int buffer_id) {
602622
GGML_ASSERT(buffer_id >= 0);
603623
struct hash_node * hn = ggml_gallocr_hash_get(galloc, node);
@@ -643,13 +663,15 @@ static void ggml_gallocr_allocate_node(ggml_gallocr_t galloc, struct ggml_tensor
643663
hn->addr = p_hn->addr;
644664
p_hn->allocated = false; // avoid freeing the parent
645665
view_src_hn->allocated = false;
666+
ggml_gallocr_free_extra_space(galloc, node, view_src);
646667
return;
647668
}
648669
} else {
649670
AT_PRINTF("reusing parent %s for %s\n", parent->name, node->name);
650671
hn->buffer_id = p_hn->buffer_id;
651672
hn->addr = p_hn->addr;
652673
p_hn->allocated = false; // avoid freeing the parent
674+
ggml_gallocr_free_extra_space(galloc, node, parent);
653675
return;
654676
}
655677
}

0 commit comments

Comments
 (0)