Skip to content

Commit 56fc38b

Browse files
authored
CANN: fix CPU memory leak in CANN backend (ggml-org#16549)
This commit fixes a CPU-side (host) memory leak in the CANN backend that occurred when intermediate aclTensorList objects were not released after operator execution. The leak appeared during repeated invocations of CANN ops (e.g., FlashAttention) and caused host memory usage to grow over time. Proper resource cleanup (aclDestroyTensorList and related release logic) has been added so that all temporary tensors are correctly freed.
1 parent 1fb9504 commit 56fc38b

File tree

1 file changed

+6
-8
lines changed

1 file changed

+6
-8
lines changed

ggml/src/ggml-cann/aclnn_ops.cpp

Lines changed: 6 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -146,9 +146,7 @@ void ggml_cann_op_unary_gated(
146146
unary_op(ctx, acl_src0, acl_dst);
147147
GGML_CANN_CALL_ACLNN_OP(ctx, InplaceMul, acl_dst, acl_src1);
148148

149-
ggml_cann_release_resources(ctx, acl_src0, acl_dst);
150-
if(src1)
151-
ggml_cann_release_resources(ctx, acl_src1);
149+
ggml_cann_release_resources(ctx, acl_src0, acl_src1, acl_dst);
152150
}
153151

154152
/**
@@ -1851,7 +1849,7 @@ void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
18511849
dst->data, dst->ne, dst->nb,
18521850
src1, dst->type);
18531851

1854-
ggml_cann_release_resources(ctx, dequant_tensor);
1852+
ggml_cann_release_resources(ctx, acl_weight_tensor, acl_scale_tensor, dequant_tensor);
18551853
break;
18561854
}
18571855
default:
@@ -3290,8 +3288,8 @@ void ggml_cann_flash_attn_ext(ggml_backend_cann_context& ctx, ggml_tensor* dst){
32903288
aclTensor* acl_q_tensor = acl_src0_f16_tensor;
32913289
aclTensor* acl_k_tensors[] = {acl_src1_f16_tensor};
32923290
aclTensor* acl_v_tensors[] = {acl_src2_f16_tensor};
3293-
auto acl_k_tensor_list = aclCreateTensorList(acl_k_tensors, kvTensorNum);
3294-
auto acl_v_tensor_list = aclCreateTensorList(acl_v_tensors, kvTensorNum);
3291+
aclTensorList* acl_k_tensor_list = aclCreateTensorList(acl_k_tensors, kvTensorNum);
3292+
aclTensorList* acl_v_tensor_list = aclCreateTensorList(acl_v_tensors, kvTensorNum);
32953293

32963294
int64_t numHeads = src0->ne[2]; // N
32973295
int64_t numKeyValueHeads = src1->ne[2];
@@ -3362,8 +3360,8 @@ void ggml_cann_flash_attn_ext(ggml_backend_cann_context& ctx, ggml_tensor* dst){
33623360
}
33633361

33643362
ggml_cann_release_resources(ctx, acl_src0_f16_tensor,
3365-
acl_src1_f16_tensor,
3366-
acl_src2_f16_tensor,
3363+
acl_k_tensor_list,
3364+
acl_v_tensor_list,
33673365
fa_dst_tensor,
33683366
acl_dst_tensor,
33693367
bcast_pse_tensor);

0 commit comments

Comments
 (0)