Skip to content

Commit 3a7ffd8

Browse files
author
luyuhong
committed
[CANN]: Replace aclrtMemsetSync with the aclnnInplaceZero operator so that zero tensors are created more efficiently and consistently within the computation graph
1 parent 7e00e60 commit 3a7ffd8

File tree

1 file changed

+5
-1
lines changed

1 file changed

+5
-1
lines changed

ggml/src/ggml-cann/aclnn_ops.cpp

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -67,6 +67,7 @@
6767
#include <aclnnop/aclnn_pow.h>
6868
#include <aclnnop/aclnn_grouped_matmul_v2.h>
6969
#include <aclnnop/aclnn_fused_infer_attention_score_v2.h>
70+
#include "aclnnop/aclnn_zero.h"
7071
#include <float.h>
7172

7273
#include <cmath>
@@ -804,9 +805,11 @@ static aclTensor* aclnn_zero(ggml_backend_cann_context& ctx, void* buffer,
804805
nb[i] = nb[i - 1] * ne[i - 1];
805806
}
806807

807-
ggml_cann_async_memset(ctx, buffer, n_bytes, 0);
808808
aclTensor* zero =
809809
ggml_cann_create_tensor(buffer, type, type_size, ne, nb, dims);
810+
811+
GGML_CANN_CALL_ACLNN_OP(ctx, InplaceZero, zero);
812+
810813
return zero;
811814
}
812815

@@ -834,6 +837,7 @@ static aclTensor* aclnn_values(ggml_backend_cann_context& ctx, void* buffer,
834837
float value = 1.0f) {
835838
aclTensor* acl_tensor =
836839
aclnn_zero(ctx, buffer, n_bytes, ne, dims, type, type_size);
840+
837841
float alpha_host = 1.0f;
838842
aclScalar* alpha = aclCreateScalar(&alpha_host, aclDataType::ACL_FLOAT);
839843
aclScalar* other = aclCreateScalar(&value, aclDataType::ACL_FLOAT);

0 commit comments

Comments
 (0)