[CANN]:Replace aclrtMemsetSync with aclnnInplaceZero operator to create zero tensors more efficiently and consistently within the computation graph

luyuhong · luyuhong · commit ac63e8308d7c · 2025-06-11T09:34:44.000+08:00
diff --git a/ggml/src/ggml-cann/aclnn_ops.cpp b/ggml/src/ggml-cann/aclnn_ops.cpp
@@ -67,7 +67,7 @@
 #include <aclnnop/aclnn_pow.h>
 #include <aclnnop/aclnn_grouped_matmul_v2.h>
 #include <aclnnop/aclnn_fused_infer_attention_score_v2.h>
-#include "aclnnop/aclnn_zero.h"
+#include <aclnnop/aclnn_zero.h>
 #include <float.h>
 
 #include <cmath>
@@ -807,10 +807,9 @@ static aclTensor* aclnn_zero(ggml_backend_cann_context& ctx, void* buffer,
 
     aclTensor* zero =
         ggml_cann_create_tensor(buffer, type, type_size, ne, nb, dims);
-
     GGML_CANN_CALL_ACLNN_OP(ctx, InplaceZero, zero);
-
     return zero;
+    GGML_UNUSED(n_bytes);
 }
 
 /**
@@ -837,7 +836,6 @@ static aclTensor* aclnn_values(ggml_backend_cann_context& ctx, void* buffer,
                                float value = 1.0f) {
     aclTensor* acl_tensor =
         aclnn_zero(ctx, buffer, n_bytes, ne, dims, type, type_size);
-    
     float alpha_host = 1.0f;
     aclScalar* alpha = aclCreateScalar(&alpha_host, aclDataType::ACL_FLOAT);
     aclScalar* other = aclCreateScalar(&value, aclDataType::ACL_FLOAT);