6565#include < aclnnop/aclnn_eq_tensor.h>
6666#include < aclnnop/aclnn_gt_scalar.h>
6767#include < aclnnop/aclnn_pow.h>
68- #include < aclnnop/aclnn_grouped_matmul_v2 .h>
68+ #include < aclnnop/aclnn_grouped_matmul_v3 .h>
6969#include < aclnnop/aclnn_fused_infer_attention_score_v2.h>
7070#include < float.h>
7171
@@ -2701,9 +2701,9 @@ static void ggml_cann_mul_mat_id_fp(ggml_backend_cann_context& ctx, ggml_tensor*
27012701 }
27022702
27032703 size_t GROUP_SIZE = 128 ;
2704- // GroupedMatmulV2 required tensor_list.size < 128
2704+ // GroupedMatmulV3 requires tensor_list.size <= 128 (GROUP_SIZE)
27052705 for (size_t i = 0 ; i < src0_tensor_vec.size (); i += GROUP_SIZE) {
2706- // split and call GroupedMatmulV2
2706+ // split and call GroupedMatmulV3
27072707 size_t end = std::min (i + GROUP_SIZE, src0_tensor_vec.size ());
27082708 std::vector<aclTensor*> src0_tensor_vec_split (src0_tensor_vec.begin () + i, src0_tensor_vec.begin () + end);
27092709 std::vector<aclTensor*> src1_tensor_vec_split (src1_tensor_vec.begin () + i, src1_tensor_vec.begin () + end);
@@ -2713,7 +2713,7 @@ static void ggml_cann_mul_mat_id_fp(ggml_backend_cann_context& ctx, ggml_tensor*
27132713 aclTensorList* src1_tensor_list = aclCreateTensorList (src1_tensor_vec_split.data (), src1_tensor_vec_split.size ());
27142714 aclTensorList* dst_tensor_list = aclCreateTensorList (dst_tensor_vec_split.data (), dst_tensor_vec_split.size ());
27152715
2716- GGML_CANN_CALL_ACLNN_OP (ctx, GroupedMatmulV2 , src1_tensor_list, src0_tensor_list,
2716+ GGML_CANN_CALL_ACLNN_OP (ctx, GroupedMatmulV3 , src1_tensor_list, src0_tensor_list,
27172717 nullptr , nullptr , nullptr , nullptr , nullptr , nullptr , 0 , -1 , dst_tensor_list);
27182718
27192719 ggml_cann_release_resources (ctx, src0_tensor_list, src1_tensor_list, dst_tensor_list);
0 commit comments