@@ -2622,13 +2622,13 @@ void ggml_cann_mul_mat_id_fp(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     char * src1_original = (char *) src1->data;
     char * dst_original = (char *) dst->data;
     size_t ori_src0_nb[4] = {nb00, nb01, nb02, nb03};
-
+
     // src0 is F16, src1 is F32, dst is F32
     ggml_cann_pool_alloc src0_cast_allocator;
     if (src0->type == GGML_TYPE_F16) {
         src0_cast_allocator.alloc(ctx.pool(), sizeof(float) * ggml_nelements(src0));
         void * src0_cast_buf = src0_cast_allocator.get();
-
+
         size_t cast_nb[GGML_MAX_DIMS];
         cast_nb[0] = sizeof(float_t);
         for (int i = 1; i < GGML_MAX_DIMS; i++) {
@@ -2736,13 +2736,12 @@ void ggml_cann_mul_mat_id_fp(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
                 char * dst_ptr = (char *)src1_cont_buf + total_num_src1_rows * nb11;
                 ggml_cann_async_memcpy(ctx, dst_ptr, src_ptr, nb11,
                     ACL_MEMCPY_DEVICE_TO_DEVICE);
-
+
                 num_src1_rows++;
                 total_num_src1_rows++;
             }
         }
     }
-
     // expert_map index is expert index
     expert_mapping expert_map;
     expert_map.row_mappings = row_mappings;
@@ -2798,7 +2797,7 @@ void ggml_cann_mul_mat_id_fp(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
                 ACL_MEMCPY_DEVICE_TO_DEVICE);
         }

-            }
+        }
         ggml_cann_release_resources(ctx, src0_tensor_list, src1_tensor_list, dst_tensor_list);
     }
     return;
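Note (not part of the diff): the first hunk's context lines set up byte strides for a contiguous F32 copy of the F16 src0 tensor, but the loop body is cut off by the hunk boundary. The sketch below shows the standard contiguous-stride computation that code is assumed to perform; the example shape values are hypothetical and the snippet is illustrative, not the upstream implementation.

// Minimal sketch, assuming cast_nb[i] is the byte stride of dimension i for a
// contiguous F32 buffer whose per-dimension element counts are in ne[].
#include <cstddef>
#include <cstdio>

int main() {
    const int GGML_MAX_DIMS = 4;               // ggml tensors have up to 4 dimensions
    size_t ne[GGML_MAX_DIMS] = {64, 8, 4, 1};  // example shape (hypothetical values)

    size_t cast_nb[GGML_MAX_DIMS];
    cast_nb[0] = sizeof(float);                // dim 0 stride is one F32 element
    for (int i = 1; i < GGML_MAX_DIMS; i++) {
        // contiguous layout: each stride is the previous stride times the
        // previous dimension's element count
        cast_nb[i] = cast_nb[i - 1] * ne[i - 1];
    }

    for (int i = 0; i < GGML_MAX_DIMS; i++) {
        printf("cast_nb[%d] = %zu bytes\n", i, cast_nb[i]);
    }
    return 0;
}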