@@ -2427,6 +2427,7 @@ static void aclnn_mat_mul(ggml_backend_cann_context& ctx, aclTensor* acl_input,
                           aclTensor* acl_weight, aclTensor* acl_dst) {
     int8_t cube_math_type = 1;  // ALLOW_FP32_DOWN_PRECISION, when input is
                                 // fp32, atlas a2 will transpose it to HFLOAT32.
+
     uint64_t workspaceSize = 0;
     aclOpExecutor* executor;
     void* workspaceAddr = nullptr;
@@ -2531,7 +2532,7 @@ static void aclnn_mat_mul_3d(ggml_backend_cann_context& ctx, aclTensor* acl_inpu
  * multiplication will be stored.
  */
 static void ggml_cann_mat_mul_fp(ggml_backend_cann_context& ctx,
-                                   ggml_tensor* dst) {
+                                 ggml_tensor* dst) {
     ggml_tensor* weight = dst->src[0];  // weight
     ggml_tensor* input = dst->src[1];   // input

@@ -2596,8 +2597,8 @@ static void ggml_cann_mat_mul_fp(ggml_backend_cann_context& ctx,
  * multiplication will be stored.
  */
 static void ggml_cann_mul_mat_quant(ggml_backend_cann_context& ctx,
-                                      ggml_tensor* dst,
-                                      const enum ggml_type type) {
+                                    ggml_tensor* dst,
+                                    const enum ggml_type type) {
     ggml_tensor* src0 = dst->src[0];  // weight
     ggml_tensor* src1 = dst->src[1];  // input

@@ -2617,8 +2618,7 @@ static void ggml_cann_mul_mat_quant(ggml_backend_cann_context& ctx,
     size_t weight_stride = src0->ne[1] * src0->ne[0] * weight_elem_size;
     size_t weight_size = weight_stride * src0->ne[2] * src0->ne[3];

-    // scale stored at the end of weight.
-    // scale need transpose.
+    // scale stored at the end of weight; also needs transpose.
     size_t scale_elem_size = sizeof(uint16_t);
     size_t scale_nb[] = {src0->ne[0] / QK8_0 * scale_elem_size, scale_elem_size};
     size_t scale_stride = src0->ne[1] * src0->ne[0] / QK8_0 * scale_elem_size;
@@ -2677,8 +2677,7 @@ static void ggml_cann_mul_mat_quant(ggml_backend_cann_context& ctx,
             int64_t batch0 = (n0 * src0->ne[2]) + c0;

             aclTensor* acl_input_tensor = ggml_cann_create_tensor(
-                (char*)input_buffer + batch1 * input_stride,
-                ACL_FLOAT16,
+                (char*)input_buffer + batch1 * input_stride, ACL_FLOAT16,
                 input_elem_size, input_ne, input_nb, 2);

             // first split
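
For context on the hunk at -2617: with Q8_0 weights, this path stores the int8 quantized data for all batches first and appends the fp16 per-block scales at the end of the weight buffer, which is why scale_nb and scale_stride divide ne[0] by QK8_0. The sketch below, assuming that layout and QK8_0 = 32, only mirrors the stride arithmetic visible in the diff; q8_0_offsets and offsets_q8_0 are illustrative names, not helpers from the CANN backend.

#include <cstddef>
#include <cstdint>

// Q8_0 packs 32 int8 weights per block, each block sharing one fp16 scale.
constexpr int64_t QK8_0_BLOCK = 32;  // assumption: matches ggml's QK8_0

struct q8_0_offsets {
    size_t weight_stride;  // bytes of int8 data per ne[0] x ne[1] slice
    size_t scale_stride;   // bytes of fp16 scales per ne[0] x ne[1] slice
    size_t scale_base;     // byte offset where the scale region starts
};

// Illustrative helper mirroring the stride arithmetic shown in the
// ggml_cann_mul_mat_quant hunk for a Q8_0 weight of logical shape ne[0..3].
static q8_0_offsets offsets_q8_0(const int64_t ne[4]) {
    const size_t weight_elem_size = sizeof(int8_t);    // one byte per quantized weight
    const size_t scale_elem_size  = sizeof(uint16_t);  // one fp16 scale per block

    q8_0_offsets o;
    o.weight_stride = ne[1] * ne[0] * weight_elem_size;
    o.scale_stride  = ne[1] * ne[0] / QK8_0_BLOCK * scale_elem_size;
    // quantized data for every ne[2] x ne[3] batch comes first; scales follow
    o.scale_base    = o.weight_stride * ne[2] * ne[3];
    return o;
}

Under that layout, the scales for batch index b would start at scale_base + b * scale_stride, analogous to how the loop above offsets input_buffer by batch1 * input_stride.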