@@ -1767,10 +1767,10 @@ void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
17671767        case  GGML_TYPE_F16: {
17681768            aclTensor* acl_src0 = ggml_cann_create_tensor (src0);
17691769            ggml_cann_pool_alloc src_buffer_allocator (
1770-                 ctx.pool (), ggml_nelements (src0) * sizeof (float_t ));
1770+                 ctx.pool (), ggml_nelements (src0) * sizeof (float ));
17711771            void * src_trans_buffer = src_buffer_allocator.get ();
17721772            size_t  src_trans_nb[GGML_MAX_DIMS];
1773-             src_trans_nb[0 ] = sizeof (float_t );
1773+             src_trans_nb[0 ] = sizeof (float );
17741774            for  (int  i = 1 ; i < GGML_MAX_DIMS; i++) {
17751775                src_trans_nb[i] = src_trans_nb[i - 1 ] * src0->ne [i - 1 ];
17761776            }
@@ -1814,14 +1814,14 @@ void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
18141814
18151815            //  [3,4,5,64] -> [3,4,5,2,32]
18161816            dequant_ne = weight_ne;
1817-             dequant_nb[0 ] = sizeof (float_t );
1817+             dequant_nb[0 ] = sizeof (float );
18181818            for  (int  i = 1 ; i < GGML_MAX_DIMS + 1 ; i++) {
18191819                dequant_nb[i] = dequant_nb[i - 1 ] * dequant_ne[i - 1 ];
18201820            }
18211821
18221822            scale_offset = ggml_nelements (src0) * sizeof (int8_t );
18231823            ggml_cann_pool_alloc dequant_buffer_allocator (
1824-                 ctx.pool (), ggml_nelements (src0) * sizeof (float_t ));
1824+                 ctx.pool (), ggml_nelements (src0) * sizeof (float ));
18251825
18261826            aclTensor* acl_weight_tensor = ggml_cann_create_tensor (
18271827                src0->data , ACL_INT8, sizeof (int8_t ), weight_ne, weight_nb,
@@ -1830,11 +1830,11 @@ void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
18301830                src0->data , ACL_FLOAT16, sizeof (uint16_t ), scale_ne, scale_nb,
18311831                GGML_MAX_DIMS + 1 , ACL_FORMAT_ND, scale_offset);
18321832            aclTensor* dequant_tensor = ggml_cann_create_tensor (
1833-                 dequant_buffer_allocator.get (), ACL_FLOAT, sizeof (float_t ),
1833+                 dequant_buffer_allocator.get (), ACL_FLOAT, sizeof (float ),
18341834                dequant_ne, dequant_nb, GGML_MAX_DIMS + 1 );
18351835
18361836            aclnn_mul (ctx, acl_weight_tensor, acl_scale_tensor, dequant_tensor);
1837-             dequant_nb[0 ] = sizeof (float_t );
1837+             dequant_nb[0 ] = sizeof (float );
18381838            dequant_ne = src0->ne ;
18391839            for  (int  i = 1 ; i < GGML_MAX_DIMS; i++) {
18401840                dequant_nb[i] = dequant_nb[i - 1 ] * src0->ne [i - 1 ];
@@ -2282,8 +2282,8 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
22822282
22832283    int64_t  theta_scale_length = src0->ne [0 ] / 2 ;
22842284    int64_t  theta_scale_ne[] = {theta_scale_length, 1 , 1 , 1 };
2285-     size_t  theta_scale_nb[] = {sizeof (float_t ), sizeof (float_t ), sizeof (float_t ),
2286-                           theta_scale_length * sizeof (float_t )};
2285+     size_t  theta_scale_nb[] = {sizeof (float ), sizeof (float ), sizeof (float ),
2286+                           theta_scale_length * sizeof (float )};
22872287
22882288    GGML_ASSERT (src1->type  == GGML_TYPE_I32);
22892289    int64_t  position_length = src1->ne [0 ];
@@ -2293,7 +2293,7 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
22932293
22942294    int64_t  theta_ne[] = {theta_scale_length, 1 , position_length, 1 };
22952295    size_t  theta_nb[GGML_MAX_DIMS];
2296-     theta_nb[0 ] = sizeof (float_t );
2296+     theta_nb[0 ] = sizeof (float );
22972297    for  (int  i = 1 ; i < GGML_MAX_DIMS; i++) {
22982298        theta_nb[i] = theta_nb[i - 1 ] * theta_ne[i - 1 ];
22992299    }
@@ -2314,10 +2314,10 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
23142314        if  (ctx.rope_cache .theta_scale_cache  != nullptr ) {
23152315            ACL_CHECK (aclrtFree (ctx.rope_cache .theta_scale_cache ));
23162316        }
2317-         ACL_CHECK (aclrtMalloc (&ctx.rope_cache .theta_scale_cache , theta_scale_length * sizeof (float_t ), ACL_MEM_MALLOC_HUGE_FIRST));
2317+         ACL_CHECK (aclrtMalloc (&ctx.rope_cache .theta_scale_cache , theta_scale_length * sizeof (float ), ACL_MEM_MALLOC_HUGE_FIRST));
23182318
23192319        acl_theta_scale_tensor =
2320-             ggml_cann_create_tensor (ctx.rope_cache .theta_scale_cache , ACL_FLOAT, sizeof (float_t ),
2320+             ggml_cann_create_tensor (ctx.rope_cache .theta_scale_cache , ACL_FLOAT, sizeof (float ),
23212321                                    theta_scale_ne, theta_scale_nb, GGML_MAX_DIMS);
23222322
23232323        float  start = 0 ;
@@ -2383,20 +2383,20 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
23832383    } else  {
23842384        //  use cache
23852385        acl_theta_scale_tensor =
2386-             ggml_cann_create_tensor (ctx.rope_cache .theta_scale_cache , ACL_FLOAT, sizeof (float_t ),
2386+             ggml_cann_create_tensor (ctx.rope_cache .theta_scale_cache , ACL_FLOAT, sizeof (float ),
23872387                                    theta_scale_ne, theta_scale_nb, GGML_MAX_DIMS);
23882388    }
23892389
23902390    ggml_cann_pool_alloc freq_fac_res_allocator (ctx.pool ());
23912391    //  freq_factors
23922392    if  (src2) {
2393-         freq_fac_res_allocator.alloc (theta_scale_length * sizeof (float_t ));
2393+         freq_fac_res_allocator.alloc (theta_scale_length * sizeof (float ));
23942394        void * freq_fac_res_ptr = freq_fac_res_allocator.get ();
23952395        aclTensor* acl_freq_factors_tensor = ggml_cann_create_tensor (
23962396            src2->data , ggml_cann_type_mapping (src2->type ),
23972397            ggml_type_size (src2->type ), theta_scale_ne, theta_scale_nb, GGML_MAX_DIMS);
23982398        aclTensor* acl_freq_fac_res_tensor = ggml_cann_create_tensor (
2399-             freq_fac_res_ptr, ACL_FLOAT, sizeof (float_t ),
2399+             freq_fac_res_ptr, ACL_FLOAT, sizeof (float ),
24002400            theta_scale_ne, theta_scale_nb, GGML_MAX_DIMS);
24012401        aclnn_div (ctx, acl_theta_scale_tensor, acl_freq_factors_tensor, acl_freq_fac_res_tensor);
24022402        std::swap (acl_theta_scale_tensor, acl_freq_fac_res_tensor);
@@ -2411,29 +2411,29 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
24112411    //  power * position
24122412    int64_t  theta_length = theta_scale_length * position_length;
24132413    ggml_cann_pool_alloc theta_allocator (ctx.pool (),
2414-                                         theta_length * sizeof (float_t ));
2414+                                         theta_length * sizeof (float ));
24152415    void * theta_buffer = theta_allocator.get ();
24162416
24172417    aclTensor* acl_theta_tensor =
2418-         ggml_cann_create_tensor (theta_buffer, ACL_FLOAT, sizeof (float_t ),
2418+         ggml_cann_create_tensor (theta_buffer, ACL_FLOAT, sizeof (float ),
24192419                                theta_ne, theta_nb, GGML_MAX_DIMS);
24202420    aclnn_mul (ctx, acl_position_tensor, acl_theta_scale_tensor,
24212421            acl_theta_tensor);
24222422
24232423    //  sin/cos
24242424    ggml_cann_pool_alloc sin_allocator (ctx.pool (),
2425-                                     theta_length * sizeof (float_t ));
2425+                                     theta_length * sizeof (float ));
24262426    void * sin_buffer = sin_allocator.get ();
24272427    aclTensor* acl_sin_tensor = ggml_cann_create_tensor (
2428-         sin_buffer, ACL_FLOAT, sizeof (float_t ), theta_ne, theta_nb,
2428+         sin_buffer, ACL_FLOAT, sizeof (float ), theta_ne, theta_nb,
24292429        GGML_MAX_DIMS, ACL_FORMAT_ND);
24302430    aclnn_sin (ctx, acl_theta_tensor, acl_sin_tensor);
24312431
24322432    ggml_cann_pool_alloc cos_allocator (ctx.pool (),
2433-                                     theta_length * sizeof (float_t ));
2433+                                     theta_length * sizeof (float ));
24342434    void * cos_buffer = cos_allocator.get ();
24352435    aclTensor* acl_cos_tensor = ggml_cann_create_tensor (
2436-         cos_buffer, ACL_FLOAT, sizeof (float_t ), theta_ne, theta_nb,
2436+         cos_buffer, ACL_FLOAT, sizeof (float ), theta_ne, theta_nb,
24372437        GGML_MAX_DIMS, ACL_FORMAT_ND);
24382438    aclnn_cos (ctx, acl_theta_tensor, acl_cos_tensor);
24392439
@@ -2449,15 +2449,15 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
24492449
24502450    int64_t  sin_reshape_ne[4 ] = {src0->ne [0 ], 1 , src0->ne [2 ], 1 };
24512451    size_t  sin_reshape_nb[GGML_MAX_DIMS];
2452-     sin_reshape_nb[0 ] = sizeof (float_t );
2452+     sin_reshape_nb[0 ] = sizeof (float );
24532453    for  (int  i = 1 ; i < GGML_MAX_DIMS; i++) {
24542454        sin_reshape_nb[i] = sin_reshape_nb[i - 1 ] * sin_reshape_ne[i - 1 ];
24552455    }
24562456    aclTensor* acl_sin_repeat_tensor =
2457-         ggml_cann_create_tensor (sin_tensor_buffer, ACL_FLOAT, sizeof (float_t ),
2457+         ggml_cann_create_tensor (sin_tensor_buffer, ACL_FLOAT, sizeof (float ),
24582458                                sin_reshape_ne, sin_reshape_nb, GGML_MAX_DIMS);
24592459    aclTensor* acl_cos_repeat_tensor =
2460-         ggml_cann_create_tensor (cos_tensor_buffer, ACL_FLOAT, sizeof (float_t ),
2460+         ggml_cann_create_tensor (cos_tensor_buffer, ACL_FLOAT, sizeof (float ),
24612461                                sin_reshape_ne, sin_reshape_nb, GGML_MAX_DIMS);
24622462
24632463    //  repeat
@@ -2543,15 +2543,15 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
25432543
25442544    int64_t  sin_reshape_ne[4 ] = {ne00, 1 , ne02, 1 };
25452545    size_t  sin_reshape_nb[GGML_MAX_DIMS];
2546-     sin_reshape_nb[0 ] = sizeof (float_t );
2546+     sin_reshape_nb[0 ] = sizeof (float );
25472547    for  (int  i = 1 ; i < GGML_MAX_DIMS; i++) {
25482548        sin_reshape_nb[i] = sin_reshape_nb[i - 1 ] * sin_reshape_ne[i - 1 ];
25492549    }
25502550    aclTensor* acl_sin_reshape_tensor =
2551-         ggml_cann_create_tensor (sin_tensor_buffer, ACL_FLOAT, sizeof (float_t ),
2551+         ggml_cann_create_tensor (sin_tensor_buffer, ACL_FLOAT, sizeof (float ),
25522552                                sin_reshape_ne, sin_reshape_nb, GGML_MAX_DIMS);
25532553    aclTensor* acl_cos_reshape_tensor =
2554-         ggml_cann_create_tensor (cos_tensor_buffer, ACL_FLOAT, sizeof (float_t ),
2554+         ggml_cann_create_tensor (cos_tensor_buffer, ACL_FLOAT, sizeof (float ),
25552555                                sin_reshape_ne, sin_reshape_nb, GGML_MAX_DIMS);
25562556
25572557    aclTensor* acl_src = ggml_cann_create_tensor (src0);
@@ -2566,7 +2566,7 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
25662566    void * minus_one_scale_buffer = nullptr ;
25672567    ggml_cann_pool_alloc roll_allocator (ctx.pool (), ggml_nbytes (src0));
25682568    ggml_cann_pool_alloc minus_one_scale_allocator (
2569-         ctx.pool (), sizeof (float_t ) * src0->ne [0 ]);
2569+         ctx.pool (), sizeof (float ) * src0->ne [0 ]);
25702570    if  (!is_neox) {
25712571        //  roll input: [q0,q1,q2,q3,...] -> [q1,q0,q3,q2,...]
25722572        input_roll_buffer = roll_allocator.get ();
@@ -2596,13 +2596,13 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
25962596
25972597        int64_t  minus_one_ne[4 ] = {src0->ne [0 ], 1 , 1 , 1 };
25982598        size_t  minus_one_nb[GGML_MAX_DIMS];
2599-         minus_one_nb[0 ] = sizeof (float_t );
2599+         minus_one_nb[0 ] = sizeof (float );
26002600        for  (int  i = 1 ; i < GGML_MAX_DIMS; i++) {
26012601            minus_one_nb[i] = minus_one_nb[i - 1 ] * minus_one_ne[i - 1 ];
26022602        }
26032603        acl_minus_one_tensor = aclnn_values (
2604-             ctx, minus_one_scale_buffer, sizeof (float_t ) * src0->ne [0 ],
2605-             minus_one_ne, GGML_MAX_DIMS, ACL_FLOAT, sizeof (float_t ), 1 );
2604+             ctx, minus_one_scale_buffer, sizeof (float ) * src0->ne [0 ],
2605+             minus_one_ne, GGML_MAX_DIMS, ACL_FLOAT, sizeof (float ), 1 );
26062606        int64_t  dim = 3 ;
26072607        int64_t * index = new  int64_t [src0->ne [0 ]];
26082608        for  (int  i = 0 ; i < src0->ne [0 ]; i++) {
@@ -2630,22 +2630,22 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
26302630        minus_one_scale_buffer = minus_one_scale_allocator.get ();
26312631        int64_t  minus_one_ne[4 ] = {src0->ne [0 ], 1 , 1 , 1 };
26322632        size_t  minus_one_nb[GGML_MAX_DIMS];
2633-         minus_one_nb[0 ] = sizeof (float_t );
2633+         minus_one_nb[0 ] = sizeof (float );
26342634        for  (int  i = 1 ; i < GGML_MAX_DIMS; i++) {
26352635            minus_one_nb[i] = minus_one_nb[i - 1 ] * minus_one_ne[i - 1 ];
26362636        }
26372637        acl_minus_one_tensor = aclnn_values (
2638-             ctx, minus_one_scale_buffer, sizeof (float_t ) * src0->ne [0 ],
2639-             minus_one_ne, GGML_MAX_DIMS, ACL_FLOAT, sizeof (float_t ), 1 );
2638+             ctx, minus_one_scale_buffer, sizeof (float ) * src0->ne [0 ],
2639+             minus_one_ne, GGML_MAX_DIMS, ACL_FLOAT, sizeof (float ), 1 );
26402640        //  -1 * first half
26412641        int64_t  first_half_ne[4 ] = {src0->ne [0 ] / 2 , 1 , 1 , 1 };
26422642        size_t  first_half_nb[GGML_MAX_DIMS];
2643-         first_half_nb[0 ] = sizeof (float_t );
2643+         first_half_nb[0 ] = sizeof (float );
26442644        for  (int  i = 1 ; i < GGML_MAX_DIMS; i++) {
26452645            first_half_nb[i] = first_half_nb[i - 1 ] * first_half_ne[i - 1 ];
26462646        }
26472647        aclTensor* acl_first_half_tensor = ggml_cann_create_tensor (
2648-             minus_one_scale_buffer, ACL_FLOAT, sizeof (float_t ), first_half_ne,
2648+             minus_one_scale_buffer, ACL_FLOAT, sizeof (float ), first_half_ne,
26492649            first_half_nb, GGML_MAX_DIMS);
26502650        bool  inplace = true ;
26512651        float  scale = -1 ;
@@ -2685,28 +2685,28 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
26852685        //  TODO: ne0 != n_dims in mode2
26862686    } else  if  (src0->type  == GGML_TYPE_F16) {
26872687        size_t  input_fp32_nb[GGML_MAX_DIMS];
2688-         input_fp32_nb[0 ] = sizeof (float_t );
2688+         input_fp32_nb[0 ] = sizeof (float );
26892689        for  (int  i = 1 ; i < GGML_MAX_DIMS; i++) {
26902690            input_fp32_nb[i] = input_fp32_nb[i - 1 ] * dst->ne [i - 1 ];
26912691        }
26922692        ggml_cann_pool_alloc fp32_allocator1 (
2693-             ctx.pool (), ggml_nelements (dst) * sizeof (float_t ));
2693+             ctx.pool (), ggml_nelements (dst) * sizeof (float ));
26942694        void * input_fp32_buffer1 = fp32_allocator1.get ();
26952695        aclTensor* input_fp32_tensor1 = ggml_cann_create_tensor (
2696-             input_fp32_buffer1, ACL_FLOAT, sizeof (float_t ), dst->ne ,
2696+             input_fp32_buffer1, ACL_FLOAT, sizeof (float ), dst->ne ,
26972697            input_fp32_nb, GGML_MAX_DIMS);
26982698        ggml_cann_pool_alloc fp32_allocator2 (
2699-             ctx.pool (), ggml_nelements (dst) * sizeof (float_t ));
2699+             ctx.pool (), ggml_nelements (dst) * sizeof (float ));
27002700        void * input_fp32_buffer2 = fp32_allocator2.get ();
27012701        aclTensor* input_fp32_tensor2 = ggml_cann_create_tensor (
2702-             input_fp32_buffer2, ACL_FLOAT, sizeof (float_t ), dst->ne ,
2702+             input_fp32_buffer2, ACL_FLOAT, sizeof (float ), dst->ne ,
27032703            input_fp32_nb, GGML_MAX_DIMS);
27042704
27052705        ggml_cann_pool_alloc fp32_allocator (
2706-             ctx.pool (), ggml_nelements (dst) * sizeof (float_t ));
2706+             ctx.pool (), ggml_nelements (dst) * sizeof (float ));
27072707        output_fp32_buffer = fp32_allocator.get ();
27082708        aclTensor* output_fp32_tensor = ggml_cann_create_tensor (
2709-             output_fp32_buffer, ACL_FLOAT, sizeof (float_t ), dst->ne ,
2709+             output_fp32_buffer, ACL_FLOAT, sizeof (float ), dst->ne ,
27102710            input_fp32_nb, GGML_MAX_DIMS);
27112711        aclnn_mul (ctx, acl_src, acl_cos_reshape_tensor, input_fp32_tensor1);
27122712        aclnn_mul (ctx, acl_input_roll_mul_scale_tensor, acl_sin_reshape_tensor,
0 commit comments