@@ -1767,10 +1767,10 @@ void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
17671767 case GGML_TYPE_F16: {
17681768 aclTensor* acl_src0 = ggml_cann_create_tensor (src0);
17691769 ggml_cann_pool_alloc src_buffer_allocator (
1770- ctx.pool (), ggml_nelements (src0) * sizeof (float_t ));
1770+ ctx.pool (), ggml_nelements (src0) * sizeof (float ));
17711771 void * src_trans_buffer = src_buffer_allocator.get ();
17721772 size_t src_trans_nb[GGML_MAX_DIMS];
1773- src_trans_nb[0 ] = sizeof (float_t );
1773+ src_trans_nb[0 ] = sizeof (float );
17741774 for (int i = 1 ; i < GGML_MAX_DIMS; i++) {
17751775 src_trans_nb[i] = src_trans_nb[i - 1 ] * src0->ne [i - 1 ];
17761776 }
@@ -1814,14 +1814,14 @@ void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
18141814
18151815 // [3,4,5,64] -> [3,4,5,2,32]
18161816 dequant_ne = weight_ne;
1817- dequant_nb[0 ] = sizeof (float_t );
1817+ dequant_nb[0 ] = sizeof (float );
18181818 for (int i = 1 ; i < GGML_MAX_DIMS + 1 ; i++) {
18191819 dequant_nb[i] = dequant_nb[i - 1 ] * dequant_ne[i - 1 ];
18201820 }
18211821
18221822 scale_offset = ggml_nelements (src0) * sizeof (int8_t );
18231823 ggml_cann_pool_alloc dequant_buffer_allocator (
1824- ctx.pool (), ggml_nelements (src0) * sizeof (float_t ));
1824+ ctx.pool (), ggml_nelements (src0) * sizeof (float ));
18251825
18261826 aclTensor* acl_weight_tensor = ggml_cann_create_tensor (
18271827 src0->data , ACL_INT8, sizeof (int8_t ), weight_ne, weight_nb,
@@ -1830,11 +1830,11 @@ void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
18301830 src0->data , ACL_FLOAT16, sizeof (uint16_t ), scale_ne, scale_nb,
18311831 GGML_MAX_DIMS + 1 , ACL_FORMAT_ND, scale_offset);
18321832 aclTensor* dequant_tensor = ggml_cann_create_tensor (
1833- dequant_buffer_allocator.get (), ACL_FLOAT, sizeof (float_t ),
1833+ dequant_buffer_allocator.get (), ACL_FLOAT, sizeof (float ),
18341834 dequant_ne, dequant_nb, GGML_MAX_DIMS + 1 );
18351835
18361836 aclnn_mul (ctx, acl_weight_tensor, acl_scale_tensor, dequant_tensor);
1837- dequant_nb[0 ] = sizeof (float_t );
1837+ dequant_nb[0 ] = sizeof (float );
18381838 dequant_ne = src0->ne ;
18391839 for (int i = 1 ; i < GGML_MAX_DIMS; i++) {
18401840 dequant_nb[i] = dequant_nb[i - 1 ] * src0->ne [i - 1 ];
@@ -2282,8 +2282,8 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
22822282
22832283 int64_t theta_scale_length = src0->ne [0 ] / 2 ;
22842284 int64_t theta_scale_ne[] = {theta_scale_length, 1 , 1 , 1 };
2285- size_t theta_scale_nb[] = {sizeof (float_t ), sizeof (float_t ), sizeof (float_t ),
2286- theta_scale_length * sizeof (float_t )};
2285+ size_t theta_scale_nb[] = {sizeof (float ), sizeof (float ), sizeof (float ),
2286+ theta_scale_length * sizeof (float )};
22872287
22882288 GGML_ASSERT (src1->type == GGML_TYPE_I32);
22892289 int64_t position_length = src1->ne [0 ];
@@ -2293,7 +2293,7 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
22932293
22942294 int64_t theta_ne[] = {theta_scale_length, 1 , position_length, 1 };
22952295 size_t theta_nb[GGML_MAX_DIMS];
2296- theta_nb[0 ] = sizeof (float_t );
2296+ theta_nb[0 ] = sizeof (float );
22972297 for (int i = 1 ; i < GGML_MAX_DIMS; i++) {
22982298 theta_nb[i] = theta_nb[i - 1 ] * theta_ne[i - 1 ];
22992299 }
@@ -2314,10 +2314,10 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
23142314 if (ctx.rope_cache .theta_scale_cache != nullptr ) {
23152315 ACL_CHECK (aclrtFree (ctx.rope_cache .theta_scale_cache ));
23162316 }
2317- ACL_CHECK (aclrtMalloc (&ctx.rope_cache .theta_scale_cache , theta_scale_length * sizeof (float_t ), ACL_MEM_MALLOC_HUGE_FIRST));
2317+ ACL_CHECK (aclrtMalloc (&ctx.rope_cache .theta_scale_cache , theta_scale_length * sizeof (float ), ACL_MEM_MALLOC_HUGE_FIRST));
23182318
23192319 acl_theta_scale_tensor =
2320- ggml_cann_create_tensor (ctx.rope_cache .theta_scale_cache , ACL_FLOAT, sizeof (float_t ),
2320+ ggml_cann_create_tensor (ctx.rope_cache .theta_scale_cache , ACL_FLOAT, sizeof (float ),
23212321 theta_scale_ne, theta_scale_nb, GGML_MAX_DIMS);
23222322
23232323 float start = 0 ;
@@ -2383,20 +2383,20 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
23832383 } else {
23842384 // use cache
23852385 acl_theta_scale_tensor =
2386- ggml_cann_create_tensor (ctx.rope_cache .theta_scale_cache , ACL_FLOAT, sizeof (float_t ),
2386+ ggml_cann_create_tensor (ctx.rope_cache .theta_scale_cache , ACL_FLOAT, sizeof (float ),
23872387 theta_scale_ne, theta_scale_nb, GGML_MAX_DIMS);
23882388 }
23892389
23902390 ggml_cann_pool_alloc freq_fac_res_allocator (ctx.pool ());
23912391 // freq_factors
23922392 if (src2) {
2393- freq_fac_res_allocator.alloc (theta_scale_length * sizeof (float_t ));
2393+ freq_fac_res_allocator.alloc (theta_scale_length * sizeof (float ));
23942394 void * freq_fac_res_ptr = freq_fac_res_allocator.get ();
23952395 aclTensor* acl_freq_factors_tensor = ggml_cann_create_tensor (
23962396 src2->data , ggml_cann_type_mapping (src2->type ),
23972397 ggml_type_size (src2->type ), theta_scale_ne, theta_scale_nb, GGML_MAX_DIMS);
23982398 aclTensor* acl_freq_fac_res_tensor = ggml_cann_create_tensor (
2399- freq_fac_res_ptr, ACL_FLOAT, sizeof (float_t ),
2399+ freq_fac_res_ptr, ACL_FLOAT, sizeof (float ),
24002400 theta_scale_ne, theta_scale_nb, GGML_MAX_DIMS);
24012401 aclnn_div (ctx, acl_theta_scale_tensor, acl_freq_factors_tensor, acl_freq_fac_res_tensor);
24022402 std::swap (acl_theta_scale_tensor, acl_freq_fac_res_tensor);
@@ -2411,29 +2411,29 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
24112411 // power * position
24122412 int64_t theta_length = theta_scale_length * position_length;
24132413 ggml_cann_pool_alloc theta_allocator (ctx.pool (),
2414- theta_length * sizeof (float_t ));
2414+ theta_length * sizeof (float ));
24152415 void * theta_buffer = theta_allocator.get ();
24162416
24172417 aclTensor* acl_theta_tensor =
2418- ggml_cann_create_tensor (theta_buffer, ACL_FLOAT, sizeof (float_t ),
2418+ ggml_cann_create_tensor (theta_buffer, ACL_FLOAT, sizeof (float ),
24192419 theta_ne, theta_nb, GGML_MAX_DIMS);
24202420 aclnn_mul (ctx, acl_position_tensor, acl_theta_scale_tensor,
24212421 acl_theta_tensor);
24222422
24232423 // sin/cos
24242424 ggml_cann_pool_alloc sin_allocator (ctx.pool (),
2425- theta_length * sizeof (float_t ));
2425+ theta_length * sizeof (float ));
24262426 void * sin_buffer = sin_allocator.get ();
24272427 aclTensor* acl_sin_tensor = ggml_cann_create_tensor (
2428- sin_buffer, ACL_FLOAT, sizeof (float_t ), theta_ne, theta_nb,
2428+ sin_buffer, ACL_FLOAT, sizeof (float ), theta_ne, theta_nb,
24292429 GGML_MAX_DIMS, ACL_FORMAT_ND);
24302430 aclnn_sin (ctx, acl_theta_tensor, acl_sin_tensor);
24312431
24322432 ggml_cann_pool_alloc cos_allocator (ctx.pool (),
2433- theta_length * sizeof (float_t ));
2433+ theta_length * sizeof (float ));
24342434 void * cos_buffer = cos_allocator.get ();
24352435 aclTensor* acl_cos_tensor = ggml_cann_create_tensor (
2436- cos_buffer, ACL_FLOAT, sizeof (float_t ), theta_ne, theta_nb,
2436+ cos_buffer, ACL_FLOAT, sizeof (float ), theta_ne, theta_nb,
24372437 GGML_MAX_DIMS, ACL_FORMAT_ND);
24382438 aclnn_cos (ctx, acl_theta_tensor, acl_cos_tensor);
24392439
@@ -2449,15 +2449,15 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
24492449
24502450 int64_t sin_reshape_ne[4 ] = {src0->ne [0 ], 1 , src0->ne [2 ], 1 };
24512451 size_t sin_reshape_nb[GGML_MAX_DIMS];
2452- sin_reshape_nb[0 ] = sizeof (float_t );
2452+ sin_reshape_nb[0 ] = sizeof (float );
24532453 for (int i = 1 ; i < GGML_MAX_DIMS; i++) {
24542454 sin_reshape_nb[i] = sin_reshape_nb[i - 1 ] * sin_reshape_ne[i - 1 ];
24552455 }
24562456 aclTensor* acl_sin_repeat_tensor =
2457- ggml_cann_create_tensor (sin_tensor_buffer, ACL_FLOAT, sizeof (float_t ),
2457+ ggml_cann_create_tensor (sin_tensor_buffer, ACL_FLOAT, sizeof (float ),
24582458 sin_reshape_ne, sin_reshape_nb, GGML_MAX_DIMS);
24592459 aclTensor* acl_cos_repeat_tensor =
2460- ggml_cann_create_tensor (cos_tensor_buffer, ACL_FLOAT, sizeof (float_t ),
2460+ ggml_cann_create_tensor (cos_tensor_buffer, ACL_FLOAT, sizeof (float ),
24612461 sin_reshape_ne, sin_reshape_nb, GGML_MAX_DIMS);
24622462
24632463 // repeat
@@ -2543,15 +2543,15 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
25432543
25442544 int64_t sin_reshape_ne[4 ] = {ne00, 1 , ne02, 1 };
25452545 size_t sin_reshape_nb[GGML_MAX_DIMS];
2546- sin_reshape_nb[0 ] = sizeof (float_t );
2546+ sin_reshape_nb[0 ] = sizeof (float );
25472547 for (int i = 1 ; i < GGML_MAX_DIMS; i++) {
25482548 sin_reshape_nb[i] = sin_reshape_nb[i - 1 ] * sin_reshape_ne[i - 1 ];
25492549 }
25502550 aclTensor* acl_sin_reshape_tensor =
2551- ggml_cann_create_tensor (sin_tensor_buffer, ACL_FLOAT, sizeof (float_t ),
2551+ ggml_cann_create_tensor (sin_tensor_buffer, ACL_FLOAT, sizeof (float ),
25522552 sin_reshape_ne, sin_reshape_nb, GGML_MAX_DIMS);
25532553 aclTensor* acl_cos_reshape_tensor =
2554- ggml_cann_create_tensor (cos_tensor_buffer, ACL_FLOAT, sizeof (float_t ),
2554+ ggml_cann_create_tensor (cos_tensor_buffer, ACL_FLOAT, sizeof (float ),
25552555 sin_reshape_ne, sin_reshape_nb, GGML_MAX_DIMS);
25562556
25572557 aclTensor* acl_src = ggml_cann_create_tensor (src0);
@@ -2566,7 +2566,7 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
25662566 void * minus_one_scale_buffer = nullptr ;
25672567 ggml_cann_pool_alloc roll_allocator (ctx.pool (), ggml_nbytes (src0));
25682568 ggml_cann_pool_alloc minus_one_scale_allocator (
2569- ctx.pool (), sizeof (float_t ) * src0->ne [0 ]);
2569+ ctx.pool (), sizeof (float ) * src0->ne [0 ]);
25702570 if (!is_neox) {
25712571 // roll input: [q0,q1,q2,q3,...] -> [q1,q0,q3,q2,...]
25722572 input_roll_buffer = roll_allocator.get ();
@@ -2596,13 +2596,13 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
25962596
25972597 int64_t minus_one_ne[4 ] = {src0->ne [0 ], 1 , 1 , 1 };
25982598 size_t minus_one_nb[GGML_MAX_DIMS];
2599- minus_one_nb[0 ] = sizeof (float_t );
2599+ minus_one_nb[0 ] = sizeof (float );
26002600 for (int i = 1 ; i < GGML_MAX_DIMS; i++) {
26012601 minus_one_nb[i] = minus_one_nb[i - 1 ] * minus_one_ne[i - 1 ];
26022602 }
26032603 acl_minus_one_tensor = aclnn_values (
2604- ctx, minus_one_scale_buffer, sizeof (float_t ) * src0->ne [0 ],
2605- minus_one_ne, GGML_MAX_DIMS, ACL_FLOAT, sizeof (float_t ), 1 );
2604+ ctx, minus_one_scale_buffer, sizeof (float ) * src0->ne [0 ],
2605+ minus_one_ne, GGML_MAX_DIMS, ACL_FLOAT, sizeof (float ), 1 );
26062606 int64_t dim = 3 ;
26072607 int64_t * index = new int64_t [src0->ne [0 ]];
26082608 for (int i = 0 ; i < src0->ne [0 ]; i++) {
@@ -2630,22 +2630,22 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
26302630 minus_one_scale_buffer = minus_one_scale_allocator.get ();
26312631 int64_t minus_one_ne[4 ] = {src0->ne [0 ], 1 , 1 , 1 };
26322632 size_t minus_one_nb[GGML_MAX_DIMS];
2633- minus_one_nb[0 ] = sizeof (float_t );
2633+ minus_one_nb[0 ] = sizeof (float );
26342634 for (int i = 1 ; i < GGML_MAX_DIMS; i++) {
26352635 minus_one_nb[i] = minus_one_nb[i - 1 ] * minus_one_ne[i - 1 ];
26362636 }
26372637 acl_minus_one_tensor = aclnn_values (
2638- ctx, minus_one_scale_buffer, sizeof (float_t ) * src0->ne [0 ],
2639- minus_one_ne, GGML_MAX_DIMS, ACL_FLOAT, sizeof (float_t ), 1 );
2638+ ctx, minus_one_scale_buffer, sizeof (float ) * src0->ne [0 ],
2639+ minus_one_ne, GGML_MAX_DIMS, ACL_FLOAT, sizeof (float ), 1 );
26402640 // -1 * first half
26412641 int64_t first_half_ne[4 ] = {src0->ne [0 ] / 2 , 1 , 1 , 1 };
26422642 size_t first_half_nb[GGML_MAX_DIMS];
2643- first_half_nb[0 ] = sizeof (float_t );
2643+ first_half_nb[0 ] = sizeof (float );
26442644 for (int i = 1 ; i < GGML_MAX_DIMS; i++) {
26452645 first_half_nb[i] = first_half_nb[i - 1 ] * first_half_ne[i - 1 ];
26462646 }
26472647 aclTensor* acl_first_half_tensor = ggml_cann_create_tensor (
2648- minus_one_scale_buffer, ACL_FLOAT, sizeof (float_t ), first_half_ne,
2648+ minus_one_scale_buffer, ACL_FLOAT, sizeof (float ), first_half_ne,
26492649 first_half_nb, GGML_MAX_DIMS);
26502650 bool inplace = true ;
26512651 float scale = -1 ;
@@ -2685,28 +2685,28 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
26852685 // TODO: ne0 != n_dims in mode2
26862686 } else if (src0->type == GGML_TYPE_F16) {
26872687 size_t input_fp32_nb[GGML_MAX_DIMS];
2688- input_fp32_nb[0 ] = sizeof (float_t );
2688+ input_fp32_nb[0 ] = sizeof (float );
26892689 for (int i = 1 ; i < GGML_MAX_DIMS; i++) {
26902690 input_fp32_nb[i] = input_fp32_nb[i - 1 ] * dst->ne [i - 1 ];
26912691 }
26922692 ggml_cann_pool_alloc fp32_allocator1 (
2693- ctx.pool (), ggml_nelements (dst) * sizeof (float_t ));
2693+ ctx.pool (), ggml_nelements (dst) * sizeof (float ));
26942694 void * input_fp32_buffer1 = fp32_allocator1.get ();
26952695 aclTensor* input_fp32_tensor1 = ggml_cann_create_tensor (
2696- input_fp32_buffer1, ACL_FLOAT, sizeof (float_t ), dst->ne ,
2696+ input_fp32_buffer1, ACL_FLOAT, sizeof (float ), dst->ne ,
26972697 input_fp32_nb, GGML_MAX_DIMS);
26982698 ggml_cann_pool_alloc fp32_allocator2 (
2699- ctx.pool (), ggml_nelements (dst) * sizeof (float_t ));
2699+ ctx.pool (), ggml_nelements (dst) * sizeof (float ));
27002700 void * input_fp32_buffer2 = fp32_allocator2.get ();
27012701 aclTensor* input_fp32_tensor2 = ggml_cann_create_tensor (
2702- input_fp32_buffer2, ACL_FLOAT, sizeof (float_t ), dst->ne ,
2702+ input_fp32_buffer2, ACL_FLOAT, sizeof (float ), dst->ne ,
27032703 input_fp32_nb, GGML_MAX_DIMS);
27042704
27052705 ggml_cann_pool_alloc fp32_allocator (
2706- ctx.pool (), ggml_nelements (dst) * sizeof (float_t ));
2706+ ctx.pool (), ggml_nelements (dst) * sizeof (float ));
27072707 output_fp32_buffer = fp32_allocator.get ();
27082708 aclTensor* output_fp32_tensor = ggml_cann_create_tensor (
2709- output_fp32_buffer, ACL_FLOAT, sizeof (float_t ), dst->ne ,
2709+ output_fp32_buffer, ACL_FLOAT, sizeof (float ), dst->ne ,
27102710 input_fp32_nb, GGML_MAX_DIMS);
27112711 aclnn_mul (ctx, acl_src, acl_cos_reshape_tensor, input_fp32_tensor1);
27122712 aclnn_mul (ctx, acl_input_roll_mul_scale_tensor, acl_sin_reshape_tensor,
0 commit comments