@@ -1767,10 +1767,10 @@ void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
1767
1767
case GGML_TYPE_F16: {
1768
1768
aclTensor* acl_src0 = ggml_cann_create_tensor (src0);
1769
1769
ggml_cann_pool_alloc src_buffer_allocator (
1770
- ctx.pool (), ggml_nelements (src0) * sizeof (float_t ));
1770
+ ctx.pool (), ggml_nelements (src0) * sizeof (float ));
1771
1771
void * src_trans_buffer = src_buffer_allocator.get ();
1772
1772
size_t src_trans_nb[GGML_MAX_DIMS];
1773
- src_trans_nb[0 ] = sizeof (float_t );
1773
+ src_trans_nb[0 ] = sizeof (float );
1774
1774
for (int i = 1 ; i < GGML_MAX_DIMS; i++) {
1775
1775
src_trans_nb[i] = src_trans_nb[i - 1 ] * src0->ne [i - 1 ];
1776
1776
}
@@ -1814,14 +1814,14 @@ void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
1814
1814
1815
1815
// [3,4,5,64] -> [3,4,5,2,32]
1816
1816
dequant_ne = weight_ne;
1817
- dequant_nb[0 ] = sizeof (float_t );
1817
+ dequant_nb[0 ] = sizeof (float );
1818
1818
for (int i = 1 ; i < GGML_MAX_DIMS + 1 ; i++) {
1819
1819
dequant_nb[i] = dequant_nb[i - 1 ] * dequant_ne[i - 1 ];
1820
1820
}
1821
1821
1822
1822
scale_offset = ggml_nelements (src0) * sizeof (int8_t );
1823
1823
ggml_cann_pool_alloc dequant_buffer_allocator (
1824
- ctx.pool (), ggml_nelements (src0) * sizeof (float_t ));
1824
+ ctx.pool (), ggml_nelements (src0) * sizeof (float ));
1825
1825
1826
1826
aclTensor* acl_weight_tensor = ggml_cann_create_tensor (
1827
1827
src0->data , ACL_INT8, sizeof (int8_t ), weight_ne, weight_nb,
@@ -1830,11 +1830,11 @@ void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
1830
1830
src0->data , ACL_FLOAT16, sizeof (uint16_t ), scale_ne, scale_nb,
1831
1831
GGML_MAX_DIMS + 1 , ACL_FORMAT_ND, scale_offset);
1832
1832
aclTensor* dequant_tensor = ggml_cann_create_tensor (
1833
- dequant_buffer_allocator.get (), ACL_FLOAT, sizeof (float_t ),
1833
+ dequant_buffer_allocator.get (), ACL_FLOAT, sizeof (float ),
1834
1834
dequant_ne, dequant_nb, GGML_MAX_DIMS + 1 );
1835
1835
1836
1836
aclnn_mul (ctx, acl_weight_tensor, acl_scale_tensor, dequant_tensor);
1837
- dequant_nb[0 ] = sizeof (float_t );
1837
+ dequant_nb[0 ] = sizeof (float );
1838
1838
dequant_ne = src0->ne ;
1839
1839
for (int i = 1 ; i < GGML_MAX_DIMS; i++) {
1840
1840
dequant_nb[i] = dequant_nb[i - 1 ] * src0->ne [i - 1 ];
@@ -2282,8 +2282,8 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
2282
2282
2283
2283
int64_t theta_scale_length = src0->ne [0 ] / 2 ;
2284
2284
int64_t theta_scale_ne[] = {theta_scale_length, 1 , 1 , 1 };
2285
- size_t theta_scale_nb[] = {sizeof (float_t ), sizeof (float_t ), sizeof (float_t ),
2286
- theta_scale_length * sizeof (float_t )};
2285
+ size_t theta_scale_nb[] = {sizeof (float ), sizeof (float ), sizeof (float ),
2286
+ theta_scale_length * sizeof (float )};
2287
2287
2288
2288
GGML_ASSERT (src1->type == GGML_TYPE_I32);
2289
2289
int64_t position_length = src1->ne [0 ];
@@ -2293,7 +2293,7 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
2293
2293
2294
2294
int64_t theta_ne[] = {theta_scale_length, 1 , position_length, 1 };
2295
2295
size_t theta_nb[GGML_MAX_DIMS];
2296
- theta_nb[0 ] = sizeof (float_t );
2296
+ theta_nb[0 ] = sizeof (float );
2297
2297
for (int i = 1 ; i < GGML_MAX_DIMS; i++) {
2298
2298
theta_nb[i] = theta_nb[i - 1 ] * theta_ne[i - 1 ];
2299
2299
}
@@ -2314,10 +2314,10 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
2314
2314
if (ctx.rope_cache .theta_scale_cache != nullptr ) {
2315
2315
ACL_CHECK (aclrtFree (ctx.rope_cache .theta_scale_cache ));
2316
2316
}
2317
- ACL_CHECK (aclrtMalloc (&ctx.rope_cache .theta_scale_cache , theta_scale_length * sizeof (float_t ), ACL_MEM_MALLOC_HUGE_FIRST));
2317
+ ACL_CHECK (aclrtMalloc (&ctx.rope_cache .theta_scale_cache , theta_scale_length * sizeof (float ), ACL_MEM_MALLOC_HUGE_FIRST));
2318
2318
2319
2319
acl_theta_scale_tensor =
2320
- ggml_cann_create_tensor (ctx.rope_cache .theta_scale_cache , ACL_FLOAT, sizeof (float_t ),
2320
+ ggml_cann_create_tensor (ctx.rope_cache .theta_scale_cache , ACL_FLOAT, sizeof (float ),
2321
2321
theta_scale_ne, theta_scale_nb, GGML_MAX_DIMS);
2322
2322
2323
2323
float start = 0 ;
@@ -2383,20 +2383,20 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
2383
2383
} else {
2384
2384
// use cache
2385
2385
acl_theta_scale_tensor =
2386
- ggml_cann_create_tensor (ctx.rope_cache .theta_scale_cache , ACL_FLOAT, sizeof (float_t ),
2386
+ ggml_cann_create_tensor (ctx.rope_cache .theta_scale_cache , ACL_FLOAT, sizeof (float ),
2387
2387
theta_scale_ne, theta_scale_nb, GGML_MAX_DIMS);
2388
2388
}
2389
2389
2390
2390
ggml_cann_pool_alloc freq_fac_res_allocator (ctx.pool ());
2391
2391
// freq_factors
2392
2392
if (src2) {
2393
- freq_fac_res_allocator.alloc (theta_scale_length * sizeof (float_t ));
2393
+ freq_fac_res_allocator.alloc (theta_scale_length * sizeof (float ));
2394
2394
void * freq_fac_res_ptr = freq_fac_res_allocator.get ();
2395
2395
aclTensor* acl_freq_factors_tensor = ggml_cann_create_tensor (
2396
2396
src2->data , ggml_cann_type_mapping (src2->type ),
2397
2397
ggml_type_size (src2->type ), theta_scale_ne, theta_scale_nb, GGML_MAX_DIMS);
2398
2398
aclTensor* acl_freq_fac_res_tensor = ggml_cann_create_tensor (
2399
- freq_fac_res_ptr, ACL_FLOAT, sizeof (float_t ),
2399
+ freq_fac_res_ptr, ACL_FLOAT, sizeof (float ),
2400
2400
theta_scale_ne, theta_scale_nb, GGML_MAX_DIMS);
2401
2401
aclnn_div (ctx, acl_theta_scale_tensor, acl_freq_factors_tensor, acl_freq_fac_res_tensor);
2402
2402
std::swap (acl_theta_scale_tensor, acl_freq_fac_res_tensor);
@@ -2411,29 +2411,29 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
2411
2411
// power * position
2412
2412
int64_t theta_length = theta_scale_length * position_length;
2413
2413
ggml_cann_pool_alloc theta_allocator (ctx.pool (),
2414
- theta_length * sizeof (float_t ));
2414
+ theta_length * sizeof (float ));
2415
2415
void * theta_buffer = theta_allocator.get ();
2416
2416
2417
2417
aclTensor* acl_theta_tensor =
2418
- ggml_cann_create_tensor (theta_buffer, ACL_FLOAT, sizeof (float_t ),
2418
+ ggml_cann_create_tensor (theta_buffer, ACL_FLOAT, sizeof (float ),
2419
2419
theta_ne, theta_nb, GGML_MAX_DIMS);
2420
2420
aclnn_mul (ctx, acl_position_tensor, acl_theta_scale_tensor,
2421
2421
acl_theta_tensor);
2422
2422
2423
2423
// sin/cos
2424
2424
ggml_cann_pool_alloc sin_allocator (ctx.pool (),
2425
- theta_length * sizeof (float_t ));
2425
+ theta_length * sizeof (float ));
2426
2426
void * sin_buffer = sin_allocator.get ();
2427
2427
aclTensor* acl_sin_tensor = ggml_cann_create_tensor (
2428
- sin_buffer, ACL_FLOAT, sizeof (float_t ), theta_ne, theta_nb,
2428
+ sin_buffer, ACL_FLOAT, sizeof (float ), theta_ne, theta_nb,
2429
2429
GGML_MAX_DIMS, ACL_FORMAT_ND);
2430
2430
aclnn_sin (ctx, acl_theta_tensor, acl_sin_tensor);
2431
2431
2432
2432
ggml_cann_pool_alloc cos_allocator (ctx.pool (),
2433
- theta_length * sizeof (float_t ));
2433
+ theta_length * sizeof (float ));
2434
2434
void * cos_buffer = cos_allocator.get ();
2435
2435
aclTensor* acl_cos_tensor = ggml_cann_create_tensor (
2436
- cos_buffer, ACL_FLOAT, sizeof (float_t ), theta_ne, theta_nb,
2436
+ cos_buffer, ACL_FLOAT, sizeof (float ), theta_ne, theta_nb,
2437
2437
GGML_MAX_DIMS, ACL_FORMAT_ND);
2438
2438
aclnn_cos (ctx, acl_theta_tensor, acl_cos_tensor);
2439
2439
@@ -2449,15 +2449,15 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
2449
2449
2450
2450
int64_t sin_reshape_ne[4 ] = {src0->ne [0 ], 1 , src0->ne [2 ], 1 };
2451
2451
size_t sin_reshape_nb[GGML_MAX_DIMS];
2452
- sin_reshape_nb[0 ] = sizeof (float_t );
2452
+ sin_reshape_nb[0 ] = sizeof (float );
2453
2453
for (int i = 1 ; i < GGML_MAX_DIMS; i++) {
2454
2454
sin_reshape_nb[i] = sin_reshape_nb[i - 1 ] * sin_reshape_ne[i - 1 ];
2455
2455
}
2456
2456
aclTensor* acl_sin_repeat_tensor =
2457
- ggml_cann_create_tensor (sin_tensor_buffer, ACL_FLOAT, sizeof (float_t ),
2457
+ ggml_cann_create_tensor (sin_tensor_buffer, ACL_FLOAT, sizeof (float ),
2458
2458
sin_reshape_ne, sin_reshape_nb, GGML_MAX_DIMS);
2459
2459
aclTensor* acl_cos_repeat_tensor =
2460
- ggml_cann_create_tensor (cos_tensor_buffer, ACL_FLOAT, sizeof (float_t ),
2460
+ ggml_cann_create_tensor (cos_tensor_buffer, ACL_FLOAT, sizeof (float ),
2461
2461
sin_reshape_ne, sin_reshape_nb, GGML_MAX_DIMS);
2462
2462
2463
2463
// repeat
@@ -2543,15 +2543,15 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
2543
2543
2544
2544
int64_t sin_reshape_ne[4 ] = {ne00, 1 , ne02, 1 };
2545
2545
size_t sin_reshape_nb[GGML_MAX_DIMS];
2546
- sin_reshape_nb[0 ] = sizeof (float_t );
2546
+ sin_reshape_nb[0 ] = sizeof (float );
2547
2547
for (int i = 1 ; i < GGML_MAX_DIMS; i++) {
2548
2548
sin_reshape_nb[i] = sin_reshape_nb[i - 1 ] * sin_reshape_ne[i - 1 ];
2549
2549
}
2550
2550
aclTensor* acl_sin_reshape_tensor =
2551
- ggml_cann_create_tensor (sin_tensor_buffer, ACL_FLOAT, sizeof (float_t ),
2551
+ ggml_cann_create_tensor (sin_tensor_buffer, ACL_FLOAT, sizeof (float ),
2552
2552
sin_reshape_ne, sin_reshape_nb, GGML_MAX_DIMS);
2553
2553
aclTensor* acl_cos_reshape_tensor =
2554
- ggml_cann_create_tensor (cos_tensor_buffer, ACL_FLOAT, sizeof (float_t ),
2554
+ ggml_cann_create_tensor (cos_tensor_buffer, ACL_FLOAT, sizeof (float ),
2555
2555
sin_reshape_ne, sin_reshape_nb, GGML_MAX_DIMS);
2556
2556
2557
2557
aclTensor* acl_src = ggml_cann_create_tensor (src0);
@@ -2566,7 +2566,7 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
2566
2566
void * minus_one_scale_buffer = nullptr ;
2567
2567
ggml_cann_pool_alloc roll_allocator (ctx.pool (), ggml_nbytes (src0));
2568
2568
ggml_cann_pool_alloc minus_one_scale_allocator (
2569
- ctx.pool (), sizeof (float_t ) * src0->ne [0 ]);
2569
+ ctx.pool (), sizeof (float ) * src0->ne [0 ]);
2570
2570
if (!is_neox) {
2571
2571
// roll input: [q0,q1,q2,q3,...] -> [q1,q0,q3,q2,...]
2572
2572
input_roll_buffer = roll_allocator.get ();
@@ -2596,13 +2596,13 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
2596
2596
2597
2597
int64_t minus_one_ne[4 ] = {src0->ne [0 ], 1 , 1 , 1 };
2598
2598
size_t minus_one_nb[GGML_MAX_DIMS];
2599
- minus_one_nb[0 ] = sizeof (float_t );
2599
+ minus_one_nb[0 ] = sizeof (float );
2600
2600
for (int i = 1 ; i < GGML_MAX_DIMS; i++) {
2601
2601
minus_one_nb[i] = minus_one_nb[i - 1 ] * minus_one_ne[i - 1 ];
2602
2602
}
2603
2603
acl_minus_one_tensor = aclnn_values (
2604
- ctx, minus_one_scale_buffer, sizeof (float_t ) * src0->ne [0 ],
2605
- minus_one_ne, GGML_MAX_DIMS, ACL_FLOAT, sizeof (float_t ), 1 );
2604
+ ctx, minus_one_scale_buffer, sizeof (float ) * src0->ne [0 ],
2605
+ minus_one_ne, GGML_MAX_DIMS, ACL_FLOAT, sizeof (float ), 1 );
2606
2606
int64_t dim = 3 ;
2607
2607
int64_t * index = new int64_t [src0->ne [0 ]];
2608
2608
for (int i = 0 ; i < src0->ne [0 ]; i++) {
@@ -2630,22 +2630,22 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
2630
2630
minus_one_scale_buffer = minus_one_scale_allocator.get ();
2631
2631
int64_t minus_one_ne[4 ] = {src0->ne [0 ], 1 , 1 , 1 };
2632
2632
size_t minus_one_nb[GGML_MAX_DIMS];
2633
- minus_one_nb[0 ] = sizeof (float_t );
2633
+ minus_one_nb[0 ] = sizeof (float );
2634
2634
for (int i = 1 ; i < GGML_MAX_DIMS; i++) {
2635
2635
minus_one_nb[i] = minus_one_nb[i - 1 ] * minus_one_ne[i - 1 ];
2636
2636
}
2637
2637
acl_minus_one_tensor = aclnn_values (
2638
- ctx, minus_one_scale_buffer, sizeof (float_t ) * src0->ne [0 ],
2639
- minus_one_ne, GGML_MAX_DIMS, ACL_FLOAT, sizeof (float_t ), 1 );
2638
+ ctx, minus_one_scale_buffer, sizeof (float ) * src0->ne [0 ],
2639
+ minus_one_ne, GGML_MAX_DIMS, ACL_FLOAT, sizeof (float ), 1 );
2640
2640
// -1 * first half
2641
2641
int64_t first_half_ne[4 ] = {src0->ne [0 ] / 2 , 1 , 1 , 1 };
2642
2642
size_t first_half_nb[GGML_MAX_DIMS];
2643
- first_half_nb[0 ] = sizeof (float_t );
2643
+ first_half_nb[0 ] = sizeof (float );
2644
2644
for (int i = 1 ; i < GGML_MAX_DIMS; i++) {
2645
2645
first_half_nb[i] = first_half_nb[i - 1 ] * first_half_ne[i - 1 ];
2646
2646
}
2647
2647
aclTensor* acl_first_half_tensor = ggml_cann_create_tensor (
2648
- minus_one_scale_buffer, ACL_FLOAT, sizeof (float_t ), first_half_ne,
2648
+ minus_one_scale_buffer, ACL_FLOAT, sizeof (float ), first_half_ne,
2649
2649
first_half_nb, GGML_MAX_DIMS);
2650
2650
bool inplace = true ;
2651
2651
float scale = -1 ;
@@ -2685,28 +2685,28 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
2685
2685
// TODO: ne0 != n_dims in mode2
2686
2686
} else if (src0->type == GGML_TYPE_F16) {
2687
2687
size_t input_fp32_nb[GGML_MAX_DIMS];
2688
- input_fp32_nb[0 ] = sizeof (float_t );
2688
+ input_fp32_nb[0 ] = sizeof (float );
2689
2689
for (int i = 1 ; i < GGML_MAX_DIMS; i++) {
2690
2690
input_fp32_nb[i] = input_fp32_nb[i - 1 ] * dst->ne [i - 1 ];
2691
2691
}
2692
2692
ggml_cann_pool_alloc fp32_allocator1 (
2693
- ctx.pool (), ggml_nelements (dst) * sizeof (float_t ));
2693
+ ctx.pool (), ggml_nelements (dst) * sizeof (float ));
2694
2694
void * input_fp32_buffer1 = fp32_allocator1.get ();
2695
2695
aclTensor* input_fp32_tensor1 = ggml_cann_create_tensor (
2696
- input_fp32_buffer1, ACL_FLOAT, sizeof (float_t ), dst->ne ,
2696
+ input_fp32_buffer1, ACL_FLOAT, sizeof (float ), dst->ne ,
2697
2697
input_fp32_nb, GGML_MAX_DIMS);
2698
2698
ggml_cann_pool_alloc fp32_allocator2 (
2699
- ctx.pool (), ggml_nelements (dst) * sizeof (float_t ));
2699
+ ctx.pool (), ggml_nelements (dst) * sizeof (float ));
2700
2700
void * input_fp32_buffer2 = fp32_allocator2.get ();
2701
2701
aclTensor* input_fp32_tensor2 = ggml_cann_create_tensor (
2702
- input_fp32_buffer2, ACL_FLOAT, sizeof (float_t ), dst->ne ,
2702
+ input_fp32_buffer2, ACL_FLOAT, sizeof (float ), dst->ne ,
2703
2703
input_fp32_nb, GGML_MAX_DIMS);
2704
2704
2705
2705
ggml_cann_pool_alloc fp32_allocator (
2706
- ctx.pool (), ggml_nelements (dst) * sizeof (float_t ));
2706
+ ctx.pool (), ggml_nelements (dst) * sizeof (float ));
2707
2707
output_fp32_buffer = fp32_allocator.get ();
2708
2708
aclTensor* output_fp32_tensor = ggml_cann_create_tensor (
2709
- output_fp32_buffer, ACL_FLOAT, sizeof (float_t ), dst->ne ,
2709
+ output_fp32_buffer, ACL_FLOAT, sizeof (float ), dst->ne ,
2710
2710
input_fp32_nb, GGML_MAX_DIMS);
2711
2711
aclnn_mul (ctx, acl_src, acl_cos_reshape_tensor, input_fp32_tensor1);
2712
2712
aclnn_mul (ctx, acl_input_roll_mul_scale_tensor, acl_sin_reshape_tensor,
0 commit comments