@@ -1425,21 +1425,25 @@ static void aclnn_pow_tensor_tensor(ggml_backend_cann_context& ctx,
1425
1425
* @param start Starting exponent offset.
1426
1426
* @param stop Stopping exponent offset (exclusive).
1427
1427
* @param step Step size for the exponent increment.
1428
+ * @param dtype Data type for slope tensor.
1428
1429
*/
1429
1430
static void aclnn_get_slope_inner (ggml_backend_cann_context& ctx, void * slope_buffer,
1430
- float m, int64_t size, float start, float stop, float step){
1431
+ float m, int64_t size, float start, float stop, float step, ggml_type dtype){
1432
+ aclDataType acl_type = ggml_cann_type_mapping (dtype);
1433
+ size_t type_size = ggml_type_size (dtype);
1434
+
1431
1435
int64_t ne[] = {size};
1432
- size_t nb[] = {sizeof ( uint16_t ) };
1436
+ size_t nb[] = {type_size };
1433
1437
1434
- ggml_cann_pool_alloc arange_allocator (ctx.pool (), size * sizeof ( uint16_t ) );
1438
+ ggml_cann_pool_alloc arange_allocator (ctx.pool (), size * type_size );
1435
1439
void * arange_buffer = arange_allocator.get ();
1436
1440
1437
1441
aclTensor* arange_tensor = ggml_cann_create_tensor (
1438
- arange_buffer, ACL_FLOAT16, sizeof ( uint16_t ) , ne, nb, 1 );
1442
+ arange_buffer, acl_type, type_size , ne, nb, 1 );
1439
1443
aclnn_arange (ctx, arange_tensor, start, stop, step, size);
1440
1444
1441
1445
aclTensor* slope_tensor = ggml_cann_create_tensor (
1442
- slope_buffer, ACL_FLOAT16, sizeof ( uint16_t ) , ne, nb, 1 );
1446
+ slope_buffer, acl_type, type_size , ne, nb, 1 );
1443
1447
1444
1448
aclScalar* sc = aclCreateScalar (&m, aclDataType::ACL_FLOAT);
1445
1449
@@ -1470,10 +1474,11 @@ static void aclnn_get_slope_inner(ggml_backend_cann_context& ctx, void* slope_bu
1470
1474
* @param n_head Total number of attention heads.
1471
1475
* @param slope_buffer Pointer to the output buffer (array of `dtype` elements) for storing slopes.
1472
1476
* @param max_bias Maximum bias value for slope computation.
1477
+ * @param dtype Data type for slope tensor.
1473
1478
*
1474
1479
*/
1475
1480
static void aclnn_get_slope (ggml_backend_cann_context & ctx, int64_t n_head,
1476
- void * slope_buffer, float max_bias) {
1481
+ void * slope_buffer, float max_bias, ggml_type dtype ) {
1477
1482
const int n_head_log2 = 1u << (uint32_t ) floor (log2 (n_head));
1478
1483
1479
1484
float m0 = powf (2 .0f , -(max_bias) / n_head_log2);
@@ -1490,7 +1495,7 @@ static void aclnn_get_slope(ggml_backend_cann_context & ctx, int64_t n_head,
1490
1495
float step = 1 ;
1491
1496
float count = n_head_log2;
1492
1497
// end needs to be +1 because aclnn uses a left-closed, right-open interval.
1493
- aclnn_get_slope_inner (ctx, slope_buffer, m0, count, start, end + 1 , step);
1498
+ aclnn_get_slope_inner (ctx, slope_buffer, m0, count, start, end + 1 , step, dtype );
1494
1499
if (n_head_log2 < n_head) {
1495
1500
// arange2
1496
1501
start = 2 * (n_head_log2 - n_head_log2) + 1 ;
@@ -1499,7 +1504,7 @@ static void aclnn_get_slope(ggml_backend_cann_context & ctx, int64_t n_head,
1499
1504
count = n_head - n_head_log2;
1500
1505
aclnn_get_slope_inner (
1501
1506
ctx, (char *) slope_buffer + n_head_log2 * sizeof (float ),
1502
- m1, count, start, end + 1 , step);
1507
+ m1, count, start, end + 1 , step, dtype );
1503
1508
}
1504
1509
}
1505
1510
@@ -1536,7 +1541,7 @@ static void aclnn_add_alibi(ggml_backend_cann_context& ctx, ggml_tensor* mask,
1536
1541
ggml_cann_pool_alloc bias_allocator (
1537
1542
ctx.pool (), ggml_nelements (dst) * ggml_element_size (dst));
1538
1543
bias_buffer = bias_allocator.get ();
1539
- aclnn_get_slope (ctx, n_heads, slope_buffer, max_bias);
1544
+ aclnn_get_slope (ctx, n_heads, slope_buffer, max_bias, GGML_TYPE_F32 );
1540
1545
}
1541
1546
1542
1547
// broadcast for mask, slope and dst;
@@ -3269,7 +3274,7 @@ void ggml_cann_flash_attn_ext(ggml_backend_cann_context& ctx, ggml_tensor* dst){
3269
3274
const int64_t n_heads = src0->ne [2 ];
3270
3275
ggml_cann_pool_alloc slope_allocator (ctx.pool (), n_heads * sizeof (uint16_t ));
3271
3276
void * slope_buffer = slope_allocator.get ();
3272
- aclnn_get_slope (ctx, n_heads, slope_buffer, maxBias);
3277
+ aclnn_get_slope (ctx, n_heads, slope_buffer, maxBias, GGML_TYPE_F16 );
3273
3278
3274
3279
int64_t slope_ne[] = {1 , 1 , n_heads, 1 };
3275
3280
size_t slope_nb[GGML_MAX_DIMS];
0 commit comments