@@ -1425,21 +1425,25 @@ static void aclnn_pow_tensor_tensor(ggml_backend_cann_context& ctx,
14251425 * @param start         Starting exponent offset. 
14261426 * @param stop          Stopping exponent offset (exclusive). 
14271427 * @param step          Step size for the exponent increment. 
1428+  * @param dtype         Data type for slope tensor. 
14281429 */  
14291430static  void  aclnn_get_slope_inner (ggml_backend_cann_context& ctx, void * slope_buffer,
1430-     float  m, int64_t  size, float  start, float  stop, float  step){
1431+     float  m, int64_t  size, float  start, float  stop, float  step, ggml_type dtype){
1432+     aclDataType acl_type = ggml_cann_type_mapping (dtype);
1433+     size_t  type_size = ggml_type_size (dtype);
1434+ 
14311435    int64_t  ne[] = {size};
1432-     size_t  nb[] = {sizeof ( uint16_t ) };
1436+     size_t  nb[] = {type_size };
14331437
1434-     ggml_cann_pool_alloc arange_allocator (ctx.pool (), size * sizeof ( uint16_t ) );
1438+     ggml_cann_pool_alloc arange_allocator (ctx.pool (), size * type_size );
14351439    void * arange_buffer = arange_allocator.get ();
14361440
14371441    aclTensor* arange_tensor = ggml_cann_create_tensor (
1438-         arange_buffer, ACL_FLOAT16,  sizeof ( uint16_t ) , ne, nb, 1 );
1442+         arange_buffer, acl_type, type_size , ne, nb, 1 );
14391443    aclnn_arange (ctx, arange_tensor, start, stop, step, size);
14401444
14411445    aclTensor* slope_tensor = ggml_cann_create_tensor (
1442-         slope_buffer, ACL_FLOAT16,  sizeof ( uint16_t ) , ne, nb, 1 );
1446+         slope_buffer, acl_type, type_size , ne, nb, 1 );
14431447
14441448    aclScalar* sc = aclCreateScalar (&m, aclDataType::ACL_FLOAT);
14451449
@@ -1470,10 +1474,11 @@ static void aclnn_get_slope_inner(ggml_backend_cann_context& ctx, void* slope_bu
14701474 * @param n_head        Total number of attention heads. 
14711475 * @param slope_buffer  Pointer to the output buffer for storing slopes; elements use the type given by dtype.
14721476 * @param max_bias      Maximum bias value for slope computation. 
1477+  * @param dtype         Data type for slope tensor. 
14731478 * 
14741479*/ 
14751480static  void  aclnn_get_slope (ggml_backend_cann_context & ctx, int64_t  n_head,
1476-     void * slope_buffer, float  max_bias) {
1481+     void * slope_buffer, float  max_bias, ggml_type dtype ) {
14771482    const  int  n_head_log2 = 1u  << (uint32_t ) floor (log2 (n_head));
14781483
14791484    float  m0 = powf (2 .0f , -(max_bias) / n_head_log2);
@@ -1490,7 +1495,7 @@ static void aclnn_get_slope(ggml_backend_cann_context & ctx, int64_t n_head,
14901495    float  step  = 1 ;
14911496    float  count = n_head_log2;
14921497    //  end needs to be +1 because aclnn uses a left-closed, right-open interval.
1493-     aclnn_get_slope_inner (ctx, slope_buffer, m0, count, start, end + 1 , step);
1498+     aclnn_get_slope_inner (ctx, slope_buffer, m0, count, start, end + 1 , step, dtype );
14941499    if  (n_head_log2 < n_head) {
14951500        //  arange2
14961501        start = 2  * (n_head_log2 - n_head_log2) + 1 ;
@@ -1499,7 +1504,7 @@ static void aclnn_get_slope(ggml_backend_cann_context & ctx, int64_t n_head,
14991504        count = n_head - n_head_log2;
15001505        aclnn_get_slope_inner (
15011506            ctx, (char  *) slope_buffer + n_head_log2 * sizeof (float ),
1502-             m1, count, start, end + 1 , step);
1507+             m1, count, start, end + 1 , step, dtype );
15031508    }
15041509}
15051510
@@ -1536,7 +1541,7 @@ static void aclnn_add_alibi(ggml_backend_cann_context& ctx, ggml_tensor* mask,
15361541        ggml_cann_pool_alloc bias_allocator (
15371542                    ctx.pool (), ggml_nelements (dst) * ggml_element_size (dst));
15381543        bias_buffer = bias_allocator.get ();
1539-         aclnn_get_slope (ctx, n_heads, slope_buffer, max_bias);
1544+         aclnn_get_slope (ctx, n_heads, slope_buffer, max_bias, GGML_TYPE_F32 );
15401545    }
15411546
15421547    //  broadcast for mask, slope and dst;
@@ -3269,7 +3274,7 @@ void ggml_cann_flash_attn_ext(ggml_backend_cann_context& ctx, ggml_tensor* dst){
32693274                const  int64_t  n_heads = src0->ne [2 ];
32703275                ggml_cann_pool_alloc slope_allocator (ctx.pool (), n_heads * sizeof (uint16_t ));
32713276                void * slope_buffer = slope_allocator.get ();
3272-                 aclnn_get_slope (ctx, n_heads, slope_buffer, maxBias);
3277+                 aclnn_get_slope (ctx, n_heads, slope_buffer, maxBias, GGML_TYPE_F16 );
32733278
32743279                int64_t  slope_ne[] = {1 , 1 , n_heads, 1 };
32753280                size_t  slope_nb[GGML_MAX_DIMS];
0 commit comments