@@ -1425,21 +1425,25 @@ static void aclnn_pow_tensor_tensor(ggml_backend_cann_context& ctx,
14251425 * @param start Starting exponent offset.
14261426 * @param stop Stopping exponent offset (exclusive).
14271427 * @param step Step size for the exponent increment.
1428+ * @param dtype Data type for slope tensor.
14281429 */
14291430static void aclnn_get_slope_inner (ggml_backend_cann_context& ctx, void * slope_buffer,
1430- float m, int64_t size, float start, float stop, float step){
1431+ float m, int64_t size, float start, float stop, float step, ggml_type dtype){
1432+ aclDataType acl_type = ggml_cann_type_mapping (dtype);
1433+ size_t type_size = ggml_type_size (dtype);
1434+
14311435 int64_t ne[] = {size};
1432- size_t nb[] = {sizeof ( uint16_t ) };
1436+ size_t nb[] = {type_size };
14331437
1434- ggml_cann_pool_alloc arange_allocator (ctx.pool (), size * sizeof ( uint16_t ) );
1438+ ggml_cann_pool_alloc arange_allocator (ctx.pool (), size * type_size );
14351439 void * arange_buffer = arange_allocator.get ();
14361440
14371441 aclTensor* arange_tensor = ggml_cann_create_tensor (
1438- arange_buffer, ACL_FLOAT16, sizeof ( uint16_t ) , ne, nb, 1 );
1442+ arange_buffer, acl_type, type_size , ne, nb, 1 );
14391443 aclnn_arange (ctx, arange_tensor, start, stop, step, size);
14401444
14411445 aclTensor* slope_tensor = ggml_cann_create_tensor (
1442- slope_buffer, ACL_FLOAT16, sizeof ( uint16_t ) , ne, nb, 1 );
1446+ slope_buffer, acl_type, type_size , ne, nb, 1 );
14431447
14441448 aclScalar* sc = aclCreateScalar (&m, aclDataType::ACL_FLOAT);
14451449
@@ -1470,10 +1474,11 @@ static void aclnn_get_slope_inner(ggml_backend_cann_context& ctx, void* slope_bu
14701474 * @param n_head Total number of attention heads.
14711475 * @param slope_buffer Pointer to the output buffer for storing slopes; its element type is given by dtype.
14721476 * @param max_bias Maximum bias value for slope computation.
1477+ * @param dtype Data type for slope tensor.
14731478 *
14741479*/
14751480static void aclnn_get_slope (ggml_backend_cann_context & ctx, int64_t n_head,
1476- void * slope_buffer, float max_bias) {
1481+ void * slope_buffer, float max_bias, ggml_type dtype ) {
14771482 const int n_head_log2 = 1u << (uint32_t ) floor (log2 (n_head));
14781483
14791484 float m0 = powf (2 .0f , -(max_bias) / n_head_log2);
@@ -1490,7 +1495,7 @@ static void aclnn_get_slope(ggml_backend_cann_context & ctx, int64_t n_head,
14901495 float step = 1 ;
14911496 float count = n_head_log2;
14921497 // end needs to be +1 because aclnn uses a left-closed, right-open interval.
1493- aclnn_get_slope_inner (ctx, slope_buffer, m0, count, start, end + 1 , step);
1498+ aclnn_get_slope_inner (ctx, slope_buffer, m0, count, start, end + 1 , step, dtype );
14941499 if (n_head_log2 < n_head) {
14951500 // arange2
14961501 start = 2 * (n_head_log2 - n_head_log2) + 1 ;
@@ -1499,7 +1504,7 @@ static void aclnn_get_slope(ggml_backend_cann_context & ctx, int64_t n_head,
14991504 count = n_head - n_head_log2;
15001505 aclnn_get_slope_inner (
15011506 ctx, (char *) slope_buffer + n_head_log2 * sizeof (float ),
1502- m1, count, start, end + 1 , step);
1507+ m1, count, start, end + 1 , step, dtype );
15031508 }
15041509}
15051510
@@ -1536,7 +1541,7 @@ static void aclnn_add_alibi(ggml_backend_cann_context& ctx, ggml_tensor* mask,
15361541 ggml_cann_pool_alloc bias_allocator (
15371542 ctx.pool (), ggml_nelements (dst) * ggml_element_size (dst));
15381543 bias_buffer = bias_allocator.get ();
1539- aclnn_get_slope (ctx, n_heads, slope_buffer, max_bias);
1544+ aclnn_get_slope (ctx, n_heads, slope_buffer, max_bias, GGML_TYPE_F32 );
15401545 }
15411546
15421547 // broadcast for mask, slope and dst;
@@ -3269,7 +3274,7 @@ void ggml_cann_flash_attn_ext(ggml_backend_cann_context& ctx, ggml_tensor* dst){
32693274 const int64_t n_heads = src0->ne [2 ];
32703275 ggml_cann_pool_alloc slope_allocator (ctx.pool (), n_heads * sizeof (uint16_t ));
32713276 void * slope_buffer = slope_allocator.get ();
3272- aclnn_get_slope (ctx, n_heads, slope_buffer, maxBias);
3277+ aclnn_get_slope (ctx, n_heads, slope_buffer, maxBias, GGML_TYPE_F16 );
32733278
32743279 int64_t slope_ne[] = {1 , 1 , n_heads, 1 };
32753280 size_t slope_nb[GGML_MAX_DIMS];
0 commit comments