Commit 3b084d5

cann: clean the whitespace
1 parent 1a3bfec · commit 3b084d5

File tree

2 files changed: +24 -25 lines

  ggml/src/ggml-cann/aclnn_ops.cpp
  ggml/src/ggml-cann/aclnn_ops.h


ggml/src/ggml-cann/aclnn_ops.cpp

Lines changed: 23 additions & 24 deletions
@@ -2898,14 +2898,14 @@ void ggml_cann_flash_attn_ext(ggml_backend_cann_context& ctx, ggml_tensor* dst){
             ggml_nelements(src0) * faElemSize);
 
         int64_t* src0_f16_ne = src0->ne;
-        size_t src0_f16_nb[GGML_MAX_DIMS];
+        size_t src0_f16_nb[GGML_MAX_DIMS];
         src0_f16_nb[0] = sizeof(uint16_t);
         for(int i = 1; i < GGML_MAX_DIMS; ++i){
             src0_f16_nb[i] = src0_f16_nb[i - 1] * src0_f16_ne[i - 1];
         }
 
         acl_src0_f16_tensor = ggml_cann_create_tensor(
-            src0_f16_buffer, faDataType, faElemSize,
+            src0_f16_buffer, faDataType, faElemSize,
             src0_f16_ne, src0_f16_nb, GGML_MAX_DIMS
         );
         aclnn_cast(ctx, acl_src0_f32_tensor, acl_src0_f16_tensor, faDataType);
@@ -2914,7 +2914,7 @@ void ggml_cann_flash_attn_ext(ggml_backend_cann_context& ctx, ggml_tensor* dst){
         acl_src0_f16_tensor = ggml_cann_create_tensor(src0);
     }
 
-    // Step 2: create the acl tensors for src1 (Key), src2 (Value),
+    // Step 2: create the acl tensors for src1 (Key), src2 (Value),
     // and the direct output from FusedInferAttention
 
     acl_src1_f16_tensor = ggml_cann_create_tensor(src1);
@@ -2932,24 +2932,23 @@ void ggml_cann_flash_attn_ext(ggml_backend_cann_context& ctx, ggml_tensor* dst){
     }
 
     acl_dst_f16_tensor = ggml_cann_create_tensor(
-        out_f16_buffer, faDataType, faElemSize,
+        out_f16_buffer, faDataType, faElemSize,
         out_f16_ne, out_f16_nb, GGML_MAX_DIMS
     );
 
-
     // Step 3: create the PSEShift tensor if needed
     // this tensor is considered as mask (f16) in the llama.cpp
-
+
     aclTensor* bcast_pse_tensor = nullptr;
     int64_t bcast_pse_ne[GGML_MAX_DIMS];
     size_t bcast_pse_nb[GGML_MAX_DIMS];
     ggml_cann_pool_alloc bcast_pse_allocator(ctx.pool());
     void* bcast_pse_buffer = nullptr;
-
+
     if(src3 != nullptr){
         bcast_pse_buffer = bcast_pse_allocator.alloc(
             ggml_nelements(src3) * src0->ne[2] * sizeof(uint16_t));
-
+
         if(src0->ne[1] > 1){
             // Case 1: broadcast pse for prefill stage with multiple head
             aclTensor* acl_mask_f16_tensor = ggml_cann_create_tensor(src3);
@@ -2964,7 +2963,7 @@ void ggml_cann_flash_attn_ext(ggml_backend_cann_context& ctx, ggml_tensor* dst){
             }
 
             bcast_pse_tensor = ggml_cann_create_tensor(
-                bcast_pse_buffer, ACL_FLOAT16, sizeof(uint16_t),
+                bcast_pse_buffer, ACL_FLOAT16, sizeof(uint16_t),
                 bcast_pse_ne, bcast_pse_nb, GGML_MAX_DIMS);
 
             int64_t repeats[] = {1, src0->ne[2], 1, 1};
@@ -2977,7 +2976,7 @@ void ggml_cann_flash_attn_ext(ggml_backend_cann_context& ctx, ggml_tensor* dst){
             size_t* trunc_pse_nb = src3->nb;
 
             aclTensor* acl_mask_f16_trunc_tensor = ggml_cann_create_tensor(
-                src3->data, ACL_FLOAT16, sizeof(uint16_t),
+                src3->data, ACL_FLOAT16, sizeof(uint16_t),
                 trunc_pse_ne, trunc_pse_nb, GGML_MAX_DIMS);
 
             bcast_pse_ne[0] = src3->ne[0];
@@ -2991,7 +2990,7 @@ void ggml_cann_flash_attn_ext(ggml_backend_cann_context& ctx, ggml_tensor* dst){
             }
 
             bcast_pse_tensor = ggml_cann_create_tensor(
-                bcast_pse_buffer, ACL_FLOAT16, sizeof(uint16_t),
+                bcast_pse_buffer, ACL_FLOAT16, sizeof(uint16_t),
                 bcast_pse_ne, bcast_pse_nb, GGML_MAX_DIMS);
 
             int64_t repeats[] = {1, src0->ne[2], 1, 1};
@@ -3007,8 +3006,8 @@ void ggml_cann_flash_attn_ext(ggml_backend_cann_context& ctx, ggml_tensor* dst){
             const int64_t n_head = src0->ne[2];
             const int n_heads_log2_floor = 1u << (uint32_t)floor(log2(n_head));
             float m0 = powf(2.0f, -(maxBias) / n_heads_log2_floor);
-            float m1 = powf(2.0f, -(maxBias / 2.0f) / n_heads_log2_floor);
-            // init arange
+            float m1 = powf(2.0f, -(maxBias / 2.0f) / n_heads_log2_floor);
+            // init arange
             ggml_cann_pool_alloc arange_allocator(ctx.pool(),
                 ne2_ne3 * faElemSize);
             void* tmp_arange_buffer = arange_allocator.get();
@@ -3076,11 +3075,11 @@ void ggml_cann_flash_attn_ext(ggml_backend_cann_context& ctx, ggml_tensor* dst){
             int64_t tmp_mk_base_ne[] = {ne2_ne3};
             size_t tmp_mk_base_nb[] = {faElemSize};
             aclTensor* tmp_mk_base_tensor = ggml_cann_create_tensor(
-                tmp_mk_base_buffer, faDataType, faElemSize,
+                tmp_mk_base_buffer, faDataType, faElemSize,
                 tmp_mk_base_ne, tmp_mk_base_nb,
                 GGML_MAX_DIMS - 3, ACL_FORMAT_ND);
             aclTensor* tmp_arange_tensor = ggml_cann_create_tensor(
-                tmp_arange_buffer, faDataType, faElemSize,
+                tmp_arange_buffer, faDataType, faElemSize,
                 tmp_mk_base_ne, tmp_mk_base_nb,
                 GGML_MAX_DIMS - 3, ACL_FORMAT_ND);
             aclnn_pow_tensor_tensor(ctx, tmp_mk_base_tensor, tmp_arange_tensor);
@@ -3095,12 +3094,12 @@ void ggml_cann_flash_attn_ext(ggml_backend_cann_context& ctx, ggml_tensor* dst){
             aclTensor* tmp_mk_tensor = ggml_cann_create_tensor(
                 tmp_mk_base_buffer, faDataType, faElemSize,
                 tmp_mk_ne, tmp_mk_nb, GGML_MAX_DIMS,
-                ACL_FORMAT_ND);
+                ACL_FORMAT_ND);
             GGML_CANN_CALL_ACLNN_OP(ctx, InplaceMul, bcast_pse_tensor, tmp_mk_tensor);
 
             ggml_cann_release_resources(ctx, tmp_arange1_tensor, tmp_arange2_tensor,
                 tmp_mk_base1_tensor, tmp_mk_base2_tensor, tmp_mk_base_tensor,
-                tmp_arange_tensor, tmp_mk_tensor);
+                tmp_arange_tensor, tmp_mk_tensor);
         }
     }
 
@@ -3128,7 +3127,7 @@ void ggml_cann_flash_attn_ext(ggml_backend_cann_context& ctx, ggml_tensor* dst){
 
     // Step 5: launch the FusedInferAttentionScoreV2 kernel.
     // Refer to https://gitee.com/ascend/cann-ops-adv/blob/master/docs/FusedInferAttentionScoreV2.md
-
+
     GGML_CANN_CALL_ACLNN_OP(ctx, FusedInferAttentionScoreV2,
         acl_q_tensor, acl_k_tensor_list, acl_v_tensor_list, // q, k, v
         bcast_pse_tensor, nullptr, // pse, mask
@@ -3170,20 +3169,20 @@ void ggml_cann_flash_attn_ext(ggml_backend_cann_context& ctx, ggml_tensor* dst){
             perm_out_f16_nb[i] = perm_out_f16_nb[i - 1] * perm_out_f16_ne[i - 1];
         }
         aclTensor* acl_perm_out_f16_tensor = ggml_cann_create_tensor(
-            perm_out_f16_buffer, faDataType, faElemSize,
+            perm_out_f16_buffer, faDataType, faElemSize,
             perm_out_f16_ne, perm_out_f16_nb, GGML_MAX_DIMS);
         aclnn_permute(ctx, acl_dst_f16_tensor, acl_perm_out_f16_tensor, new_dim, GGML_MAX_DIMS);
-        aclnn_cast(ctx,
+        aclnn_cast(ctx,
             acl_perm_out_f16_tensor, acl_dst_tensor, ggml_cann_type_mapping(dst->type));
         ggml_cann_release_resources(ctx, acl_perm_out_f16_tensor);
     }else{
         // only need to permute
         aclnn_permute(ctx, acl_dst_f16_tensor, acl_dst_tensor, new_dim, GGML_MAX_DIMS);
     }
-    ggml_cann_release_resources(ctx, acl_src0_f16_tensor,
-        acl_src1_f16_tensor,
-        acl_src2_f16_tensor,
-        acl_dst_f16_tensor,
+    ggml_cann_release_resources(ctx, acl_src0_f16_tensor,
+        acl_src1_f16_tensor,
+        acl_src2_f16_tensor,
+        acl_dst_f16_tensor,
         acl_dst_tensor);
     if(src3 != nullptr){
         ggml_cann_release_resources(ctx, bcast_pse_tensor);
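
Note on the context lines above: each of the retouched ggml_cann_create_tensor call sites passes a shape array (ne) together with byte strides (nb) built the same way, with nb[0] set to the element size and every later stride multiplied up from the previous extent. The following is a minimal standalone sketch of that stride pattern; kMaxDims, the example shape, and the printout are illustrative stand-ins and not code from this commit.

#include <cstddef>
#include <cstdint>
#include <cstdio>

// Illustrative stand-in for GGML_MAX_DIMS from ggml.h.
constexpr int kMaxDims = 4;

// Densely packed byte strides, mirroring the pattern in the diff:
// nb[0] = element size, nb[i] = nb[i - 1] * ne[i - 1].
static void make_contiguous_strides(const int64_t ne[kMaxDims], size_t elem_size,
                                    size_t nb[kMaxDims]) {
    nb[0] = elem_size;
    for (int i = 1; i < kMaxDims; ++i) {
        nb[i] = nb[i - 1] * ne[i - 1];
    }
}

int main() {
    // Example only: an f16 tensor shaped {head_dim, n_tokens, n_heads, n_batch}.
    const int64_t ne[kMaxDims] = {128, 32, 8, 1};
    size_t nb[kMaxDims];
    make_contiguous_strides(ne, sizeof(uint16_t), nb); // f16 -> 2 bytes per element
    for (int i = 0; i < kMaxDims; ++i) {
        std::printf("nb[%d] = %zu\n", i, nb[i]); // 2, 256, 8192, 65536
    }
    return 0;
}

Such ne/nb pairs are what calls like ggml_cann_create_tensor(src0_f16_buffer, faDataType, faElemSize, src0_f16_ne, src0_f16_nb, GGML_MAX_DIMS) in the diff consume.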

ggml/src/ggml-cann/aclnn_ops.h

Lines changed: 1 addition & 1 deletion
@@ -720,7 +720,7 @@ void ggml_cann_step(ggml_backend_cann_context& ctx, ggml_tensor* dst);
  * @details This function implements the memory-efficient Flash Attention algorithm
  * for computing scaled dot-product attention with hardware acceleration.
  * The result is stored in the destination tensor `dst`.
- *
+ *
  * This operation is accelerated using the CANN backend to improve runtime performance.
  *
  * @param ctx The CANN context used for operations.
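
For background on the documented behaviour above: the header describes ggml_cann_flash_attn_ext as computing scaled dot-product attention, out = softmax(Q·K^T·scale + mask)·V, where the mask term corresponds to the PSEShift/pse tensor built in Step 3 of the .cpp diff. The sketch below is a naive CPU reference of that formula under assumed row-major [rows, d] layouts; it is not the CANN/FusedInferAttentionScoreV2 implementation, and every name in it is hypothetical.

#include <algorithm>
#include <cmath>
#include <vector>

// Naive scaled dot-product attention: out = softmax(Q * K^T * scale + mask) * V.
// Q: [n_q, d], K: [n_kv, d], V: [n_kv, d], mask: [n_q, n_kv] or empty, out: [n_q, d].
std::vector<float> attention_ref(const std::vector<float>& Q,
                                 const std::vector<float>& K,
                                 const std::vector<float>& V,
                                 const std::vector<float>& mask,
                                 int n_q, int n_kv, int d) {
    const float scale = 1.0f / std::sqrt((float) d);
    std::vector<float> out(n_q * d, 0.0f);
    std::vector<float> scores(n_kv);

    for (int i = 0; i < n_q; ++i) {
        // scores[j] = scale * dot(Q[i], K[j]), plus the mask entry if one is given
        float max_s = -INFINITY;
        for (int j = 0; j < n_kv; ++j) {
            float s = 0.0f;
            for (int k = 0; k < d; ++k) s += Q[i * d + k] * K[j * d + k];
            s *= scale;
            if (!mask.empty()) s += mask[i * n_kv + j];
            scores[j] = s;
            max_s = std::max(max_s, s);
        }
        // numerically stable softmax over the key dimension
        float sum = 0.0f;
        for (int j = 0; j < n_kv; ++j) {
            scores[j] = std::exp(scores[j] - max_s);
            sum += scores[j];
        }
        // weighted sum of the value rows
        for (int j = 0; j < n_kv; ++j) {
            const float w = scores[j] / sum;
            for (int k = 0; k < d; ++k) out[i * d + k] += w * V[j * d + k];
        }
    }
    return out;
}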
