
Commit e341ec6

fix
1 parent be5f499 commit e341ec6

File tree

1 file changed: +13 -4 lines changed


src/llama.cpp

Lines changed: 13 additions & 4 deletions
@@ -600,17 +600,26 @@ static struct ggml_tensor * llm_build_kqv(
         cur = ggml_flash_attn_ext(ctx, q, k, padded_v, kq_mask, kq_scale, hparams.f_max_alibi_bias,
                                   hparams.attn_soft_cap ? hparams.f_attn_logit_softcapping : 0.0f);
 
+        LLAMA_LOG_INFO("kq_scale: %f\n", kq_scale);
+
+        // check the softmax parameters
+        if (hparams.attn_soft_cap) {
+            LLAMA_LOG_INFO("Soft capping applied: %f\n", hparams.f_attn_logit_softcapping);
+        }
+        LLAMA_LOG_INFO("q shape: [%ld, %ld, %ld]\n", q->ne[0], q->ne[1], q->ne[2]);
+        LLAMA_LOG_INFO("k shape: [%ld, %ld, %ld]\n", k->ne[0], k->ne[1], k->ne[2]);
+        LLAMA_LOG_INFO("padded_v shape: [%ld, %ld, %ld]\n", padded_v->ne[0], padded_v->ne[1], padded_v->ne[2]);
+
         if (v->type == GGML_TYPE_F32) {
             ggml_flash_attn_ext_set_prec(cur, GGML_PREC_F32);
         }
 
         if (n_embd_head_v < n_embd_head_k) {
-            cur = ggml_reshape_3d(ctx, cur, n_embd_head_v_out, n_head, n_tokens);
-            cur = ggml_view_3d(ctx, cur, n_embd_head_v, n_head, n_tokens,
+            cur = ggml_cont(ctx, ggml_reshape_3d(ctx, cur, n_embd_head_v_out, n_head, n_tokens));
+            cur = ggml_cont(ctx, ggml_view_3d(ctx, cur, n_embd_head_v, n_head, n_tokens,
                     ggml_element_size(cur) * n_embd_head_v_out,
                     ggml_element_size(cur) * n_embd_head_v_out * n_head,
-                    0);
-            cur = ggml_cont(ctx, cur);
+                    0));
         }
 
         cur = ggml_reshape_2d(ctx, cur, n_embd_head_v*n_head, n_tokens);
