Commit ebb19c5

fix
1 parent 6305eb7 commit ebb19c5

File tree

1 file changed (+1 addition, -10 deletions)

src/llama.cpp

Lines changed: 1 addition & 10 deletions
@@ -591,21 +591,12 @@ static struct ggml_tensor * llm_build_kqv(
 
     struct ggml_tensor * padded_v = v;
     int64_t n_embd_head_v_out = n_embd_head_v;
-    // make sure the feature dimension is padded correctly (assumes v's feature dimension is ne[2])
     if (n_embd_head_v < n_embd_head_k) {
-        padded_v = ggml_pad(ctx, v,
-            0,                             // do not pad dim 0
-            0,                             // do not pad dim 1
-            n_embd_head_k - n_embd_head_v, // pad the feature dimension (dim 2)
-            0);
+        padded_v = ggml_pad(ctx, v, 0, k->ne[0] - v->ne[1], 0, 0);
         cb(padded_v, "padded_v", il);
         n_embd_head_v_out = n_embd_head_k;
     }
 
-    // make sure the flash attention input dimensions are aligned
-    GGML_ASSERT(padded_v->ne[2] == k->ne[2]); // feature dimensions match
-    GGML_ASSERT(q->ne[1] == k->ne[1]);        // sequence lengths match
-
     cur = ggml_flash_attn_ext(ctx, q, k, padded_v, kq_mask, kq_scale, hparams.f_max_alibi_bias,
             hparams.attn_soft_cap ? hparams.f_attn_logit_softcapping : 0.0f);

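Note (not part of the commit): ggml_pad(ctx, a, p0, p1, p2, p3) grows each dimension of a by the given amount, filling the new elements with zeros at the end of that dimension, so the added one-liner extends v's dim 1 by k->ne[0] - v->ne[1] elements. Below is a minimal standalone sketch of that shape change; the head sizes (128 for K, 64 for V) and the token count (32) are made-up illustration values, not llama.cpp's actual KV layout.

// Standalone sketch (not from this commit): how ggml_pad changes a tensor's shape.
// Head sizes (128 for K, 64 for V) and the token count (32) are hypothetical.
#include "ggml.h"

#include <stdio.h>

int main(void) {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16*1024*1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ true,   // we only look at shapes, no tensor data is allocated
    };
    struct ggml_context * ctx = ggml_init(params);

    // toy per-head layouts: K as [head_dim_k, n_tokens], V as [n_tokens, head_dim_v]
    struct ggml_tensor * k = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 128, 32);
    struct ggml_tensor * v = ggml_new_tensor_2d(ctx, GGML_TYPE_F32,  32, 64);

    // same call shape as the added line: zero-pad dim 1 of V up to K's dim 0
    struct ggml_tensor * padded_v = ggml_pad(ctx, v, 0, (int)(k->ne[0] - v->ne[1]), 0, 0);

    printf("v        : [%lld, %lld]\n", (long long) v->ne[0],        (long long) v->ne[1]);
    printf("padded_v : [%lld, %lld]\n", (long long) padded_v->ne[0], (long long) padded_v->ne[1]);

    ggml_free(ctx);
    return 0;
}

With these toy shapes, v prints as [32, 64] and padded_v as [32, 128], matching K's head dimension.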