
Commit a5ca0eb ("fix")
1 parent: d826821

File tree: 1 file changed (+11 lines, -3 lines)


src/llama.cpp

Lines changed: 11 additions & 3 deletions
@@ -582,7 +582,7 @@ static struct ggml_tensor * llm_build_kqv(
 
         // split cached v into n_head heads (not transposed)
         struct ggml_tensor * v =
-            ggml_view_3d(ctx, kv.v_l[il],
+            ggml_view_3d(ctx, kv.v_l[il],
                     n_embd_head_v, n_kv, n_head_kv,
                     ggml_row_size(kv.v_l[il]->type, n_embd_v_gqa),
                     ggml_row_size(kv.v_l[il]->type, n_embd_head_v),
@@ -592,8 +592,7 @@ static struct ggml_tensor * llm_build_kqv(
         struct ggml_tensor * padded_v = v;
         int64_t n_embd_head_v_out = n_embd_head_v;
         if (n_embd_head_v < n_embd_head_k) {
-            // Pad the feature dimension (assuming it's the third dimension, adjust indices as per actual tensor layout)
-            padded_v = ggml_pad(ctx, v, 0, 0, k->ne[2] - v->ne[2], 0); // Correct dimension for feature padding
+            padded_v = ggml_pad(ctx, v, 0, k->ne[0] - v->ne[1], 0, 0);
             cb(padded_v, "padded_v", il);
             n_embd_head_v_out = n_embd_head_k;
         }
@@ -603,6 +602,15 @@
 
         ggml_flash_attn_ext_set_prec(cur, GGML_PREC_F32);
 
+        if (n_embd_head_v < n_embd_head_k) {
+            cur = ggml_reshape_3d(ctx, cur, n_embd_head_v_out, n_head, n_tokens);
+            cur = ggml_view_3d(ctx, cur, n_embd_head_v, n_head, n_tokens,
+                    ggml_element_size(cur) * n_embd_head_v_out,
+                    ggml_element_size(cur) * n_embd_head_v_out * n_head,
+                    0);
+            cur = ggml_cont(ctx, cur);
+        }
+
         cur = ggml_reshape_2d(ctx, cur, n_embd_head_v*n_head, n_tokens);
     } else {
         struct ggml_tensor * kq = ggml_mul_mat(ctx, k, q);
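
The change above zero-pads the cached V tensor so its head size matches K's before the flash-attention call, and afterwards views the padded output back down to the original n_embd_head_v columns per head before the final 2D reshape. Below is a minimal, self-contained sketch of that pad-then-trim pattern using the public ggml API; the tensor layout (head_dim, rows, heads), the padded dimension index, and all sizes are assumptions for illustration, not the exact KV-cache layout that llm_build_kqv operates on.

```c
// Sketch only: pad a V-like tensor's head dimension with zeros up to the K head
// size, then view a stand-in attention output back down to the original width.
#include "ggml.h"

int main(void) {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 64*1024*1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    const int64_t n_embd_head_k = 192; // assumed K head size
    const int64_t n_embd_head_v = 128; // assumed (smaller) V head size
    const int64_t n_kv          = 32;  // assumed cache length
    const int64_t n_head        = 4;   // assumed head count
    const int64_t n_tokens      = 8;   // assumed batch length

    // V laid out as (head_dim, n_kv, n_head); pad dim 0 with zeros so the head
    // dimension matches K (the commit pads a different index, based on the
    // actual cache view it works on)
    struct ggml_tensor * v = ggml_new_tensor_3d(ctx, GGML_TYPE_F32,
            n_embd_head_v, n_kv, n_head);
    struct ggml_tensor * padded_v = ggml_pad(ctx, v,
            (int)(n_embd_head_k - n_embd_head_v), 0, 0, 0);
    (void) padded_v; // flash attention would consume padded_v here

    // stand-in for the attention output, with rows padded to n_embd_head_k
    struct ggml_tensor * cur = ggml_new_tensor_3d(ctx, GGML_TYPE_F32,
            n_embd_head_k, n_head, n_tokens);

    // keep only the first n_embd_head_v values of each head; the view strides
    // over the padded row width, so it is not contiguous
    cur = ggml_view_3d(ctx, cur, n_embd_head_v, n_head, n_tokens,
            ggml_element_size(cur) * n_embd_head_k,
            ggml_element_size(cur) * n_embd_head_k * n_head,
            0);

    // materialize the strided view, then flatten heads as in the original code
    cur = ggml_cont(ctx, cur);
    cur = ggml_reshape_2d(ctx, cur, n_embd_head_v*n_head, n_tokens);

    ggml_free(ctx);
    return 0;
}
```

The ggml_cont before the 2D reshape matters: the trimmed view keeps the padded tensor's row stride, so its data is not contiguous, and the reshape expects a contiguous tensor.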

0 commit comments
