We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent dcbce53 commit 1647e2b
src/llama.cpp
@@ -595,6 +595,7 @@ static struct ggml_tensor * llm_build_kqv(
595
padded_v = ggml_pad(ctx, v, 0, k->ne[0] - v->ne[1], 0, 0);
596
cb(padded_v, "padded_v", il);
597
n_embd_head_v_out = n_embd_head_k;
598
+ padded_v = ggml_cont(ctx, padded_v);
599
}
600
601
cur = ggml_flash_attn_ext(ctx, q, k, padded_v, kq_mask, kq_scale, hparams.f_max_alibi_bias,
@@ -613,8 +614,7 @@ static struct ggml_tensor * llm_build_kqv(
613
614
ggml_flash_attn_ext_set_prec(cur, GGML_PREC_F32);
615
616
if (n_embd_head_v < n_embd_head_k) {
- cur = ggml_cont(ctx, cur);
617
- cur = ggml_cont(ctx, ggml_view_3d(ctx, cur, n_embd_head_v, n_head, n_tokens,
+ cur = ggml_cont(ctx, ggml_view_3d(ctx, ggml_cont(ctx, cur), n_embd_head_v, n_head, n_tokens,
618
ggml_element_size(cur) * n_embd_head_v_out,
619
ggml_element_size(cur) * n_embd_head_v_out * n_head,
620
0));
0 commit comments