
Commit 9e2634d

https://github.com/ggerganov/llama.cpp/pull/11445
1 parent a2400dd commit 9e2634d

1 file changed: +5 -8 lines

src/llama.cpp

Lines changed: 5 additions & 8 deletions
@@ -16373,17 +16373,14 @@ struct llm_build_context {
 
             cur = ggml_add(ctx0, cur, llm_build_rwkv6_time_mix(lctx, ctx0, layer, x_norm_att, x_prev, &wkv_states));
             ggml_build_forward_expand(gf, cur);
+
+            struct ggml_tensor * last_norm_att = ggml_view_3d(ctx0, x_norm_att, n_embd, 1, n_seqs, x_norm_att->nb[1], x_norm_att->nb[2], (n_seq_tokens-1)*n_embd*ggml_element_size(x_norm_att));
             ggml_build_forward_expand(
                 gf,
                 ggml_cpy(
                     ctx0,
-                    wkv_states,
-                    ggml_view_1d(
-                        ctx0,
-                        kv_self.v_l[il],
-                        hparams.n_embd_v_s() * n_seqs,
-                        hparams.n_embd_v_s() * kv_head * ggml_element_size(kv_self.v_l[il])
-                    )
+                    ggml_view_1d(ctx0, last_norm_att, n_embd * n_seqs, 0),
+                    ggml_view_1d(ctx0, kv_self.k_l[il], hparams.n_embd_k_s() * n_seqs, hparams.n_embd_k_s() * kv_head * ggml_element_size(kv_self.k_l[il]))
                 )
             );
 
@@ -16397,7 +16394,7 @@ struct llm_build_context {
             cur = ggml_add(ctx0, cur, llm_build_rwkv6_channel_mix(lctx, ctx0, layer, x_norm_ffn, x_prev));
             ggml_build_forward_expand(gf, cur);
 
-            struct ggml_tensor * last_norm_att = ggml_view_3d(ctx0, x_norm_att, n_embd, 1, n_seqs, x_norm_att->nb[1], x_norm_att->nb[2], (n_seq_tokens-1)*n_embd*ggml_element_size(x_norm_att));
+            // struct ggml_tensor * last_norm_att = ggml_view_3d(ctx0, x_norm_att, n_embd, 1, n_seqs, x_norm_att->nb[1], x_norm_att->nb[2], (n_seq_tokens-1)*n_embd*ggml_element_size(x_norm_att));
             struct ggml_tensor * last_norm_ffn = ggml_view_3d(ctx0, x_norm_ffn, n_embd, 1, n_seqs, x_norm_ffn->nb[1], x_norm_ffn->nb[2], (n_seq_tokens-1)*n_embd*ggml_element_size(x_norm_ffn));
 
             token_shift = ggml_concat(ctx0, last_norm_att, last_norm_ffn, 1);
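For readers unfamiliar with ggml's view arithmetic, the standalone sketch below replays the pattern used in the first hunk: a ggml_view_3d with byte offset (n_seq_tokens-1)*n_embd*ggml_element_size(...) selects the last token of every sequence, and ggml_cpy writes that slice into a flat per-layer state buffer. It is an illustration with made-up dimensions, not code from this commit: the plain `state` tensor stands in for kv_self.k_l[il], the copy lands at offset 0 rather than at the kv_head-derived offset used above, and the strided 3-D view is passed straight to ggml_cpy (the commit flattens it with ggml_view_1d first) so the expected values are easy to check. It also assumes a ggml tree where ggml_graph_compute_with_ctx is declared in ggml-cpu.h.

// sketch.c -- illustrative only; compile against llama.cpp's bundled ggml
#include "ggml.h"
#include "ggml-cpu.h"   // ggml_graph_compute_with_ctx (recent ggml trees)
#include <stdio.h>

int main(void) {
    // hypothetical sizes, chosen only for illustration
    const int64_t n_embd       = 4;
    const int64_t n_seq_tokens = 3;
    const int64_t n_seqs       = 2;

    struct ggml_init_params params = { /*mem_size*/ 16*1024*1024, /*mem_buffer*/ NULL, /*no_alloc*/ false };
    struct ggml_context * ctx = ggml_init(params);

    // stand-in for x_norm_att: [n_embd, n_seq_tokens, n_seqs], contiguous
    struct ggml_tensor * x_norm_att = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, n_embd, n_seq_tokens, n_seqs);
    // stand-in for one layer's token-shift state: n_embd values per sequence
    struct ggml_tensor * state      = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd*n_seqs);

    // fill x_norm_att so every element holds its token index within the sequence
    float * x = (float *) x_norm_att->data;
    for (int64_t s = 0; s < n_seqs; ++s) {
        for (int64_t t = 0; t < n_seq_tokens; ++t) {
            for (int64_t e = 0; e < n_embd; ++e) {
                x[(s*n_seq_tokens + t)*n_embd + e] = (float) t;
            }
        }
    }

    // same view as the patch: one token (ne1 = 1) per sequence, starting at the last token
    struct ggml_tensor * last_norm_att = ggml_view_3d(ctx, x_norm_att, n_embd, 1, n_seqs,
            x_norm_att->nb[1], x_norm_att->nb[2],
            (n_seq_tokens - 1)*n_embd*ggml_element_size(x_norm_att));

    // copy the strided view into the contiguous state buffer; ggml_cpy walks the view's strides
    struct ggml_tensor * cpy = ggml_cpy(ctx, last_norm_att,
            ggml_view_1d(ctx, state, n_embd*n_seqs, 0));

    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, cpy);
    ggml_graph_compute_with_ctx(ctx, gf, 1);

    // every stored value should equal n_seq_tokens-1, i.e. the last token of each sequence
    const float * out = (const float *) state->data;
    for (int64_t i = 0; i < n_embd*n_seqs; ++i) {
        printf("%.0f ", out[i]);
    }
    printf("\n");

    ggml_free(ctx);
    return 0;
}

Built against the ggml that ships with llama.cpp, this should print 2 eight times: the index of the last token, copied once per embedding slot and sequence.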
