@@ -16373,17 +16373,14 @@ struct llm_build_context {
1637316373
1637416374 cur = ggml_add(ctx0, cur, llm_build_rwkv6_time_mix(lctx, ctx0, layer, x_norm_att, x_prev, &wkv_states));
1637516375 ggml_build_forward_expand(gf, cur);
16376+
16377+ struct ggml_tensor * last_norm_att = ggml_view_3d(ctx0, x_norm_att, n_embd, 1, n_seqs, x_norm_att->nb[1], x_norm_att->nb[2], (n_seq_tokens-1)*n_embd*ggml_element_size(x_norm_att));
1637616378 ggml_build_forward_expand(
1637716379 gf,
1637816380 ggml_cpy(
1637916381 ctx0,
16380- wkv_states,
16381- ggml_view_1d(
16382- ctx0,
16383- kv_self.v_l[il],
16384- hparams.n_embd_v_s() * n_seqs,
16385- hparams.n_embd_v_s() * kv_head * ggml_element_size(kv_self.v_l[il])
16386- )
16382+ ggml_view_1d(ctx0, last_norm_att, n_embd * n_seqs, 0),
16383+ ggml_view_1d(ctx0, kv_self.k_l[il], hparams.n_embd_k_s() * n_seqs, hparams.n_embd_k_s() * kv_head * ggml_element_size(kv_self.k_l[il]))
1638716384 )
1638816385 );
1638916386
@@ -16397,7 +16394,7 @@ struct llm_build_context {
1639716394 cur = ggml_add(ctx0, cur, llm_build_rwkv6_channel_mix(lctx, ctx0, layer, x_norm_ffn, x_prev));
1639816395 ggml_build_forward_expand(gf, cur);
1639916396
16400- struct ggml_tensor * last_norm_att = ggml_view_3d(ctx0, x_norm_att, n_embd, 1, n_seqs, x_norm_att->nb[1], x_norm_att->nb[2], (n_seq_tokens-1)*n_embd*ggml_element_size(x_norm_att));
16397+ // struct ggml_tensor * last_norm_att = ggml_view_3d(ctx0, x_norm_att, n_embd, 1, n_seqs, x_norm_att->nb[1], x_norm_att->nb[2], (n_seq_tokens-1)*n_embd*ggml_element_size(x_norm_att));
1640116398 struct ggml_tensor * last_norm_ffn = ggml_view_3d(ctx0, x_norm_ffn, n_embd, 1, n_seqs, x_norm_ffn->nb[1], x_norm_ffn->nb[2], (n_seq_tokens-1)*n_embd*ggml_element_size(x_norm_ffn));
1640216399
1640316400 token_shift = ggml_concat(ctx0, last_norm_att, last_norm_ffn, 1);
0 commit comments