Skip to content

Commit 66c374c

Browse files
authored
permute Qcur instead of q_nope_absorbed
1 parent 54a7272 commit 66c374c

File tree

1 file changed

+5
-5
lines changed

1 file changed

+5
-5
lines changed

src/llama-model.cpp

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -10151,15 +10151,15 @@ struct llm_build_deepseek2 : public llm_graph_context {
10151 10151
ggml_tensor * q_nope_absorbed = ggml_mul_mat(ctx0, model.layers[il].wk_b, q_nope);
10152 10152
cb(q_nope_absorbed, "q_nope_absorbed", il);
10153 10153

10154-
// {kv_lora_rank, n_head, n_tokens}
10155-
q_nope_absorbed = ggml_permute(ctx0, q_nope_absorbed, 0, 2, 1, 3);
10156-
cb(q_nope_absorbed, "q_nope_absorbed_perm", il);
10157-
10158-
// {n_embd_head_qk_rope + kv_lora_rank, n_head, n_tokens}
10154+
// {n_embd_head_qk_rope + kv_lora_rank, n_tokens, n_head}
10159 10155
// note: rope must go first for in-place context shifting in build_rope_shift()
10160 10156
ggml_tensor * Qcur = ggml_concat(ctx0, q_pe, q_nope_absorbed, 0);
10161 10157
cb(Qcur, "Qcur", il);
1016210158

10159+
// {n_embd_head_qk_rope + kv_lora_rank, n_head, n_tokens}
10160+
Qcur = ggml_permute(ctx0, Qcur, 0, 2, 1, 3);
10161+
cb(Qcur, "Qcur_perm", il);
10162+
10163 10163
kv_cmpr = ggml_reshape_3d(ctx0, kv_cmpr, kv_lora_rank, 1, n_tokens);
10164 10164
cb(kv_cmpr, "kv_cmpr_reshape", il);
10165 10165

0 commit comments

Comments
 (0)