@@ -10151,15 +10151,15 @@ struct llm_build_deepseek2 : public llm_graph_context {
10151 10151 ggml_tensor * q_nope_absorbed = ggml_mul_mat(ctx0, model.layers[il].wk_b, q_nope);
10152 10152 cb(q_nope_absorbed, "q_nope_absorbed", il);
10153 10153
10154- // {kv_lora_rank, n_head, n_tokens}
10155- q_nope_absorbed = ggml_permute(ctx0, q_nope_absorbed, 0, 2, 1, 3);
10156- cb(q_nope_absorbed, "q_nope_absorbed_perm", il);
10157-
10158- // {n_embd_head_qk_rope + kv_lora_rank, n_head, n_tokens}
10154+ // {n_embd_head_qk_rope + kv_lora_rank, n_tokens, n_head}
10159 10155 // note: rope must go first for in-place context shifting in build_rope_shift()
10160 10156 ggml_tensor * Qcur = ggml_concat(ctx0, q_pe, q_nope_absorbed, 0);
10161 10157 cb(Qcur, "Qcur", il);
10162 10158
10159+ // {n_embd_head_qk_rope + kv_lora_rank, n_head, n_tokens}
10160+ Qcur = ggml_permute(ctx0, Qcur, 0, 2, 1, 3);
10161+ cb(Qcur, "Qcur_perm", il);
10162+
10163 10163 kv_cmpr = ggml_reshape_3d(ctx0, kv_cmpr, kv_lora_rank, 1, n_tokens);
10164 10164 cb(kv_cmpr, "kv_cmpr_reshape", il);
10165 10165
0 commit comments