@@ -173,15 +173,14 @@ struct ggml_tensor * llm_build_qwen3next::build_qwen3next_attention_layer(ggml_t
     cb(Vcur, "Vcur", il);
 
     // Apply K normalization
+    Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens);
     Kcur = build_q3n_norm(Kcur, model.layers[il].attn_k_norm, il);
     cb(Kcur, "Kcur_normed", il);
 
     // Reshape gate to [n_embd, n_tokens] for the sigmoid gating (flatten the heads)
     gate = ggml_cont_2d(ctx0, gate, n_embd_head * n_head, n_tokens);
     cb(gate, "gate_reshaped", il);
 
-    Qcur = ggml_cont_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens);
-    Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens);
     Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head, n_head_kv, n_tokens);
 
     // Apply RoPE
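
This hunk pulls the K reshape ahead of `build_q3n_norm`. A likely reason, inferred rather than confirmed here (the body of `build_q3n_norm` is outside this diff): ggml norm ops normalize along the first tensor dimension, so K must already be split into heads, `[n_embd_head, n_head_kv, n_tokens]`, for the norm to act per head rather than across the flattened embedding. A minimal sketch of that pattern, assuming an RMS-style norm with a learned per-channel scale:

```cpp
#include "ggml.h"

// Sketch only (assumption): if build_q3n_norm reduces to an RMS norm plus a
// learned scale, the norm acts along the first tensor dimension. Reshaping K
// to [n_embd_head, n_head_kv, n_tokens] first normalizes each head's
// n_embd_head-long vector on its own; a flat [n_embd, n_tokens] K would be
// normalized across all heads at once.
static struct ggml_tensor * per_head_rms_norm(
        struct ggml_context * ctx,
        struct ggml_tensor  * k,      // flat K: [n_embd_head*n_head_kv, n_tokens]
        struct ggml_tensor  * weight, // learned scale (hypothetical): [n_embd_head]
        int64_t n_embd_head, int64_t n_head_kv, int64_t n_tokens) {
    k = ggml_reshape_3d(ctx, k, n_embd_head, n_head_kv, n_tokens);
    k = ggml_rms_norm(ctx, k, 1e-6f); // rows of length n_embd_head, one per head
    return ggml_mul(ctx, k, weight);  // broadcast the scale over heads and tokens
}
```
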
@@ -204,7 +203,6 @@ struct ggml_tensor * llm_build_qwen3next::build_qwen3next_attention_layer(ggml_t
     struct ggml_tensor * gate_sigmoid = ggml_sigmoid(ctx0, gate);
     cb(gate_sigmoid, "gate_sigmoid", il);
 
-    // Apply gating directly using the original gate tensor
     cur = ggml_mul(ctx0, cur, gate_sigmoid);
     cb(cur, "attn_gated", il);