
Commit e5ffc91

Fix wrong shape for K norm

1 parent 875de2b

1 file changed (+1, -3 lines)


src/models/llm_build_qwen3next.cpp

Lines changed: 1 addition & 3 deletions
@@ -173,15 +173,14 @@ struct ggml_tensor * llm_build_qwen3next::build_qwen3next_attention_layer(ggml_t
         cb(Vcur, "Vcur", il);

         // Apply K normalization
+        Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens);
         Kcur = build_q3n_norm(Kcur, model.layers[il].attn_k_norm, il);
         cb(Kcur, "Kcur_normed", il);

         // Reshape gate to [n_embd, n_tokens] for the sigmoid gating (flatten the heads)
         gate = ggml_cont_2d(ctx0, gate, n_embd_head * n_head, n_tokens);
         cb(gate, "gate_reshaped", il);

-        Qcur = ggml_cont_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens);
-        Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens);
         Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head, n_head_kv, n_tokens);

         // Apply RoPE
@@ -204,7 +203,6 @@ struct ggml_tensor * llm_build_qwen3next::build_qwen3next_attention_layer(ggml_t
         struct ggml_tensor * gate_sigmoid = ggml_sigmoid(ctx0, gate);
         cb(gate_sigmoid, "gate_sigmoid", il);

-        // Apply gating directly using the original gate tensor
         cur = ggml_mul(ctx0, cur, gate_sigmoid);
         cb(cur, "attn_gated", il);
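
Note on the fix: the K normalization is applied per attention head, so Kcur must already be viewed as [n_embd_head, n_head_kv, n_tokens] when build_q3n_norm runs; before this commit the norm saw the flat 2D projection, mixing values across heads. Below is a minimal sketch of the idea, assuming build_q3n_norm wraps an RMS-style norm over the head dimension (the helper name k_norm_per_head, its signature, and the eps value are hypothetical, not the actual implementation):

// Illustrative sketch only; assumes a ggml RMS norm, which reduces over
// the first tensor dimension (ne[0]).
static struct ggml_tensor * k_norm_per_head(
        struct ggml_context * ctx0,
        struct ggml_tensor  * Kcur,    // flat K projection, [n_embd_head * n_head_kv, n_tokens]
        struct ggml_tensor  * norm_w,  // learned scale, [n_embd_head]
        int64_t n_embd_head, int64_t n_head_kv, int64_t n_tokens) {
    // Reshape BEFORE normalizing: with ne[0] == n_embd_head, the RMS norm
    // is computed per head instead of across the concatenated heads.
    Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens);
    Kcur = ggml_rms_norm(ctx0, Kcur, 1e-6f);
    return ggml_mul(ctx0, Kcur, norm_w); // scale broadcasts over heads and tokens
}

This also explains why the later Kcur reshape was deleted rather than duplicated: once the reshape happens ahead of the norm, the tensor already has the 3D shape the rest of the layer expects.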
