Move the norm shift to conversion, Gemma 2 style

pwilkin · pwilkin · commit c2a82a1773db · 2025-10-14T18:03:54.000+02:00
diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
@@ -3773,7 +3773,9 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
             name = name.rpartition(".dt_bias")[0] + ".dt_proj.bias"
         elif "conv1d" in name:
             data_torch = data_torch.squeeze()
-
+        elif name.endswith("norm.weight") and not name.endswith("linear_attn.norm.weight"):
+            data_torch = data_torch + 1
+            
         yield from Qwen2MoeModel.modify_tensors(self, data_torch, name, bid)
 
 
diff --git a/src/models/llm_build_qwen3next.cpp b/src/models/llm_build_qwen3next.cpp
@@ -127,8 +127,9 @@ llm_build_qwen3next::llm_build_qwen3next(const llama_model & model, const llm_gr
 }
 
 struct ggml_tensor * llm_build_qwen3next::build_q3n_norm(struct ggml_tensor * input, struct ggml_tensor * weights, int layer) {
-    ggml_tensor * input_norm = ggml_scale_bias(ctx0, weights, 1.0f, 1.0f);
-    return build_norm(input, input_norm, nullptr, LLM_NORM_RMS, layer);
+    // ggml_tensor * input_norm = ggml_scale_bias(ctx0, weights, 1.0f, 1.0f);
+    // EDIT: we moved the shifting part to the conversion, so we just call normal build_norm
+    return build_norm(input, weights, nullptr, LLM_NORM_RMS, layer);
 }
 
 struct ggml_tensor * llm_build_qwen3next::build_q3n_gated_norm(struct ggml_tensor * input, struct ggml_tensor * weights, struct ggml_tensor * gate, int layer) {

Original file line number	Diff line number	Diff line change
`@@ -127,8 +127,9 @@ llm_build_qwen3next::llm_build_qwen3next(const llama_model & model, const llm_gr`
`127`	`127`	`}`
`128`	`128`
`129`	`129`	`struct ggml_tensor * llm_build_qwen3next::build_q3n_norm(struct ggml_tensor * input, struct ggml_tensor * weights, int layer) {`
`130`		`- ggml_tensor * input_norm = ggml_scale_bias(ctx0, weights, 1.0f, 1.0f);`
`131`		`- return build_norm(input, input_norm, nullptr, LLM_NORM_RMS, layer);`
	`130`	`+ // ggml_tensor * input_norm = ggml_scale_bias(ctx0, weights, 1.0f, 1.0f);`
	`131`	`+ // EDIT: we moved the shifting part to the conversion, so we just call normal build_norm`
	`132`	`+ return build_norm(input, weights, nullptr, LLM_NORM_RMS, layer);`
`132`	`133`	`}`
`133`	`134`
`134`	`135`	`struct ggml_tensor * llm_build_qwen3next::build_q3n_gated_norm(struct ggml_tensor * input, struct ggml_tensor * weights, struct ggml_tensor * gate, int layer) {`