Skip to content

Commit c2a82a1

Browse files
committed
Move the norm shift to conversion, Gemma 2 style
1 parent 5306640 commit c2a82a1

File tree

2 files changed

+6
-3
lines changed

2 files changed

+6
-3
lines changed

convert_hf_to_gguf.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3773,7 +3773,9 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
3773 3773
name = name.rpartition(".dt_bias")[0] + ".dt_proj.bias"
3774 3774
elif "conv1d" in name:
3775 3775
data_torch = data_torch.squeeze()
3776-
3776+
elif name.endswith("norm.weight") and not name.endswith("linear_attn.norm.weight"):
3777+
data_torch = data_torch + 1
3778+
3777 3779
yield from Qwen2MoeModel.modify_tensors(self, data_torch, name, bid)
3778 3780

3779 3781

src/models/llm_build_qwen3next.cpp

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -127,8 +127,9 @@ llm_build_qwen3next::llm_build_qwen3next(const llama_model & model, const llm_gr
127 127
}
128 128

129 129
struct ggml_tensor * llm_build_qwen3next::build_q3n_norm(struct ggml_tensor * input, struct ggml_tensor * weights, int layer) {
130-
ggml_tensor * input_norm = ggml_scale_bias(ctx0, weights, 1.0f, 1.0f);
131-
return build_norm(input, input_norm, nullptr, LLM_NORM_RMS, layer);
130+
// ggml_tensor * input_norm = ggml_scale_bias(ctx0, weights, 1.0f, 1.0f);
131+
// EDIT: we moved the shifting part to the conversion, so we just call normal build_norm
132+
return build_norm(input, weights, nullptr, LLM_NORM_RMS, layer);
132 133
}
133 134

134 135
struct ggml_tensor * llm_build_qwen3next::build_q3n_gated_norm(struct ggml_tensor * input, struct ggml_tensor * weights, struct ggml_tensor * gate, int layer) {

0 commit comments

Comments
 (0)