RoPE fixes.

pwilkin · pwilkin · commit 992d4f0d2601 · 2025-07-13T20:02:08.000+02:00
diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
@@ -2828,6 +2828,7 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_expert_count(self.hparams["moe_num_experts"])
         self.gguf_writer.add_expert_used_count(self.hparams["moe_k"])
         self.gguf_writer.add_interleave_moe_layer_step(self.hparams["moe_layer_interval"])
+        self.gguf_writer.add_rope_freq_base(self.hparams["rope_theta"])
         if (moe_intermediate_size := self.hparams.get("moe_intermediate_size")) is not None:
             self.gguf_writer.add_expert_feed_forward_length(moe_intermediate_size)
         if (shared_expert_intermediate_size := self.hparams.get('intermediate_size')) is not None and (num_key_value_heads := self.hparams.get('num_key_value_heads')) is not None:
diff --git a/src/llama-arch.cpp b/src/llama-arch.cpp
@@ -1816,6 +1816,7 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
             { LLM_TENSOR_FFN_GATE_EXPS,      "blk.%d.ffn_gate_exps" },
             { LLM_TENSOR_FFN_DOWN_EXPS,      "blk.%d.ffn_down_exps" },
             { LLM_TENSOR_FFN_UP_EXPS,        "blk.%d.ffn_up_exps" },
+            { LLM_TENSOR_FFN_EXP_PROBS_B,    "blk.%d.exp_probs_b" },
         },
     },
     {
diff --git a/src/llama-model.cpp b/src/llama-model.cpp
@@ -8367,7 +8367,6 @@ struct llm_build_ernie4_5_moe : public llm_graph_context {
 
         for (int il = 0; il < n_layer; ++il) {
             ggml_tensor * inpSA = inpL;
-
             // norm
             {
                 cur = build_norm(inpL,
@@ -8404,15 +8403,17 @@ struct llm_build_ernie4_5_moe : public llm_graph_context {
                 Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens);
                 Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head, n_head_kv, n_tokens);
 
+                const float freq_base_l  = model.get_rope_freq_base (cparams, il);
+                const float freq_scale_l = model.get_rope_freq_scale(cparams, il);
                 Qcur = ggml_rope_ext(
                         ctx0, Qcur, inp_pos, nullptr,
-                        n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,
+                        n_rot, rope_type, n_ctx_orig, freq_base_l, freq_scale_l,
                         ext_factor, attn_factor, beta_fast, beta_slow
                         );
 
                 Kcur = ggml_rope_ext(
                         ctx0, Kcur, inp_pos, nullptr,
-                        n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,
+                        n_rot, rope_type, n_ctx_orig, freq_base_l, freq_scale_l,
                         ext_factor, attn_factor, beta_fast, beta_slow
                         );
 
@@ -8435,7 +8436,7 @@ struct llm_build_ernie4_5_moe : public llm_graph_context {
             cb(ffn_inp, "ffn_inp", il);
 
             // feed-forward network
-            bool is_moe_layer = arch == LLM_ARCH_ERNIE4_5_MOE && hparams.n_moe_layer_step > 0 && (il + 1) % hparams.n_moe_layer_step == 0;
+            bool is_moe_layer = arch == LLM_ARCH_ERNIE4_5_MOE && hparams.n_moe_layer_step > 0 && il >= hparams.n_moe_layer_step;
 
             if (!is_moe_layer) {
                 cur = build_norm(ffn_inp,
@@ -16828,6 +16829,7 @@ llama_rope_type llama_model_rope_type(const llama_model * model) {
         case LLM_ARCH_SMOLLM3:
         case LLM_ARCH_ARCEE:
         case LLM_ARCH_ERNIE4_5:
+        case LLM_ARCH_ERNIE4_5_MOE:
             return LLAMA_ROPE_TYPE_NORM;
 
         // the pairs of head values are offset by n_rot/2

Original file line number	Diff line number	Diff line change
`@@ -1816,6 +1816,7 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N`
`1816`	`1816`	`{ LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },`
`1817`	`1817`	`{ LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },`
`1818`	`1818`	`{ LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },`
	`1819`	`+ { LLM_TENSOR_FFN_EXP_PROBS_B, "blk.%d.exp_probs_b" },`
`1819`	`1820`	`},`
`1820`	`1821`	`},`
`1821`	`1822`	`{`