Commit bb23dd0

Pass and read n_ff_exp
1 parent 07a5c76 commit bb23dd0

2 files changed: +3 −0 lines changed

convert_hf_to_gguf.py

Lines changed: 2 additions & 0 deletions

@@ -2828,6 +2828,8 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_expert_count(self.hparams["moe_num_experts"])
         self.gguf_writer.add_expert_used_count(self.hparams["moe_k"])
         self.gguf_writer.add_interleave_moe_layer_step(self.hparams["moe_layer_interval"])
+        if (moe_intermediate_size := self.hparams.get("moe_intermediate_size")) is not None:
+            self.gguf_writer.add_expert_feed_forward_length(moe_intermediate_size)
 
     def tensor_force_quant(self, name: str, new_name: str, bid: int | None, n_dims: int) -> gguf.GGMLQuantizationType | bool:
         if "exps" in new_name:

src/llama-model.cpp

Lines changed: 1 addition & 0 deletions

@@ -1611,6 +1611,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
             {
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
                 if (arch == LLM_ARCH_ERNIE4_5_MOE) {
+                    ml.get_key(LLM_KV_EXPERT_FEED_FORWARD_LENGTH, hparams.n_ff_exp);
                     ml.get_key(LLM_KV_INTERLEAVE_MOE_LAYER_STEP, hparams.n_moe_layer_step);
                 }
                 switch (hparams.n_layer) {
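
As a quick sanity check that the metadata round-trips from converter to loader, a small gguf-py sketch can list the relevant key in a converted file. The model path is a placeholder, and the exact architecture prefix of the key name is an assumption here:

# Sketch: list GGUF metadata fields whose name mentions expert_feed_forward_length.
# Assumes gguf-py is installed; "model.gguf" is a placeholder for a converted ERNIE 4.5 MoE file.
from gguf import GGUFReader

reader = GGUFReader("model.gguf")
for name, field in reader.fields.items():
    if "expert_feed_forward_length" in name:
        # field.data indexes into field.parts, which hold the raw value(s) as numpy arrays.
        values = [field.parts[idx].tolist() for idx in field.data]
        print(name, values)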
