@@ -311,14 +311,16 @@ class TensorNameMap:
311311 "model.layers.{bid}.feed_forward.router" , # llama4 jamba
312312 "encoder.layers.{bid}.mlp.router.layer" , # nomic-bert-moe
313313 "model.layers.{bid}.mlp.gate.wg" , # hunyuan
314+ "model.layers.{bid}.mlp.ffn_gate_inp.weight" , # ernie4.5-moe
314315 ),
315316
316317 MODEL_TENSOR .FFN_GATE_INP_SHEXP : (
317318 "model.layers.{bid}.mlp.shared_expert_gate" , # qwen2moe
318319 ),
319320
320321 MODEL_TENSOR .FFN_EXP_PROBS_B : (
321- "model.layers.{bid}.mlp.gate.e_score_correction" , # deepseek-v3 dots1
322+ "model.layers.{bid}.mlp.gate.e_score_correction" , # deepseek-v3 dots1
323+ "model.layers.{bid}.mlp.moe_statics.e_score_correction" , # ernie4.5-moe
322324 ),
323325
324326 # Feed-forward up
@@ -357,13 +359,14 @@ class TensorNameMap:
357359 ),
358360
359361 MODEL_TENSOR .FFN_UP_EXP : (
360- "layers.{bid}.feed_forward.experts.w3" , # mixtral (merged)
361- "transformer.decoder_layer.{bid}.moe.linear_v" , # Grok (merged)
362- "transformer.blocks.{bid}.ffn.experts.mlp.v1" , # dbrx
363- "model.layers.{bid}.mlp.experts.up_proj" , # qwen2moe olmoe (merged)
364- "model.layers.{bid}.block_sparse_moe.experts.w3" , # phimoe (merged)
365- "model.layers.{bid}.feed_forward.experts.up_proj" , # llama4
366- "encoder.layers.{bid}.mlp.experts.mlp.w1" , # nomic-bert-moe
362+ "layers.{bid}.feed_forward.experts.w3" , # mixtral (merged)
363+ "transformer.decoder_layer.{bid}.moe.linear_v" , # Grok (merged)
364+ "transformer.blocks.{bid}.ffn.experts.mlp.v1" , # dbrx
365+ "model.layers.{bid}.mlp.experts.up_proj" , # qwen2moe olmoe (merged)
366+ "model.layers.{bid}.block_sparse_moe.experts.w3" , # phimoe (merged)
367+ "model.layers.{bid}.feed_forward.experts.up_proj" , # llama4
368+ "encoder.layers.{bid}.mlp.experts.mlp.w1" , # nomic-bert-moe
369+ "layers.{bid}.mlp.experts.up_proj.weight" , # ernie4.5-moe
367370 ),
368371
369372 MODEL_TENSOR .FFN_UP_SHEXP : (
@@ -396,12 +399,13 @@ class TensorNameMap:
396399 ),
397400
398401 MODEL_TENSOR .FFN_GATE_EXP : (
399- "layers.{bid}.feed_forward.experts.w1" , # mixtral (merged)
400- "transformer.decoder_layer.{bid}.moe.linear" , # Grok (merged)
401- "transformer.blocks.{bid}.ffn.experts.mlp.w1" , # dbrx
402- "model.layers.{bid}.mlp.experts.gate_proj" , # qwen2moe olmoe (merged)
403- "model.layers.{bid}.block_sparse_moe.experts.w1" , # phimoe (merged)
404- "model.layers.{bid}.feed_forward.experts.gate_proj" , # llama4
402+ "layers.{bid}.feed_forward.experts.w1" , # mixtral (merged)
403+ "transformer.decoder_layer.{bid}.moe.linear" , # Grok (merged)
404+ "transformer.blocks.{bid}.ffn.experts.mlp.w1" , # dbrx
405+ "model.layers.{bid}.mlp.experts.gate_proj" , # qwen2moe olmoe (merged)
406+ "model.layers.{bid}.block_sparse_moe.experts.w1" , # phimoe (merged)
407+ "model.layers.{bid}.feed_forward.experts.gate_proj" , # llama4
408+ "layers.{bid}.mlp.experts.gate_proj.weight" , # ernie4.5-moe
405409 ),
406410
407411 MODEL_TENSOR .FFN_GATE_SHEXP : (
@@ -443,14 +447,15 @@ class TensorNameMap:
443447 ),
444448
445449 MODEL_TENSOR .FFN_DOWN_EXP : (
446- "layers.{bid}.feed_forward.experts.w2" , # mixtral (merged)
447- "transformer.decoder_layer.{bid}.moe.linear_1" , # Grok (merged)
448- "transformer.blocks.{bid}.ffn.experts.mlp.w2" , # dbrx
449- "model.layers.{bid}.mlp.experts.down_proj" , # qwen2moe olmoe (merged)
450- "model.layers.{bid}.block_sparse_moe.output_linear" , # granitemoe
451- "model.layers.{bid}.block_sparse_moe.experts.w2" , # phimoe (merged)
452- "model.layers.{bid}.feed_forward.experts.down_proj" , # llama4
453- "encoder.layers.{bid}.mlp.experts.mlp.w2" , # nomic-bert-moe
450+ "layers.{bid}.feed_forward.experts.w2" , # mixtral (merged)
451+ "transformer.decoder_layer.{bid}.moe.linear_1" , # Grok (merged)
452+ "transformer.blocks.{bid}.ffn.experts.mlp.w2" , # dbrx
453+ "model.layers.{bid}.mlp.experts.down_proj" , # qwen2moe olmoe (merged)
454+ "model.layers.{bid}.block_sparse_moe.output_linear" , # granitemoe
455+ "model.layers.{bid}.block_sparse_moe.experts.w2" , # phimoe (merged)
456+ "model.layers.{bid}.feed_forward.experts.down_proj" , # llama4
457+ "encoder.layers.{bid}.mlp.experts.mlp.w2" , # nomic-bert-moe
458+ "layers.{bid}.mlp.experts.down_proj.weight" , # ernie4.5-moe
454459 ),
455460
456461 MODEL_TENSOR .FFN_DOWN_SHEXP : (
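For context, a minimal sketch (outside the diff) of how these per-checkpoint names are consumed by the conversion path. It assumes the gguf-py package from this repo is importable; the architecture and block count below (qwen2moe, 24 blocks) are picked purely for illustration from entries already listed above, not the new ernie4.5-moe additions.

```python
import gguf

# Build the name map for one architecture; every "{bid}" template in
# TensorNameMap is expanded for block ids 0..n_blocks-1.
tmap = gguf.get_tensor_name_map(gguf.MODEL_ARCH.QWEN2MOE, 24)

# Resolve a Hugging Face checkpoint tensor name to its GGUF-side name.
# try_suffixes strips ".weight"/".bias" before the lookup and re-appends
# the suffix to the mapped name.
name = tmap.get_name(
    "model.layers.0.mlp.experts.up_proj.weight",
    try_suffixes=(".weight", ".bias"),
)
print(name)  # expected: "blk.0.ffn_up_exps.weight"
```

Adding a new source name to a tuple, as this diff does for ernie4.5-moe, is all that is needed for the lookup above to start recognizing that checkpoint layout for the corresponding MODEL_TENSOR.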