@@ -378,18 +378,18 @@ class TensorNameMap:
378378
379379 # Feed-forward gate
380380 MODEL_TENSOR .FFN_GATE : (
381- "model.layers.{bid}.mlp.gate_proj" , # llama-hf refact olmo2
382- "layers.{bid}.feed_forward.w1" , # llama-pth
383- "transformer.h.{bid}.mlp.w2" , # qwen
384- "transformer.h.{bid}.mlp.c_fc2" , # jais
385- "model.layers.layers.{bid}.mlp.gate_proj" , # plamo
386- "model.layers.{bid}.feed_forward.w1" , # internlm2
387- "encoder.layers.{bid}.mlp.fc12" , # nomic-bert
388- "encoder.layer.{bid}.mlp.gated_layers_w" , # jina-bert-v2 (split up/gate, no longer used)
389- "transformer.h.{bid}.mlp.linear_1" , # refact
390- "model.layers.{bid}.residual_mlp.w1" , # arctic
391- "transformer.h.{bid}.mlp.c_fc_0" , # exaone
392- "model.layers.{bid}.feed_forward.gate_proj" , # llama4 jamba
381+ "model.layers.{bid}.mlp.gate_proj" , # llama-hf refact olmo2
382+ "layers.{bid}.feed_forward.w1" , # llama-pth
383+ "transformer.h.{bid}.mlp.w2" , # qwen
384+ "transformer.h.{bid}.mlp.c_fc2" , # jais
385+ "model.layers.layers.{bid}.mlp.gate_proj" , # plamo
386+ "model.layers.{bid}.feed_forward.w1" , # internlm2
387+ "encoder.layers.{bid}.mlp.fc12" , # nomic-bert
388+ "encoder.layer.{bid}.mlp.gated_layers_w" , # jina-bert-v2 (split up/gate, no longer used)
389+ "transformer.h.{bid}.mlp.linear_1" , # refact
390+ "model.layers.{bid}.residual_mlp.w1" , # arctic
391+ "transformer.h.{bid}.mlp.c_fc_0" , # exaone
392+ "model.layers.{bid}.feed_forward.gate_proj" , # llama4 jamba
393393 ),
394394
395395 MODEL_TENSOR .FFN_GATE_EXP : (
0 commit comments