@@ -32,7 +32,7 @@ class TensorNameMap:
             "model.word_embeddings",  # bailingmoe
             "language_model.model.embed_tokens",  # llama4
             "encoder",  # neobert
-            "model.text_model.embed_tokens.weight",  # smoldocling
+            "model.text_model.embed_tokens",  # smoldocling
         ),
 
         # Token type embeddings
@@ -64,7 +64,7 @@ class TensorNameMap:
         MODEL_TENSOR.OUTPUT: (
             "embed_out",  # gptneox
             "lm_head",  # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone olmoe olmo2 phimoe
-            "output",  # llama-pth bloom internlm2 smoldocling
+            "output",  # llama-pth bloom internlm2
             "word_embeddings_for_head",  # persimmon
             "lm_head.linear",  # phi2
             "output_layer",  # chatglm
@@ -94,7 +94,6 @@ class TensorNameMap:
             "model.ln_out",  # rwkv7
             "backbone.final_layer_norm",  # wavtokenizer
             "model.norm",  # llama4
-            "output_norm",  # smoldocling
         ),
 
         # Rope frequencies
@@ -138,7 +137,7 @@ class TensorNameMap:
             "model.layers.{bid}.ln1",  # rwkv7
             "model.layers.{bid}.input_layernorm",  # llama4
             "transformer_encoder.{bid}.attention_norm",  # neobert
-            "blk.{bid}.attn_norm",  # smoldocling
+            "model.text_model.layers.{bid}.input_layernorm",  # smoldocling
         ),
 
         # Attention norm 2
@@ -182,7 +181,7 @@ class TensorNameMap:
             "transformer.decoder_layer.{bid}.multi_head_attention.query",  # Grok
             "transformer.h.{bid}.attn.attention.q_proj",  # exaone
             "model.layers.{bid}.self_attn.q_proj",  # llama4
-            "blk.{bid}.attn_q",  # smoldocling
+            "model.text_model.layers.{bid}.self_attn.q_proj",  # smoldocling
         ),
 
         # Attention key
@@ -199,7 +198,7 @@ class TensorNameMap:
             "transformer.decoder_layer.{bid}.multi_head_attention.key",  # Grok
             "transformer.h.{bid}.attn.attention.k_proj",  # exaone
             "model.layers.{bid}.self_attn.k_proj",  # llama4
-            "blk.{bid}.attn_k",  # smoldocling
+            "model.text_model.layers.{bid}.self_attn.k_proj",  # smoldocling
         ),
 
         # Attention value
@@ -215,7 +214,7 @@ class TensorNameMap:
             "transformer.decoder_layer.{bid}.multi_head_attention.value",  # Grok
             "transformer.h.{bid}.attn.attention.v_proj",  # exaone
             "model.layers.{bid}.self_attn.v_proj",  # llama4
-            "blk.{bid}.attn_v",  # smoldocling
+            "model.text_model.layers.{bid}.self_attn.v_proj",  # smoldocling
         ),
 
         # Attention output
@@ -246,7 +245,7 @@ class TensorNameMap:
             "transformer.h.{bid}.attn.attention.out_proj",  # exaone
             "model.layers.{bid}.self_attn.o_proj",  # llama4
             "transformer_encoder.{bid}.wo",  # neobert
-            "blk.{bid}.attn_output",  # smoldocling
+            "model.text_model.layers.{bid}.self_attn.o_proj",  # smoldocling
         ),
 
         # Attention output norm
@@ -260,8 +259,9 @@ class TensorNameMap:
         ),
 
         MODEL_TENSOR.ATTN_POST_NORM: (
-            "model.layers.{bid}.post_attention_layernorm",  # gemma2 olmo2  # ge
-            "model.layers.{bid}.post_self_attn_layernorm",  # glm-4-0414
+            "model.layers.{bid}.post_attention_layernorm",  # gemma2 olmo2  # ge
+            "model.layers.{bid}.post_self_attn_layernorm",  # glm-4-0414
+            "model.text_model.layers.{bid}.post_attention_layernorm",  # smoldocling
         ),
 
         # Rotary embeddings
@@ -291,7 +291,6 @@ class TensorNameMap:
             "model.layers.{bid}.pre_moe_layernorm",  # mini-jamba
             "model.layers.{bid}.post_attention_layernorm",  # llama4
             "transformer_encoder.{bid}.ffn_norm",  # neobert
-            "blk.{bid}.ffn_norm",  # smoldocling
         ),
 
         # Post feed-forward norm
@@ -360,8 +359,7 @@ class TensorNameMap:
             "transformer.h.{bid}.mlp.c_fc_1",  # exaone
             "model.layers.{bid}.feed_forward.up_proj",  # llama4 jamba
             "transformer_encoder.{bid}.ffn.w12",  # neobert
-            "blk.{bid}.ffn_up",  # smoldocling
-            "blk.{bid}.ffn_up",  # smoldocling
+            "model.text_model.layers.{bid}.mlp.up_proj",  # smoldocling
         ),
 
         MODEL_TENSOR.FFN_UP_EXP: (
@@ -389,20 +387,20 @@ class TensorNameMap:
 
         # Feed-forward gate
         MODEL_TENSOR.FFN_GATE: (
-            "model.layers.{bid}.mlp.gate_proj",  # llama-hf refact olmo2
-            "layers.{bid}.feed_forward.w1",  # llama-pth
-            "transformer.h.{bid}.mlp.w2",  # qwen
-            "transformer.h.{bid}.mlp.c_fc2",  # jais
-            "model.layers.layers.{bid}.mlp.gate_proj",  # plamo
-            "model.layers.{bid}.feed_forward.w1",  # internlm2
-            "encoder.layers.{bid}.mlp.fc12",  # nomic-bert
-            "encoder.layer.{bid}.mlp.gated_layers_w",  # jina-bert-v2 (split up/gate, no longer used)
-            "transformer.h.{bid}.mlp.linear_1",  # refact
-            "model.layers.{bid}.residual_mlp.w1",  # arctic
-            "transformer.h.{bid}.mlp.c_fc_0",  # exaone
-            "model.layers.{bid}.feed_forward.gate_proj",  # llama4
-            "blk.{bid}.ffn_gate",  # smoldocling
-            "model.layers.{bid}.feed_forward.gate_proj",  # llama4 jamba
+            "model.layers.{bid}.mlp.gate_proj",  # llama-hf refact olmo2
+            "layers.{bid}.feed_forward.w1",  # llama-pth
+            "transformer.h.{bid}.mlp.w2",  # qwen
+            "transformer.h.{bid}.mlp.c_fc2",  # jais
+            "model.layers.layers.{bid}.mlp.gate_proj",  # plamo
+            "model.layers.{bid}.feed_forward.w1",  # internlm2
+            "encoder.layers.{bid}.mlp.fc12",  # nomic-bert
+            "encoder.layer.{bid}.mlp.gated_layers_w",  # jina-bert-v2 (split up/gate, no longer used)
+            "transformer.h.{bid}.mlp.linear_1",  # refact
+            "model.layers.{bid}.residual_mlp.w1",  # arctic
+            "transformer.h.{bid}.mlp.c_fc_0",  # exaone
+            "model.layers.{bid}.feed_forward.gate_proj",  # llama4
+            "model.layers.{bid}.feed_forward.gate_proj",  # llama4 jamba
+            "model.text_model.layers.{bid}.mlp.gate_proj",  # smoldocling
         ),
 
         MODEL_TENSOR.FFN_GATE_EXP: (
@@ -450,7 +448,7 @@ class TensorNameMap:
             "model.layers.h.{bid}.mlp.c_proj",  # exaone
             "model.layers.{bid}.feed_forward.down_proj",  # llama4 jamba
             "transformer_encoder.{bid}.ffn.w3",  # neobert
-            "blk.{bid}.ffn_down",  # smoldocling
+            "model.text_model.layers.{bid}.mlp.down_proj",  # smoldocling
         ),
 
         MODEL_TENSOR.FFN_DOWN_EXP: (
@@ -1038,7 +1036,7 @@ class TensorNameMap:
         ),
 
         MODEL_TENSOR.V_MMPROJ_FC: (
-            "model.connector.modality_projection.proj",  # SmolVLM
+            "model.connector.modality_projection.proj",  # SmolVLM smoldocling
         ),
 
         MODEL_TENSOR.V_MMPROJ_MLP: (
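
For context, a minimal sketch of how these mapping entries are consumed via gguf-py's TensorNameMap lookup. The MODEL_ARCH.LLAMA arch and block count below are illustrative assumptions (the diff does not show which MODEL_ARCH SmolDocling registers under); the smoldocling names added above would resolve through the same lookup once included in that arch's tensor list.

# Minimal sketch: resolving a Hugging Face tensor name to its canonical GGUF name.
# MODEL_ARCH.LLAMA and n_blocks=32 are illustrative assumptions, not taken from
# this commit; a SmolDocling conversion would go through the same lookup.
from gguf.constants import MODEL_ARCH
from gguf.tensor_mapping import get_tensor_name_map

tmap = get_tensor_name_map(MODEL_ARCH.LLAMA, 32)

# "model.layers.{bid}.self_attn.q_proj" is listed under MODEL_TENSOR.ATTN_Q,
# so this prints the GGUF-side name "blk.0.attn_q.weight".
print(tmap.get_name("model.layers.0.self_attn.q_proj.weight",
                    try_suffixes=(".weight", ".bias")))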