@@ -32,7 +32,6 @@ class TensorNameMap:
3232 "model.word_embeddings" , # bailingmoe
3333 "language_model.model.embed_tokens" , # llama4
3434 "encoder" , # neobert
35- "model.text_model.embed_tokens" , # smoldocling
3635 ),
3736
3837 # Token type embeddings
@@ -137,7 +136,6 @@ class TensorNameMap:
137136 "model.layers.{bid}.ln1" , # rwkv7
138137 "model.layers.{bid}.input_layernorm" , # llama4
139138 "transformer_encoder.{bid}.attention_norm" , # neobert
140- "model.text_model.layers.{bid}.input_layernorm" , # smoldocling
141139 ),
142140
143141 # Attention norm 2
@@ -181,7 +179,6 @@ class TensorNameMap:
181179 "transformer.decoder_layer.{bid}.multi_head_attention.query" ,# Grok
182180 "transformer.h.{bid}.attn.attention.q_proj" , # exaone
183181 "model.layers.{bid}.self_attn.q_proj" , # llama4
184- "model.text_model.layers.{bid}.self_attn.q_proj" , # smoldocling
185182 ),
186183
187184 # Attention key
@@ -198,7 +195,6 @@ class TensorNameMap:
198195 "transformer.decoder_layer.{bid}.multi_head_attention.key" ,# Grok
199196 "transformer.h.{bid}.attn.attention.k_proj" , # exaone
200197 "model.layers.{bid}.self_attn.k_proj" , # llama4
201- "model.text_model.layers.{bid}.self_attn.k_proj" , # smoldocling
202198 ),
203199
204200 # Attention value
@@ -214,7 +210,6 @@ class TensorNameMap:
214210 "transformer.decoder_layer.{bid}.multi_head_attention.value" ,# Grok
215211 "transformer.h.{bid}.attn.attention.v_proj" , # exaone
216212 "model.layers.{bid}.self_attn.v_proj" , # llama4
217- "model.text_model.layers.{bid}.self_attn.v_proj" , # smoldocling
218213 ),
219214
220215 # Attention output
@@ -245,7 +240,6 @@ class TensorNameMap:
245240 "transformer.h.{bid}.attn.attention.out_proj" , # exaone
246241 "model.layers.{bid}.self_attn.o_proj" , # llama4
247242 "transformer_encoder.{bid}.wo" , # neobert
248- "model.text_model.layers.{bid}.self_attn.o_proj" , # smoldocling
249243 ),
250244
251245 # Attention output norm
@@ -260,7 +254,6 @@ class TensorNameMap:
         MODEL_TENSOR.ATTN_POST_NORM: (
             "model.layers.{bid}.post_attention_layernorm",             # gemma2 olmo2 # ge
             "model.layers.{bid}.post_self_attn_layernorm",             # glm-4-0414
-            "model.text_model.layers.{bid}.post_attention_layernorm",  # smoldocling
         ),
 
         # Rotary embeddings
@@ -358,7 +351,6 @@ class TensorNameMap:
358351 "transformer.h.{bid}.mlp.c_fc_1" , # exaone
359352 "model.layers.{bid}.feed_forward.up_proj" , # llama4 jamba
360353 "transformer_encoder.{bid}.ffn.w12" , # neobert
361- "model.text_model.layers.{bid}.mlp.up_proj" , # smoldocling
362354 ),
363355
364356 MODEL_TENSOR .FFN_UP_EXP : (
@@ -398,7 +390,6 @@ class TensorNameMap:
398390 "model.layers.{bid}.residual_mlp.w1" , # arctic
399391 "transformer.h.{bid}.mlp.c_fc_0" , # exaone
400392 "model.layers.{bid}.feed_forward.gate_proj" , # llama4 jamba
401- "model.text_model.layers.{bid}.mlp.gate_proj" , # smoldocling
402393 ),
403394
404395 MODEL_TENSOR .FFN_GATE_EXP : (
@@ -446,7 +437,6 @@ class TensorNameMap:
446437 "model.layers.h.{bid}.mlp.c_proj" , # exaone
447438 "model.layers.{bid}.feed_forward.down_proj" , # llama4 jamba
448439 "transformer_encoder.{bid}.ffn.w3" , # neobert
449- "model.text_model.layers.{bid}.mlp.down_proj" , # smoldocling
450440 ),
451441
452442 MODEL_TENSOR .FFN_DOWN_EXP : (
@@ -1034,7 +1024,7 @@ class TensorNameMap:
         ),
 
         MODEL_TENSOR.V_MMPROJ_FC: (
-            "model.connector.modality_projection.proj",  # SmolVLM smoldocling
+            "model.connector.modality_projection.proj",  # SmolVLM
         ),
 
         MODEL_TENSOR.V_MMPROJ_MLP: (
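
For context: each tuple in this file lists the source-checkpoint tensor names that resolve to one GGUF tensor, with {bid} standing for the block (layer) index. The sketch below is a minimal, illustrative reconstruction of that lookup idea under stated assumptions; the MAPPINGS table and the resolve_name helper are hypothetical and are not the actual gguf-py TensorNameMap API.

# Minimal sketch (assumption): resolve a source tensor name to a GGUF key using
# per-block name templates like the tuples above. MAPPINGS and resolve_name are
# illustrative only, not the gguf-py API.

MAPPINGS: dict[str, tuple[str, ...]] = {
    "blk.{bid}.attn_q": (
        "model.layers.{bid}.self_attn.q_proj",        # llama-style checkpoints
        "transformer.h.{bid}.attn.attention.q_proj",  # exaone-style checkpoints
    ),
    "blk.{bid}.ffn_up": (
        "model.layers.{bid}.feed_forward.up_proj",
        "transformer_encoder.{bid}.ffn.w12",
    ),
}

def resolve_name(src_name: str, n_blocks: int) -> str | None:
    """Return the GGUF key for a source tensor name, or None if unmapped."""
    for gguf_tmpl, src_tmpls in MAPPINGS.items():
        for bid in range(n_blocks):
            for src_tmpl in src_tmpls:
                if src_tmpl.format(bid=bid) == src_name:
                    return gguf_tmpl.format(bid=bid)
    return None

print(resolve_name("model.layers.3.self_attn.q_proj", n_blocks=32))             # blk.3.attn_q
print(resolve_name("model.text_model.layers.3.self_attn.q_proj", n_blocks=32))  # None: that mapping is removed above

With this change, names of the form "model.text_model.layers.{bid}...." no longer match any tuple, matching the intent of the diff.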