
Commit 5856f8c

safetensors tensor mapping

Signed-off-by: ryan-mangeno <[email protected]>
1 parent 376ea47

File tree: 1 file changed


gguf-py/gguf/tensor_mapping.py (27 additions, 29 deletions)
@@ -32,7 +32,7 @@ class TensorNameMap:
             "model.word_embeddings", # bailingmoe
             "language_model.model.embed_tokens", # llama4
             "encoder", # neobert
-            "model.text_model.embed_tokens.weight", # smoldocling
+            "model.text_model.embed_tokens", # smoldocling
         ),

         # Token type embeddings
@@ -64,7 +64,7 @@ class TensorNameMap:
         MODEL_TENSOR.OUTPUT: (
             "embed_out", # gptneox
             "lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone olmoe olmo2 phimoe
-            "output", # llama-pth bloom internlm2 smoldocling
+            "output", # llama-pth bloom internlm2
             "word_embeddings_for_head", # persimmon
             "lm_head.linear", # phi2
             "output_layer", # chatglm
@@ -94,7 +94,6 @@ class TensorNameMap:
             "model.ln_out", # rwkv7
             "backbone.final_layer_norm", # wavtokenizer
             "model.norm", # llama4
-            "output_norm", # smoldocling
         ),

         # Rope frequencies
@@ -138,7 +137,7 @@ class TensorNameMap:
             "model.layers.{bid}.ln1", # rwkv7
             "model.layers.{bid}.input_layernorm", # llama4
             "transformer_encoder.{bid}.attention_norm", # neobert
-            "blk.{bid}.attn_norm", # smoldocling
+            "model.text_model.layers.{bid}.input_layernorm", # smoldocling
         ),

         # Attention norm 2
@@ -182,7 +181,7 @@ class TensorNameMap:
             "transformer.decoder_layer.{bid}.multi_head_attention.query", # Grok
             "transformer.h.{bid}.attn.attention.q_proj", # exaone
             "model.layers.{bid}.self_attn.q_proj", # llama4
-            "blk.{bid}.attn_q", # smoldocling
+            "model.text_model.layers.{bid}.self_attn.q_proj", # smoldocling
         ),

         # Attention key
@@ -199,7 +198,7 @@ class TensorNameMap:
             "transformer.decoder_layer.{bid}.multi_head_attention.key", # Grok
             "transformer.h.{bid}.attn.attention.k_proj", # exaone
             "model.layers.{bid}.self_attn.k_proj", # llama4
-            "blk.{bid}.attn_k", # smoldocling
+            "model.text_model.layers.{bid}.self_attn.k_proj", # smoldocling
         ),

         # Attention value
@@ -215,7 +214,7 @@ class TensorNameMap:
             "transformer.decoder_layer.{bid}.multi_head_attention.value", # Grok
             "transformer.h.{bid}.attn.attention.v_proj", # exaone
             "model.layers.{bid}.self_attn.v_proj", # llama4
-            "blk.{bid}.attn_v", # smoldocling
+            "model.text_model.layers.{bid}.self_attn.v_proj", # smoldocling
         ),

         # Attention output
@@ -246,7 +245,7 @@ class TensorNameMap:
             "transformer.h.{bid}.attn.attention.out_proj", # exaone
             "model.layers.{bid}.self_attn.o_proj", # llama4
             "transformer_encoder.{bid}.wo", # neobert
-            "blk.{bid}.attn_output", # smoldocling
+            "model.text_model.layers.{bid}.self_attn.o_proj", # smoldocling
         ),

         # Attention output norm
@@ -260,8 +259,9 @@ class TensorNameMap:
         ),

         MODEL_TENSOR.ATTN_POST_NORM: (
-            "model.layers.{bid}.post_attention_layernorm", # gemma2 olmo2 # ge
-            "model.layers.{bid}.post_self_attn_layernorm", # glm-4-0414
+            "model.layers.{bid}.post_attention_layernorm", # gemma2 olmo2 # ge
+            "model.layers.{bid}.post_self_attn_layernorm", # glm-4-0414
+            "model.text_model.layers.{bid}.post_attention_layernorm", # smoldocling
         ),

         # Rotary embeddings
@@ -291,7 +291,6 @@ class TensorNameMap:
             "model.layers.{bid}.pre_moe_layernorm", # mini-jamba
             "model.layers.{bid}.post_attention_layernorm", # llama4
             "transformer_encoder.{bid}.ffn_norm", # neobert
-            "blk.{bid}.ffn_norm", # smoldocling
         ),

         # Post feed-forward norm
@@ -360,8 +359,7 @@ class TensorNameMap:
             "transformer.h.{bid}.mlp.c_fc_1", # exaone
             "model.layers.{bid}.feed_forward.up_proj", # llama4 jamba
             "transformer_encoder.{bid}.ffn.w12", # neobert
-            "blk.{bid}.ffn_up", # smoldocling
-            "blk.{bid}.ffn_up", # smoldocling
+            "model.text_model.layers.{bid}.mlp.up_proj", # smoldocling
         ),

         MODEL_TENSOR.FFN_UP_EXP: (
@@ -389,20 +387,20 @@ class TensorNameMap:

         # Feed-forward gate
         MODEL_TENSOR.FFN_GATE: (
-            "model.layers.{bid}.mlp.gate_proj", # llama-hf refact olmo2
-            "layers.{bid}.feed_forward.w1", # llama-pth
-            "transformer.h.{bid}.mlp.w2", # qwen
-            "transformer.h.{bid}.mlp.c_fc2", # jais
-            "model.layers.layers.{bid}.mlp.gate_proj", # plamo
-            "model.layers.{bid}.feed_forward.w1", # internlm2
-            "encoder.layers.{bid}.mlp.fc12", # nomic-bert
-            "encoder.layer.{bid}.mlp.gated_layers_w", # jina-bert-v2 (split up/gate, no longer used)
-            "transformer.h.{bid}.mlp.linear_1", # refact
-            "model.layers.{bid}.residual_mlp.w1", # arctic
-            "transformer.h.{bid}.mlp.c_fc_0", # exaone
-            "model.layers.{bid}.feed_forward.gate_proj", # llama4
-            "blk.{bid}.ffn_gate", # smoldocling
-            "model.layers.{bid}.feed_forward.gate_proj", # llama4 jamba
+            "model.layers.{bid}.mlp.gate_proj", # llama-hf refact olmo2
+            "layers.{bid}.feed_forward.w1", # llama-pth
+            "transformer.h.{bid}.mlp.w2", # qwen
+            "transformer.h.{bid}.mlp.c_fc2", # jais
+            "model.layers.layers.{bid}.mlp.gate_proj", # plamo
+            "model.layers.{bid}.feed_forward.w1", # internlm2
+            "encoder.layers.{bid}.mlp.fc12", # nomic-bert
+            "encoder.layer.{bid}.mlp.gated_layers_w", # jina-bert-v2 (split up/gate, no longer used)
+            "transformer.h.{bid}.mlp.linear_1", # refact
+            "model.layers.{bid}.residual_mlp.w1", # arctic
+            "transformer.h.{bid}.mlp.c_fc_0", # exaone
+            "model.layers.{bid}.feed_forward.gate_proj", # llama4
+            "model.layers.{bid}.feed_forward.gate_proj", # llama4 jamba
+            "model.text_model.layers.{bid}.mlp.gate_proj", # smoldocling
         ),

         MODEL_TENSOR.FFN_GATE_EXP: (
@@ -450,7 +448,7 @@ class TensorNameMap:
             "model.layers.h.{bid}.mlp.c_proj", # exaone
             "model.layers.{bid}.feed_forward.down_proj", # llama4 jamba
             "transformer_encoder.{bid}.ffn.w3", # neobert
-            "blk.{bid}.ffn_down", # smoldocling
+            "model.text_model.layers.{bid}.mlp.down_proj", # smoldocling
         ),

         MODEL_TENSOR.FFN_DOWN_EXP: (
@@ -1038,7 +1036,7 @@ class TensorNameMap:
         ),

         MODEL_TENSOR.V_MMPROJ_FC: (
-            "model.connector.modality_projection.proj", # SmolVLM
+            "model.connector.modality_projection.proj", # SmolVLM smoldocling
         ),

         MODEL_TENSOR.V_MMPROJ_MLP: (

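For context on how these entries are consumed, below is a minimal, self-contained Python sketch (not the actual gguf-py API) of how a TensorNameMap-style table resolves source-model tensor names to GGUF names: "{bid}" placeholders are expanded per block index, and suffixes such as ".weight" are tried separately, which is why the table stores "model.text_model.embed_tokens" without a ".weight" suffix and addresses smoldocling layers via "model.text_model.layers.{bid}...". The mapping strings are taken from the diff above; BLOCK_MAPPINGS, TOKEN_EMBD, build_lookup and map_name are hypothetical helpers for illustration only.

# Minimal sketch of a TensorNameMap-style lookup; not the real gguf-py API.
# Mapping strings are copied from the diff above; the tables and helpers
# themselves are hypothetical.

# GGUF target name (with {bid} block placeholder) -> candidate source names.
BLOCK_MAPPINGS: dict[str, tuple[str, ...]] = {
    "blk.{bid}.attn_q": (
        "model.layers.{bid}.self_attn.q_proj",             # llama4
        "model.text_model.layers.{bid}.self_attn.q_proj",  # smoldocling
    ),
    "blk.{bid}.ffn_up": (
        "model.layers.{bid}.feed_forward.up_proj",         # llama4 jamba
        "model.text_model.layers.{bid}.mlp.up_proj",       # smoldocling
    ),
}

# Non-block tensors map directly; entries are stored without ".weight"/".bias".
TOKEN_EMBD: tuple[str, ...] = (
    "model.embed_tokens",             # llama-hf
    "model.text_model.embed_tokens",  # smoldocling
)


def build_lookup(n_blocks: int) -> dict[str, str]:
    """Expand {bid} templates for every block index and invert the tables."""
    lookup: dict[str, str] = {}
    for gguf_name, sources in BLOCK_MAPPINGS.items():
        for bid in range(n_blocks):
            for src in sources:
                lookup[src.format(bid=bid)] = gguf_name.format(bid=bid)
    for src in TOKEN_EMBD:
        lookup[src] = "token_embd"
    return lookup


def map_name(lookup: dict[str, str], name: str,
             try_suffixes: tuple[str, ...] = (".weight", ".bias")) -> str | None:
    """Resolve a checkpoint tensor name, re-attaching a recognised suffix."""
    if name in lookup:
        return lookup[name]
    for suffix in try_suffixes:
        if name.endswith(suffix) and name[: -len(suffix)] in lookup:
            return lookup[name[: -len(suffix)]] + suffix
    return None


if __name__ == "__main__":
    lookup = build_lookup(n_blocks=2)
    # The base name is stored suffix-free, so the ".weight" variant still resolves.
    print(map_name(lookup, "model.text_model.embed_tokens.weight"))
    # -> token_embd.weight
    print(map_name(lookup, "model.text_model.layers.1.self_attn.q_proj.weight"))
    # -> blk.1.attn_q.weight

With a layout like this, supporting a new model family is mostly a matter of appending its checkpoint tensor names to the relevant tuples, which is essentially what this commit does for smoldocling.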