@@ -14,6 +14,7 @@ class TensorNameMap:
             "transformer.word_embeddings",               # falcon
             "word_embeddings",                           # bloom
             "model.embed_tokens",                        # llama-hf nemotron olmoe olmo2 rwkv6qwen2 glm4-0414 plamo2 granite-hybrid
+            "embed_tokens",                              # embeddinggemma
             "tok_embeddings",                            # llama-pth
             "embeddings.word_embeddings",                # bert nomic-bert
             "language_model.embedding.word_embeddings",  # persimmon
@@ -141,6 +142,7 @@ class TensorNameMap:
             "rwkv.blocks.{bid}.ln1",                     # rwkv6
             "model.layers.{bid}.ln1",                    # rwkv7
             "model.layers.{bid}.input_layernorm",        # llama4
+            "layers.{bid}.input_layernorm",              # embeddinggemma
             "transformer_encoder.{bid}.attention_norm",  # neobert
             "model.layers.{bid}.operator_norm",          # lfm2
             "model.transformer.blocks.{bid}.attn_norm",  # llada
@@ -179,6 +181,7 @@ class TensorNameMap:
         # Attention query
         MODEL_TENSOR.ATTN_Q: (
             "model.layers.{bid}.self_attn.q_proj",          # llama-hf nemotron olmoe olmo2 phimoe
+            "layers.{bid}.self_attn.q_proj",                # embeddinggemma
             "model.layers.{bid}.self_attn.q_proj_no_perm",  # llama-custom
             "layers.{bid}.attention.wq",                    # llama-pth
             "encoder.layer.{bid}.attention.self.query",     # bert
@@ -197,6 +200,7 @@ class TensorNameMap:
         # Attention key
         MODEL_TENSOR.ATTN_K: (
             "model.layers.{bid}.self_attn.k_proj",          # llama-hf nemotron olmoe olmo2 phimoe
+            "layers.{bid}.self_attn.k_proj",                # embeddinggemma
             "model.layers.{bid}.self_attn.k_proj_no_perm",  # llama-custom
             "layers.{bid}.attention.wk",                    # llama-pth
             "encoder.layer.{bid}.attention.self.key",       # bert
@@ -216,6 +220,7 @@ class TensorNameMap:
         # Attention value
         MODEL_TENSOR.ATTN_V: (
             "model.layers.{bid}.self_attn.v_proj",       # llama-hf nemotron olmoe olmo2 phimoe
+            "layers.{bid}.self_attn.v_proj",             # embeddinggemma
             "layers.{bid}.attention.wv",                 # llama-pth
             "encoder.layer.{bid}.attention.self.value",  # bert
             "transformer.layer.{bid}.attention.v_lin",   # distillbert
@@ -239,6 +244,7 @@ class TensorNameMap:
             "transformer.h.{bid}.self_attention.dense",  # falcon
             "h.{bid}.self_attention.dense",              # bloom
             "model.layers.{bid}.self_attn.o_proj",       # llama-hf nemotron olmoe olmo2 phimoe
+            "layers.{bid}.self_attn.o_proj",             # embeddinggemma
             "model.layers.{bid}.self_attn.out_proj",     # lfm2
             "model.layers.{bid}.self_attn.linear_attn",  # deci
             "layers.{bid}.attention.wo",                 # llama-pth
@@ -277,6 +283,7 @@ class TensorNameMap:

         MODEL_TENSOR.ATTN_POST_NORM: (
             "model.layers.{bid}.post_attention_layernorm",       # gemma2 olmo2 # ge
+            "layers.{bid}.post_attention_layernorm",             # embeddinggemma
             "model.layers.{bid}.post_self_attn_layernorm",       # glm-4-0414
             "model.layers.layers.{bid}.post_mixer_norm.weight",  # plamo2
         ),
@@ -320,12 +327,14 @@ class TensorNameMap:
         # Post feed-forward norm
         MODEL_TENSOR.FFN_PRE_NORM: (
             "model.layers.{bid}.pre_feedforward_layernorm",    # gemma2
+            "layers.{bid}.pre_feedforward_layernorm",          # embeddinggemma
             "model.layers.{bid}.pre_ff_layernorm.weight",
         ),

         # Post feed-forward norm
         MODEL_TENSOR.FFN_POST_NORM: (
             "model.layers.{bid}.post_feedforward_layernorm",   # gemma2 olmo2
+            "layers.{bid}.post_feedforward_layernorm",         # embeddinggemma
             "model.layers.{bid}.post_mlp_layernorm",           # glm-4-0414
             "model.layers.layers.{bid}.post_mlp_norm.weight",  # plamo2
             "model.layers.{bid}.feed_forward.up_proj",
@@ -362,6 +371,7 @@ class TensorNameMap:
             "transformer.h.{bid}.mlp.dense_h_to_4h",   # falcon
             "h.{bid}.mlp.dense_h_to_4h",               # bloom
             "model.layers.{bid}.mlp.up_proj",          # llama-hf refact nemotron olmo2
+            "layers.{bid}.mlp.up_proj",                # embeddinggemma
             "layers.{bid}.feed_forward.w3",            # llama-pth
             "encoder.layer.{bid}.intermediate.dense",  # bert
             "transformer.layer.{bid}.ffn.lin1",        # distillbert
@@ -421,6 +431,7 @@ class TensorNameMap:
         # Feed-forward gate
         MODEL_TENSOR.FFN_GATE: (
             "model.layers.{bid}.mlp.gate_proj",  # llama-hf refact olmo2
+            "layers.{bid}.mlp.gate_proj",        # embeddinggemma
             "layers.{bid}.feed_forward.w1",      # llama-pth
             "transformer.h.{bid}.mlp.w2",        # qwen
             "transformer.h.{bid}.mlp.c_fc2",     # jais
@@ -461,6 +472,7 @@ class TensorNameMap:
             "transformer.h.{bid}.mlp.dense_4h_to_h",   # falcon
             "h.{bid}.mlp.dense_4h_to_h",               # bloom
             "model.layers.{bid}.mlp.down_proj",        # llama-hf nemotron olmo2
+            "layers.{bid}.mlp.down_proj",              # embeddinggemma
             "layers.{bid}.feed_forward.w2",            # llama-pth
             "encoder.layer.{bid}.output.dense",        # bert
             "transformer.layer.{bid}.ffn.lin2",        # distillbert
@@ -513,6 +525,7 @@ class TensorNameMap:
             "model.layers.{bid}.self_attn.q_layernorm",         # persimmon
             "model.layers.{bid}.self_attn.query_layernorm",     # hunyuan
             "model.layers.{bid}.self_attn.q_norm",              # cohere olmoe chameleon olmo2
+            "layers.{bid}.self_attn.q_norm",                    # embeddinggemma
             "transformer.blocks.{bid}.attn.q_ln",               # sea-lion
             "encoder.layer.{bid}.attention.self.layer_norm_q",  # jina-bert-v2
             "transformer.layers.{bid}.attn.q_norm",             # openelm
@@ -525,6 +538,7 @@ class TensorNameMap:
             "model.layers.{bid}.self_attn.k_layernorm",         # persimmon
             "model.layers.{bid}.self_attn.key_layernorm",       # hunyuan
             "model.layers.{bid}.self_attn.k_norm",              # cohere olmoe chameleon olmo2
+            "layers.{bid}.self_attn.k_norm",                    # embeddinggemma
             "transformer.blocks.{bid}.attn.k_ln",               # sea-lion
             "encoder.layer.{bid}.attention.self.layer_norm_k",  # jina-bert-v2
             "transformer.layers.{bid}.attn.k_norm",             # openelm