@@ -14,6 +14,7 @@ class TensorNameMap:
             "transformer.word_embeddings",  # falcon
             "word_embeddings",  # bloom
             "model.embed_tokens",  # llama-hf nemotron olmoe olmo2 rwkv6qwen2 glm4-0414 plamo2 granite-hybrid
+            "embed_tokens",  # embeddinggemma
             "tok_embeddings",  # llama-pth
             "embeddings.word_embeddings",  # bert nomic-bert
             "language_model.embedding.word_embeddings",  # persimmon
@@ -141,6 +142,7 @@ class TensorNameMap:
             "rwkv.blocks.{bid}.ln1",  # rwkv6
             "model.layers.{bid}.ln1",  # rwkv7
             "model.layers.{bid}.input_layernorm",  # llama4
+            "layers.{bid}.input_layernorm",  # embeddinggemma
             "transformer_encoder.{bid}.attention_norm",  # neobert
             "model.layers.{bid}.operator_norm",  # lfm2
             "model.transformer.blocks.{bid}.attn_norm",  # llada
@@ -179,6 +181,7 @@ class TensorNameMap:
         # Attention query
         MODEL_TENSOR.ATTN_Q: (
             "model.layers.{bid}.self_attn.q_proj",  # llama-hf nemotron olmoe olmo2 phimoe
+            "layers.{bid}.self_attn.q_proj",  # embeddinggemma
             "model.layers.{bid}.self_attn.q_proj_no_perm",  # llama-custom
             "layers.{bid}.attention.wq",  # llama-pth
             "encoder.layer.{bid}.attention.self.query",  # bert
@@ -197,6 +200,7 @@ class TensorNameMap:
         # Attention key
         MODEL_TENSOR.ATTN_K: (
             "model.layers.{bid}.self_attn.k_proj",  # llama-hf nemotron olmoe olmo2 phimoe
+            "layers.{bid}.self_attn.k_proj",  # embeddinggemma
             "model.layers.{bid}.self_attn.k_proj_no_perm",  # llama-custom
             "layers.{bid}.attention.wk",  # llama-pth
             "encoder.layer.{bid}.attention.self.key",  # bert
@@ -216,6 +220,7 @@ class TensorNameMap:
         # Attention value
         MODEL_TENSOR.ATTN_V: (
             "model.layers.{bid}.self_attn.v_proj",  # llama-hf nemotron olmoe olmo2 phimoe
+            "layers.{bid}.self_attn.v_proj",  # embeddinggemma
             "layers.{bid}.attention.wv",  # llama-pth
             "encoder.layer.{bid}.attention.self.value",  # bert
             "transformer.layer.{bid}.attention.v_lin",  # distillbert
@@ -239,6 +244,7 @@ class TensorNameMap:
             "transformer.h.{bid}.self_attention.dense",  # falcon
             "h.{bid}.self_attention.dense",  # bloom
             "model.layers.{bid}.self_attn.o_proj",  # llama-hf nemotron olmoe olmo2 phimoe
+            "layers.{bid}.self_attn.o_proj",  # embeddinggemma
             "model.layers.{bid}.self_attn.out_proj",  # lfm2
             "model.layers.{bid}.self_attn.linear_attn",  # deci
             "layers.{bid}.attention.wo",  # llama-pth
@@ -277,6 +283,7 @@ class TensorNameMap:
 
         MODEL_TENSOR.ATTN_POST_NORM: (
             "model.layers.{bid}.post_attention_layernorm",  # gemma2 olmo2  # ge
+            "layers.{bid}.post_attention_layernorm",  # embeddinggemma
             "model.layers.{bid}.post_self_attn_layernorm",  # glm-4-0414
             "model.layers.layers.{bid}.post_mixer_norm.weight",  # plamo2
         ),
@@ -320,12 +327,14 @@ class TensorNameMap:
         # Pre feed-forward norm
         MODEL_TENSOR.FFN_PRE_NORM: (
             "model.layers.{bid}.pre_feedforward_layernorm",  # gemma2
+            "layers.{bid}.pre_feedforward_layernorm",  # embeddinggemma
             "model.layers.{bid}.pre_ff_layernorm.weight",
         ),
 
         # Post feed-forward norm
         MODEL_TENSOR.FFN_POST_NORM: (
             "model.layers.{bid}.post_feedforward_layernorm",  # gemma2 olmo2
+            "layers.{bid}.post_feedforward_layernorm",  # embeddinggemma
             "model.layers.{bid}.post_mlp_layernorm",  # glm-4-0414
             "model.layers.layers.{bid}.post_mlp_norm.weight",  # plamo2
             "model.layers.{bid}.feed_forward.up_proj",
@@ -362,6 +371,7 @@ class TensorNameMap:
             "transformer.h.{bid}.mlp.dense_h_to_4h",  # falcon
             "h.{bid}.mlp.dense_h_to_4h",  # bloom
             "model.layers.{bid}.mlp.up_proj",  # llama-hf refact nemotron olmo2
+            "layers.{bid}.mlp.up_proj",  # embeddinggemma
             "layers.{bid}.feed_forward.w3",  # llama-pth
             "encoder.layer.{bid}.intermediate.dense",  # bert
             "transformer.layer.{bid}.ffn.lin1",  # distillbert
@@ -421,6 +431,7 @@ class TensorNameMap:
         # Feed-forward gate
         MODEL_TENSOR.FFN_GATE: (
             "model.layers.{bid}.mlp.gate_proj",  # llama-hf refact olmo2
+            "layers.{bid}.mlp.gate_proj",  # embeddinggemma
             "layers.{bid}.feed_forward.w1",  # llama-pth
             "transformer.h.{bid}.mlp.w2",  # qwen
             "transformer.h.{bid}.mlp.c_fc2",  # jais
@@ -461,6 +472,7 @@ class TensorNameMap:
             "transformer.h.{bid}.mlp.dense_4h_to_h",  # falcon
             "h.{bid}.mlp.dense_4h_to_h",  # bloom
             "model.layers.{bid}.mlp.down_proj",  # llama-hf nemotron olmo2
+            "layers.{bid}.mlp.down_proj",  # embeddinggemma
             "layers.{bid}.feed_forward.w2",  # llama-pth
             "encoder.layer.{bid}.output.dense",  # bert
             "transformer.layer.{bid}.ffn.lin2",  # distillbert
@@ -513,6 +525,7 @@ class TensorNameMap:
             "model.layers.{bid}.self_attn.q_layernorm",  # persimmon
             "model.layers.{bid}.self_attn.query_layernorm",  # hunyuan
             "model.layers.{bid}.self_attn.q_norm",  # cohere olmoe chameleon olmo2
+            "layers.{bid}.self_attn.q_norm",  # embeddinggemma
             "transformer.blocks.{bid}.attn.q_ln",  # sea-lion
             "encoder.layer.{bid}.attention.self.layer_norm_q",  # jina-bert-v2
             "transformer.layers.{bid}.attn.q_norm",  # openelm
@@ -525,6 +538,7 @@ class TensorNameMap:
             "model.layers.{bid}.self_attn.k_layernorm",  # persimmon
             "model.layers.{bid}.self_attn.key_layernorm",  # hunyuan
             "model.layers.{bid}.self_attn.k_norm",  # cohere olmoe chameleon olmo2
+            "layers.{bid}.self_attn.k_norm",  # embeddinggemma
             "transformer.blocks.{bid}.attn.k_ln",  # sea-lion
             "encoder.layer.{bid}.attention.self.layer_norm_k",  # jina-bert-v2
             "transformer.layers.{bid}.attn.k_norm",  # openelm