@@ -10,7 +10,7 @@ class TensorNameMap:
         # Token embeddings
         MODEL_TENSOR.TOKEN_EMBD: (
             "gpt_neox.embed_in",            # gptneox
-            "transformer.wte",              # gpt2 gpt-j mpt refact qwen dbrx jais
+            "transformer.wte",              # gpt2 gpt-j mpt refact qwen dbrx jais exaone
             "transformer.word_embeddings",  # falcon
             "word_embeddings",              # bloom
             "model.embed_tokens",           # llama-hf nemotron
@@ -52,7 +52,7 @@ class TensorNameMap:
         # Output
         MODEL_TENSOR.OUTPUT: (
             "embed_out",                 # gptneox
-            "lm_head",                   # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron
+            "lm_head",                   # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone
             "output",                    # llama-pth bloom internlm2
             "word_embeddings_for_head",  # persimmon
             "lm_head.linear",            # phi2
@@ -62,7 +62,7 @@ class TensorNameMap:
         # Output norm
         MODEL_TENSOR.OUTPUT_NORM: (
             "gpt_neox.final_layer_norm",  # gptneox
-            "transformer.ln_f",           # gpt2 gpt-j falcon jais
+            "transformer.ln_f",           # gpt2 gpt-j falcon jais exaone
             "model.norm",                 # llama-hf baichuan internlm2
             "norm",                       # llama-pth
             "transformer.norm_f",         # mpt dbrx
@@ -89,7 +89,7 @@ class TensorNameMap:
         # Attention norm
         MODEL_TENSOR.ATTN_NORM: (
             "gpt_neox.layers.{bid}.input_layernorm",  # gptneox
-            "transformer.h.{bid}.ln_1",               # gpt2 gpt-j refact qwen jais
+            "transformer.h.{bid}.ln_1",               # gpt2 gpt-j refact qwen jais exaone
             "transformer.blocks.{bid}.norm_1",        # mpt
             "transformer.h.{bid}.input_layernorm",    # falcon7b
             "h.{bid}.input_layernorm",                # bloom
@@ -143,6 +143,7 @@ class TensorNameMap:
             "model.layers.layers.{bid}.self_attn.q_proj",                  # plamo
             "model.layers.{bid}.attention.wq",                             # internlm2
             "transformer.decoder_layer.{bid}.multi_head_attention.query",  # Grok
+            "transformer.h.{bid}.attn.attention.q_proj",                   # exaone
         ),

         # Attention key
@@ -155,6 +156,7 @@ class TensorNameMap:
             "model.layers.layers.{bid}.self_attn.k_proj",                # plamo
             "model.layers.{bid}.attention.wk",                           # internlm2
             "transformer.decoder_layer.{bid}.multi_head_attention.key",  # Grok
+            "transformer.h.{bid}.attn.attention.k_proj",                 # exaone
         ),

         # Attention value
@@ -166,7 +168,8 @@ class TensorNameMap:
             "transformer.h.{bid}.attn.v",                                  # refact
             "model.layers.layers.{bid}.self_attn.v_proj",                  # plamo
             "model.layers.{bid}.attention.wv",                             # internlm2
-            "transformer.decoder_layer.{bid}.multi_head_attention.value"   # Grok
+            "transformer.decoder_layer.{bid}.multi_head_attention.value",  # Grok
+            "transformer.h.{bid}.attn.attention.v_proj",                   # exaone
         ),

         # Attention output
@@ -191,6 +194,7 @@ class TensorNameMap:
             "transformer.blocks.{bid}.norm_attn_norm.attn.out_proj",  # dbrx
             "encoder.layers.{bid}.self_attention.dense",               # chatglm
             "transformer.layers.{bid}.attn.out_proj",                  # openelm
+            "transformer.h.{bid}.attn.attention.out_proj",             # exaone
         ),

         # Attention output norm
@@ -216,7 +220,7 @@ class TensorNameMap:
         # Feed-forward norm
         MODEL_TENSOR.FFN_NORM: (
             "gpt_neox.layers.{bid}.post_attention_layernorm",  # gptneox
-            "transformer.h.{bid}.ln_2",                         # gpt2 refact qwen jais
+            "transformer.h.{bid}.ln_2",                         # gpt2 refact qwen jais exaone
             "h.{bid}.post_attention_layernorm",                 # bloom
             "transformer.blocks.{bid}.norm_2",                  # mpt
             "model.layers.{bid}.post_attention_layernorm",      # llama-hf nemotron
@@ -278,6 +282,7 @@ class TensorNameMap:
             "encoder.layer.{bid}.mlp.gated_layers_v",  # jina-bert-v2
             "model.layers.{bid}.residual_mlp.w3",      # arctic
             "encoder.layers.{bid}.mlp.dense_h_to_4h",  # chatglm
+            "transformer.h.{bid}.mlp.c_fc_1",          # exaone
         ),

         MODEL_TENSOR.FFN_UP_EXP: (
@@ -309,6 +314,7 @@ class TensorNameMap:
             "encoder.layer.{bid}.mlp.gated_layers_w",  # jina-bert-v2
             "transformer.h.{bid}.mlp.linear_1",        # refact
             "model.layers.{bid}.residual_mlp.w1",      # arctic
+            "transformer.h.{bid}.mlp.c_fc_0",          # exaone
         ),

         MODEL_TENSOR.FFN_GATE_EXP: (
@@ -348,6 +354,7 @@ class TensorNameMap:
             "model.layers.{bid}.residual_mlp.w2",      # arctic
             "encoder.layer.{bid}.mlp.down_layer",      # jina-bert-v2
             "encoder.layers.{bid}.mlp.dense_4h_to_h",  # chatglm
+            "model.layers.h.{bid}.mlp.c_proj",         # exaone
         ),

         MODEL_TENSOR.FFN_DOWN_EXP: (
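For context, a minimal sketch (not part of this diff) of how the conversion scripts resolve a Hugging Face tensor name through `TensorNameMap`. It assumes `gguf-py` is installed and that `MODEL_ARCH.EXAONE` exists in `gguf.constants`, which this change set adds alongside the mapping entries above; the block count is arbitrary.

```python
import gguf

# Build the name map for an EXAONE checkpoint; 32 is an arbitrary block count.
tensor_map = gguf.get_tensor_name_map(gguf.MODEL_ARCH.EXAONE, 32)

# try_suffixes lets the lookup strip ".weight"/".bias" before matching block
# patterns such as "transformer.h.{bid}.attn.attention.q_proj".
name = tensor_map.get_name("transformer.h.0.attn.attention.q_proj.weight",
                           try_suffixes=(".weight", ".bias"))
print(name)  # expected GGUF name: "blk.0.attn_q.weight"
```

With these entries in place, EXAONE's `transformer.*` tensor names resolve to the same GGUF tensor types already used by the other architectures listed in each tuple.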