@@ -13,7 +13,7 @@ class TensorNameMap:
             "transformer.wte",                           # gpt2 gpt-j mpt refact qwen dbrx jais
             "transformer.word_embeddings",               # falcon
             "word_embeddings",                           # bloom
-            "model.embed_tokens",                        # llama-hf
+            "model.embed_tokens",                        # llama-hf nemotron
             "tok_embeddings",                            # llama-pth
             "embeddings.word_embeddings",                # bert nomic-bert
             "language_model.embedding.word_embeddings",  # persimmon
@@ -52,7 +52,7 @@ class TensorNameMap:
         # Output
         MODEL_TENSOR.OUTPUT: (
             "embed_out",                 # gptneox
-            "lm_head",                   # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais
+            "lm_head",                   # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron
             "output",                    # llama-pth bloom internlm2
             "word_embeddings_for_head",  # persimmon
             "lm_head.linear",            # phi2
@@ -75,6 +75,7 @@ class TensorNameMap:
             "transformer.rms_norm",     # Grok
             "encoder.final_layernorm",  # chatglm
             "transformer.norm",         # openelm
+            "model.norm",               # nemotron
         ),

         # Rope frequencies
@@ -93,7 +94,7 @@ class TensorNameMap:
             "transformer.h.{bid}.input_layernorm",                 # falcon7b
             "h.{bid}.input_layernorm",                             # bloom
             "transformer.h.{bid}.ln_mlp",                          # falcon40b
-            "model.layers.{bid}.input_layernorm",                  # llama-hf
+            "model.layers.{bid}.input_layernorm",                  # llama-hf nemotron
             "layers.{bid}.attention_norm",                         # llama-pth
             "language_model.encoder.layers.{bid}.input_layernorm", # persimmon
             "model.layers.{bid}.ln1",                              # yi
@@ -135,7 +136,7 @@ class TensorNameMap:

         # Attention query
         MODEL_TENSOR.ATTN_Q: (
-            "model.layers.{bid}.self_attn.q_proj",       # llama-hf
+            "model.layers.{bid}.self_attn.q_proj",       # llama-hf nemotron
             "layers.{bid}.attention.wq",                 # llama-pth
             "encoder.layer.{bid}.attention.self.query",  # bert
             "transformer.h.{bid}.attn.q_proj",           # gpt-j
@@ -146,7 +147,7 @@ class TensorNameMap:

         # Attention key
         MODEL_TENSOR.ATTN_K: (
-            "model.layers.{bid}.self_attn.k_proj",     # llama-hf
+            "model.layers.{bid}.self_attn.k_proj",     # llama-hf nemotron
             "layers.{bid}.attention.wk",               # llama-pth
             "encoder.layer.{bid}.attention.self.key",  # bert
             "transformer.h.{bid}.attn.k_proj",         # gpt-j
@@ -158,7 +159,7 @@ class TensorNameMap:

         # Attention value
         MODEL_TENSOR.ATTN_V: (
-            "model.layers.{bid}.self_attn.v_proj",       # llama-hf
+            "model.layers.{bid}.self_attn.v_proj",       # llama-hf nemotron
             "layers.{bid}.attention.wv",                 # llama-pth
             "encoder.layer.{bid}.attention.self.value",  # bert
             "transformer.h.{bid}.attn.v_proj",           # gpt-j
@@ -175,7 +176,7 @@ class TensorNameMap:
             "transformer.blocks.{bid}.attn.out_proj",     # mpt
             "transformer.h.{bid}.self_attention.dense",   # falcon
             "h.{bid}.self_attention.dense",               # bloom
-            "model.layers.{bid}.self_attn.o_proj",        # llama-hf
+            "model.layers.{bid}.self_attn.o_proj",        # llama-hf nemotron
             "layers.{bid}.attention.wo",                  # llama-pth
             "encoder.layer.{bid}.attention.output.dense", # bert
             "transformer.h.{bid}.attn.out_proj",          # gpt-j
@@ -218,7 +219,7 @@ class TensorNameMap:
             "transformer.h.{bid}.ln_2",                                     # gpt2 refact qwen jais
             "h.{bid}.post_attention_layernorm",                             # bloom
             "transformer.blocks.{bid}.norm_2",                              # mpt
-            "model.layers.{bid}.post_attention_layernorm",                  # llama-hf
+            "model.layers.{bid}.post_attention_layernorm",                  # llama-hf nemotron
             "layers.{bid}.ffn_norm",                                        # llama-pth
             "language_model.encoder.layers.{bid}.post_attention_layernorm", # persimmon
             "model.layers.{bid}.ln2",                                       # yi
@@ -258,7 +259,7 @@ class TensorNameMap:
             "transformer.blocks.{bid}.ffn.up_proj",    # mpt
             "transformer.h.{bid}.mlp.dense_h_to_4h",   # falcon
             "h.{bid}.mlp.dense_h_to_4h",               # bloom
-            "model.layers.{bid}.mlp.up_proj",          # llama-hf refact
+            "model.layers.{bid}.mlp.up_proj",          # llama-hf refact nemotron
             "layers.{bid}.feed_forward.w3",            # llama-pth
             "encoder.layer.{bid}.intermediate.dense",  # bert
             "transformer.h.{bid}.mlp.fc_in",           # gpt-j
@@ -329,7 +330,7 @@ class TensorNameMap:
             "transformer.blocks.{bid}.ffn.down_proj",  # mpt
             "transformer.h.{bid}.mlp.dense_4h_to_h",   # falcon
             "h.{bid}.mlp.dense_4h_to_h",               # bloom
-            "model.layers.{bid}.mlp.down_proj",        # llama-hf
+            "model.layers.{bid}.mlp.down_proj",        # llama-hf nemotron
             "layers.{bid}.feed_forward.w2",            # llama-pth
             "encoder.layer.{bid}.output.dense",        # bert
             "transformer.h.{bid}.mlp.fc_out",          # gpt-j
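For reference, a minimal sketch of how the mappings above are consumed during conversion, assuming gguf-py is installed and that a MODEL_ARCH.NEMOTRON constant is registered elsewhere in this PR (both are assumptions not shown in this diff); the block count and the expected outputs are illustrative:

import gguf

# Build the HF-name -> GGUF-name map for a hypothetical 32-layer model.
# MODEL_ARCH.NEMOTRON is assumed to be added by this PR's constants changes.
tensor_map = gguf.get_tensor_name_map(gguf.MODEL_ARCH.NEMOTRON, 32)

# With the "llama-hf nemotron" entries above, nemotron checkpoint tensors
# should resolve to the same GGUF names that llama-hf tensors do.
print(tensor_map.get_name("model.embed_tokens.weight", try_suffixes=(".weight", ".bias")))
# expected: token_embd.weight
print(tensor_map.get_name("model.layers.0.self_attn.q_proj.weight", try_suffixes=(".weight", ".bias")))
# expected: blk.0.attn_q.weight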