@@ -177,7 +177,6 @@ class TensorNameMap:
             "transformer.layer.{bid}.attention.q_lin",                     # distillbert
             "transformer.h.{bid}.attn.q_proj",                             # gpt-j
             "model.layers.layers.{bid}.self_attn.q_proj",                  # plamo
-            "model.layers.layers.{bid}.mixer.q",                           # plamo2
             "model.layers.{bid}.attention.wq",                             # internlm2
             "transformer.decoder_layer.{bid}.multi_head_attention.query",  # Grok
             "transformer.h.{bid}.attn.attention.q_proj",                   # exaone
@@ -194,7 +193,6 @@ class TensorNameMap:
             "transformer.h.{bid}.attn.k_proj",                             # gpt-j
             "transformer.h.{bid}.attn.k",                                  # refact
             "model.layers.layers.{bid}.self_attn.k_proj",                  # plamo
-            "model.layers.layers.{bid}.mixer.k",                           # plamo2
             "model.layers.{bid}.attention.wk",                             # internlm2
             "transformer.decoder_layer.{bid}.multi_head_attention.key",    # Grok
             "transformer.h.{bid}.attn.attention.k_proj",                   # exaone
@@ -472,6 +470,7 @@ class TensorNameMap:
             "transformer.blocks.{bid}.attn.q_ln",                # sea-lion
             "encoder.layer.{bid}.attention.self.layer_norm_q",   # jina-bert-v2
             "transformer.layers.{bid}.attn.q_norm",              # openelm
+            "model.layers.layers.{bid}.mixer.q",                 # plamo2
         ),
 
         MODEL_TENSOR.ATTN_K_NORM: (
@@ -481,6 +480,7 @@ class TensorNameMap:
             "transformer.blocks.{bid}.attn.k_ln",                # sea-lion
             "encoder.layer.{bid}.attention.self.layer_norm_k",   # jina-bert-v2
             "transformer.layers.{bid}.attn.k_norm",              # openelm
+            "model.layers.layers.{bid}.mixer.k",                 # plamo2
         ),
 
         MODEL_TENSOR.ROPE_FREQS: (
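
For context, a minimal sketch (not the actual llama.cpp implementation) of how a {bid}-templated entry like the moved plamo2 names can be expanded and looked up. Only "transformer.layers.{bid}.attn.q_norm" and "model.layers.layers.{bid}.mixer.q" are taken from the mapping above; the GGUF target string "blk.{bid}.attn_q_norm" and all helper names are illustrative assumptions.

# Sketch: expand {bid} templates into a concrete source-name -> GGUF-name table.
# The target "blk.{bid}.attn_q_norm" and the helper names are assumptions for illustration.

ATTN_Q_NORM_SOURCES = (
    "transformer.layers.{bid}.attn.q_norm",   # openelm (from the mapping above)
    "model.layers.layers.{bid}.mixer.q",      # plamo2 (moved to Q_NORM in this change)
)

def build_q_norm_mapping(n_blocks: int) -> dict[str, str]:
    """Expand the {bid} templates for every block into concrete lookup keys."""
    mapping: dict[str, str] = {}
    for bid in range(n_blocks):
        for template in ATTN_Q_NORM_SOURCES:
            mapping[template.format(bid=bid)] = f"blk.{bid}.attn_q_norm"
    return mapping

mapping = build_q_norm_mapping(n_blocks=2)
# After this change the plamo2 tensor resolves to the q-norm slot rather than attn_q:
print(mapping["model.layers.layers.0.mixer.q"])  # -> blk.0.attn_q_norm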