@@ -134,6 +134,7 @@ class TensorNameMap:
134134 "rwkv.blocks.{bid}.ln1" , # rwkv6
135135 "model.layers.{bid}.ln1" , # rwkv7
136136 "model.layers.{bid}.input_layernorm" , # llama4
137+ "model.layers.{bid}.pre_mixer_norm" , # plamo2
137138 ),
138139
139140 # Attention norm 2
@@ -142,6 +143,7 @@ class TensorNameMap:
142143 "encoder.layer.{bid}.layer_norm_1" , # jina-v2-code
143144 "rwkv.blocks.{bid}.ln2" , # rwkv6
144145 "model.layers.{bid}.ln2" , # rwkv7
146+ "model.layers.{bid}.post_mixer_norm" , # plamo2
145147 ),
146148
147149 # Attention query-key-value
@@ -160,6 +162,7 @@ class TensorNameMap:
160162 "model.layers.{bid}.self_attn.qkv_proj" , # phi3
161163 "encoder.layers.{bid}.self_attention.query_key_value" , # chatglm
162164 "transformer.layers.{bid}.attn.qkv_proj" , # openelm
165+ "model.layers.{bid}.mixer.qkv_proj" , # plamo2
163166 ),
164167
165168 # Attention query
@@ -174,6 +177,7 @@ class TensorNameMap:
174177 "transformer.decoder_layer.{bid}.multi_head_attention.query" ,# Grok
175178 "transformer.h.{bid}.attn.attention.q_proj" , # exaone
176179 "model.layers.{bid}.self_attn.q_proj" , # llama4
180+ "model.layers.{bid}.mixer.q_weight" , # plamo2
177181 ),
178182
179183 # Attention key
@@ -189,6 +193,7 @@ class TensorNameMap:
189193 "transformer.decoder_layer.{bid}.multi_head_attention.key" ,# Grok
190194 "transformer.h.{bid}.attn.attention.k_proj" , # exaone
191195 "model.layers.{bid}.self_attn.k_proj" , # llama4
196+ "model.layers.{bid}.mixer.k_weight" , # plamo2
192197 ),
193198
194199 # Attention value
@@ -230,6 +235,7 @@ class TensorNameMap:
230235 "transformer.layers.{bid}.attn.out_proj" , # openelm
231236 "transformer.h.{bid}.attn.attention.out_proj" , # exaone
232237 "model.layers.{bid}.self_attn.o_proj" , # llama4
238+ "model.layers.{bid}.mixer.o_proj" , # plamo2
233239 ),
234240
235241 # Attention output norm
@@ -271,15 +277,17 @@ class TensorNameMap:
271277 "model.layers.{bid}.post_attention_layernorm" , # llama4
272278 ),
273279
274- # Post feed-forward norm
280+ # Pre feed-forward norm
275281 MODEL_TENSOR .FFN_PRE_NORM : (
276282 "model.layers.{bid}.pre_feedforward_layernorm" , # gemma2
283+ "model.layers.{bid}.pre_mlp_norm" , # plamo2
277284 ),
278285
279286 # Post feed-forward norm
280287 MODEL_TENSOR .FFN_POST_NORM : (
281288 "model.layers.{bid}.post_feedforward_layernorm" , # gemma2 olmo2
282- "model.layers.{bid}.post_mlp_layernorm" , # glm-4-0414
289+ "model.layers.{bid}.post_mlp_layernorm" , # glm-4-0414
290+ "model.layers.{bid}.post_mlp_norm" , # plamo2
283291 ),
284292
285293 MODEL_TENSOR .FFN_GATE_INP : (
@@ -476,7 +484,10 @@ class TensorNameMap:
         MODEL_TENSOR.SSM_X: (
             "model.layers.{bid}.x_proj",
             "backbone.layers.{bid}.mixer.x_proj",
-            "model.layers.{bid}.mixer.x_proj",  # plamo2
+        ),
+
+        MODEL_TENSOR.SSM_BCDT: (
+            "model.layers.{bid}.mixer.bcdt_proj",  # plamo2
         ),
 
         MODEL_TENSOR.SSM_DT: (
@@ -485,6 +496,10 @@ class TensorNameMap:
485496 "model.layers.{bid}.mixer.dt_proj" , # plamo2
486497 ),
487498
499+ MODEL_TENSOR .SSM_DT_BIAS : (
500+ "model.layers.{bid}.mixer.dt_bias" , # plamo2
501+ ),
502+
488503 MODEL_TENSOR .SSM_A : (
489504 "model.layers.{bid}.A_log" ,
490505 "backbone.layers.{bid}.mixer.A_log" ,
@@ -497,6 +512,18 @@ class TensorNameMap:
497512 "model.layers.{bid}.mixer.D" , # plamo2
498513 ),
499514
515+ MODEL_TENSOR .SSM_DT_NORM_WEIGHT : (
516+ "model.layers.{bid}.mixer.dt_norm_weight" , # plamo2
517+ ),
518+
519+ MODEL_TENSOR .SSM_B_NORM_WEIGHT : (
520+ "model.layers.{bid}.mixer.B_norm_weight" , # plamo2
521+ ),
522+
523+ MODEL_TENSOR .SSM_C_NORM_WEIGHT : (
524+ "model.layers.{bid}.mixer.C_norm_weight" , # plamo2
525+ ),
526+
500527 MODEL_TENSOR .SSM_OUT : (
501528 "model.layers.{bid}.out_proj" ,
502529 "backbone.layers.{bid}.mixer.out_proj" ,