
Commit 5c04994

Fix mappings
1 parent 8fcfba1 commit 5c04994

2 files changed: +49 −6 lines changed

gguf-py/gguf/constants.py

Lines changed: 19 additions & 3 deletions
@@ -396,10 +396,15 @@ class MODEL_TENSOR(IntEnum):
     SSM_IN = auto()
     SSM_CONV1D = auto()
     SSM_X = auto()
+    SSM_BCDT = auto()
     SSM_DT = auto()
+    SSM_DT_BIAS = auto()
     SSM_A = auto()
     SSM_D = auto()
     SSM_OUT = auto()
+    SSM_DT_NORM_WEIGHT = auto()
+    SSM_B_NORM_WEIGHT = auto()
+    SSM_C_NORM_WEIGHT = auto()
     TIME_MIX_W0 = auto()
     TIME_MIX_W1 = auto()
     TIME_MIX_W2 = auto()
@@ -677,9 +682,14 @@ class MODEL_TENSOR(IntEnum):
     MODEL_TENSOR.SSM_IN: "blk.{bid}.ssm_in",
     MODEL_TENSOR.SSM_CONV1D: "blk.{bid}.ssm_conv1d",
     MODEL_TENSOR.SSM_X: "blk.{bid}.ssm_x",
+    MODEL_TENSOR.SSM_BCDT: "blk.{bid}.ssm_bcdt",
     MODEL_TENSOR.SSM_DT: "blk.{bid}.ssm_dt",
+    MODEL_TENSOR.SSM_DT_BIAS: "blk.{bid}.ssm_dt_bias",
     MODEL_TENSOR.SSM_A: "blk.{bid}.ssm_a",
     MODEL_TENSOR.SSM_D: "blk.{bid}.ssm_d",
+    MODEL_TENSOR.SSM_DT_NORM_WEIGHT: "blk.{bid}.ssm_dt_norm_weight",
+    MODEL_TENSOR.SSM_B_NORM_WEIGHT: "blk.{bid}.ssm_b_norm_weight",
+    MODEL_TENSOR.SSM_C_NORM_WEIGHT: "blk.{bid}.ssm_c_norm_weight",
     MODEL_TENSOR.SSM_OUT: "blk.{bid}.ssm_out",
     MODEL_TENSOR.TIME_MIX_W0: "blk.{bid}.time_mix_w0",
     MODEL_TENSOR.TIME_MIX_W1: "blk.{bid}.time_mix_w1",
@@ -1280,23 +1290,29 @@ class MODEL_TENSOR(IntEnum):
         MODEL_TENSOR.OUTPUT,
         MODEL_TENSOR.ROPE_FREQS,
         MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_NORM_2,
+        MODEL_TENSOR.ATTN_QKV,
         MODEL_TENSOR.ATTN_Q,
         MODEL_TENSOR.ATTN_K,
-        MODEL_TENSOR.ATTN_V,
         MODEL_TENSOR.ATTN_OUT,
         MODEL_TENSOR.ATTN_ROT_EMBD,
-        MODEL_TENSOR.FFN_NORM,
+        MODEL_TENSOR.FFN_PRE_NORM,
+        MODEL_TENSOR.FFN_POST_NORM,
         MODEL_TENSOR.FFN_GATE,
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_UP,
         # SSM/Mamba tensors for hybrid architecture
         MODEL_TENSOR.SSM_IN,
         MODEL_TENSOR.SSM_CONV1D,
-        MODEL_TENSOR.SSM_X,
+        MODEL_TENSOR.SSM_BCDT,
         MODEL_TENSOR.SSM_DT,
+        MODEL_TENSOR.SSM_DT_BIAS,
         MODEL_TENSOR.SSM_A,
         MODEL_TENSOR.SSM_D,
         MODEL_TENSOR.SSM_OUT,
+        MODEL_TENSOR.SSM_DT_NORM_WEIGHT,
+        MODEL_TENSOR.SSM_B_NORM_WEIGHT,
+        MODEL_TENSOR.SSM_C_NORM_WEIGHT,
     ],
     MODEL_ARCH.GPT2: [
         MODEL_TENSOR.TOKEN_EMBD,
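
For context on what these additions feed into: each MODEL_TENSOR entry is paired with a per-block GGUF name template in TENSOR_NAMES, which convert scripts expand with the block index bid. Below is a minimal, self-contained sketch of that expansion; it copies only the entries added in this commit rather than importing gguf-py, so treat it as illustration, not the module itself.

from enum import IntEnum, auto

# Sketch: the new plamo2 SSM tensor enums and their GGUF name templates,
# mirroring the TENSOR_NAMES entries added in this commit.
class MODEL_TENSOR(IntEnum):
    SSM_BCDT = auto()
    SSM_DT_BIAS = auto()
    SSM_DT_NORM_WEIGHT = auto()
    SSM_B_NORM_WEIGHT = auto()
    SSM_C_NORM_WEIGHT = auto()

TENSOR_NAMES = {
    MODEL_TENSOR.SSM_BCDT: "blk.{bid}.ssm_bcdt",
    MODEL_TENSOR.SSM_DT_BIAS: "blk.{bid}.ssm_dt_bias",
    MODEL_TENSOR.SSM_DT_NORM_WEIGHT: "blk.{bid}.ssm_dt_norm_weight",
    MODEL_TENSOR.SSM_B_NORM_WEIGHT: "blk.{bid}.ssm_b_norm_weight",
    MODEL_TENSOR.SSM_C_NORM_WEIGHT: "blk.{bid}.ssm_c_norm_weight",
}

# Expanding a template for block 0 yields the concrete tensor name:
print(TENSOR_NAMES[MODEL_TENSOR.SSM_BCDT].format(bid=0))  # blk.0.ssm_bcdt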

gguf-py/gguf/tensor_mapping.py

Lines changed: 30 additions & 3 deletions
@@ -134,6 +134,7 @@ class TensorNameMap:
             "rwkv.blocks.{bid}.ln1", # rwkv6
             "model.layers.{bid}.ln1", # rwkv7
             "model.layers.{bid}.input_layernorm", # llama4
+            "model.layers.{bid}.pre_mixer_norm", # plamo2
         ),

         # Attention norm 2
@@ -142,6 +143,7 @@ class TensorNameMap:
             "encoder.layer.{bid}.layer_norm_1", # jina-v2-code
             "rwkv.blocks.{bid}.ln2", # rwkv6
             "model.layers.{bid}.ln2", # rwkv7
+            "model.layers.{bid}.post_mixer_norm", # plamo2
         ),

         # Attention query-key-value
@@ -160,6 +162,7 @@ class TensorNameMap:
             "model.layers.{bid}.self_attn.qkv_proj", # phi3
             "encoder.layers.{bid}.self_attention.query_key_value", # chatglm
             "transformer.layers.{bid}.attn.qkv_proj", # openelm
+            "model.layers.{bid}.mixer.qkv_proj", # plamo2
         ),

         # Attention query
@@ -174,6 +177,7 @@ class TensorNameMap:
             "transformer.decoder_layer.{bid}.multi_head_attention.query", # Grok
             "transformer.h.{bid}.attn.attention.q_proj", # exaone
             "model.layers.{bid}.self_attn.q_proj", # llama4
+            "model.layers.{bid}.mixer.q_weight", # plamo2
         ),

         # Attention key
@@ -189,6 +193,7 @@ class TensorNameMap:
             "transformer.decoder_layer.{bid}.multi_head_attention.key", # Grok
             "transformer.h.{bid}.attn.attention.k_proj", # exaone
             "model.layers.{bid}.self_attn.k_proj", # llama4
+            "model.layers.{bid}.mixer.k_weight", # plamo2
         ),

         # Attention value
@@ -230,6 +235,7 @@ class TensorNameMap:
             "transformer.layers.{bid}.attn.out_proj", # openelm
             "transformer.h.{bid}.attn.attention.out_proj", # exaone
             "model.layers.{bid}.self_attn.o_proj", # llama4
+            "model.layers.{bid}.mixer.o_proj", # plamo2
         ),

         # Attention output norm
@@ -271,15 +277,17 @@ class TensorNameMap:
             "model.layers.{bid}.post_attention_layernorm", # llama4
         ),

-        # Post feed-forward norm
+        # Pre feed-forward norm
         MODEL_TENSOR.FFN_PRE_NORM: (
             "model.layers.{bid}.pre_feedforward_layernorm", # gemma2
+            "model.layers.{bid}.pre_mlp_norm", # plamo2
         ),

         # Post feed-forward norm
         MODEL_TENSOR.FFN_POST_NORM: (
             "model.layers.{bid}.post_feedforward_layernorm", # gemma2 olmo2
-            "model.layers.{bid}.post_mlp_layernorm", # glm-4-0414
+            "model.layers.{bid}.post_mlp_layernorm", # glm-4-0414
+            "model.layers.{bid}.post_mlp_norm", # plamo2
         ),

         MODEL_TENSOR.FFN_GATE_INP: (
@@ -476,7 +484,10 @@ class TensorNameMap:
         MODEL_TENSOR.SSM_X: (
             "model.layers.{bid}.x_proj",
             "backbone.layers.{bid}.mixer.x_proj",
-            "model.layers.{bid}.mixer.x_proj", # plamo2
+        ),
+
+        MODEL_TENSOR.SSM_BCDT: (
+            "model.layers.{bid}.mixer.bcdt_proj", # plamo2
         ),

         MODEL_TENSOR.SSM_DT: (
@@ -485,6 +496,10 @@ class TensorNameMap:
             "model.layers.{bid}.mixer.dt_proj", # plamo2
         ),

+        MODEL_TENSOR.SSM_DT_BIAS: (
+            "model.layers.{bid}.mixer.dt_bias", # plamo2
+        ),
+
         MODEL_TENSOR.SSM_A: (
             "model.layers.{bid}.A_log",
             "backbone.layers.{bid}.mixer.A_log",
@@ -497,6 +512,18 @@ class TensorNameMap:
             "model.layers.{bid}.mixer.D", # plamo2
         ),

+        MODEL_TENSOR.SSM_DT_NORM_WEIGHT: (
+            "model.layers.{bid}.mixer.dt_norm_weight", # plamo2
+        ),
+
+        MODEL_TENSOR.SSM_B_NORM_WEIGHT: (
+            "model.layers.{bid}.mixer.B_norm_weight", # plamo2
+        ),
+
+        MODEL_TENSOR.SSM_C_NORM_WEIGHT: (
+            "model.layers.{bid}.mixer.C_norm_weight", # plamo2
+        ),
+
         MODEL_TENSOR.SSM_OUT: (
             "model.layers.{bid}.out_proj",
             "backbone.layers.{bid}.mixer.out_proj",
