
Commit 41f25cf

add missing elements in py files

1 parent 837be3e commit 41f25cf

File tree: 3 files changed (+68 −1 lines changed)

gguf-py/gguf/constants.py

Lines changed: 41 additions & 1 deletion
@@ -166,6 +166,7 @@ class SSM:
         TIME_STEP_RANK = "{arch}.ssm.time_step_rank"
         GROUP_COUNT    = "{arch}.ssm.group_count"
         DT_B_C_RMS     = "{arch}.ssm.dt_b_c_rms"
+        HEAD_DIM       = "{arch}.ssm.head_dim"

     class WKV:
         HEAD_SIZE = "{arch}.wkv.head_size"
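
The new key is a format template scoped by architecture, like the other SSM keys above it; a minimal sketch of how it expands (the arch string here is only illustrative):

    from gguf.constants import Keys

    # the template expands to an architecture-scoped metadata key
    print(Keys.SSM.HEAD_DIM.format(arch="falcon-h1"))  # falcon-h1.ssm.head_dim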
@@ -348,6 +349,7 @@ class MODEL_ARCH(IntEnum):
     BAILINGMOE = auto()
     DOTS1      = auto()
     ARCEE      = auto()
+    FALCON_H1  = auto()


 class VISION_PROJECTOR_TYPE(IntEnum):
@@ -408,6 +410,7 @@ class MODEL_TENSOR(IntEnum):
     SSM_D        = auto()
     SSM_NORM     = auto()
     SSM_OUT      = auto()
+    SSM_MUP_VEC  = auto()
     TIME_MIX_W0  = auto()
     TIME_MIX_W1  = auto()
     TIME_MIX_W2  = auto()
@@ -633,6 +636,7 @@ class MODEL_TENSOR(IntEnum):
     MODEL_ARCH.BAILINGMOE: "bailingmoe",
     MODEL_ARCH.DOTS1:      "dots1",
     MODEL_ARCH.ARCEE:      "arcee",
+    MODEL_ARCH.FALCON_H1:  "falcon-h1",
 }

 VISION_PROJECTOR_TYPE_NAMES: dict[VISION_PROJECTOR_TYPE, str] = {
@@ -670,7 +674,7 @@ class MODEL_TENSOR(IntEnum):
     MODEL_TENSOR.FFN_GATE_INP:       "blk.{bid}.ffn_gate_inp",
     MODEL_TENSOR.FFN_GATE_INP_SHEXP: "blk.{bid}.ffn_gate_inp_shexp",
     MODEL_TENSOR.FFN_NORM:           "blk.{bid}.ffn_norm",
-    MODEL_TENSOR.FFN_PRE_NORM:       "blk.{bid}.ffn_norm",
+    MODEL_TENSOR.FFN_PRE_NORM:       "blk.{bid}.ffn_pre_norm",
     MODEL_TENSOR.FFN_POST_NORM:      "blk.{bid}.post_ffw_norm",
     MODEL_TENSOR.FFN_GATE:           "blk.{bid}.ffn_gate",
     MODEL_TENSOR.FFN_DOWN:           "blk.{bid}.ffn_down",
@@ -693,6 +697,7 @@ class MODEL_TENSOR(IntEnum):
     MODEL_TENSOR.SSM_D:       "blk.{bid}.ssm_d",
     MODEL_TENSOR.SSM_NORM:    "blk.{bid}.ssm_norm",
     MODEL_TENSOR.SSM_OUT:     "blk.{bid}.ssm_out",
+    MODEL_TENSOR.SSM_MUP_VEC: "blk.{bid}.ssm_mup_vec",
     MODEL_TENSOR.TIME_MIX_W0: "blk.{bid}.time_mix_w0",
     MODEL_TENSOR.TIME_MIX_W1: "blk.{bid}.time_mix_w1",
     MODEL_TENSOR.TIME_MIX_W2: "blk.{bid}.time_mix_w2",
@@ -2174,6 +2179,41 @@ class MODEL_TENSOR(IntEnum):
     MODEL_ARCH.BAILINGMOE: [
         MODEL_TENSOR.ROPE_FREQS,
     ],
+    MODEL_ARCH.FALCON_H1: [
+        # Token embedding
+        MODEL_TENSOR.TOKEN_EMBD,
+
+        # Input layernorm
+        MODEL_TENSOR.ATTN_NORM,
+
+        # Attention components
+        MODEL_TENSOR.ATTN_Q,      # Query projection
+        MODEL_TENSOR.ATTN_K,      # Key projection
+        MODEL_TENSOR.ATTN_V,      # Value projection
+        MODEL_TENSOR.ATTN_OUT,    # Output projection
+
+        # SSM components (Mamba2-specific)
+        MODEL_TENSOR.SSM_MUP_VEC, # muP vector
+        MODEL_TENSOR.SSM_IN,      # Input projection for SSM
+        MODEL_TENSOR.SSM_CONV1D,  # Convolution layer
+        MODEL_TENSOR.SSM_DT,      # Delta-time projection
+        MODEL_TENSOR.SSM_A,       # A parameter (log form)
+        MODEL_TENSOR.SSM_D,       # D parameter
+        MODEL_TENSOR.SSM_NORM,    # Normalization in SSM
+        MODEL_TENSOR.SSM_OUT,     # Output projection
+
+        # Pre-feedforward layernorm
+        MODEL_TENSOR.FFN_PRE_NORM,
+
+        # Feed-forward network components
+        MODEL_TENSOR.FFN_GATE,    # Gate projection (SwiGLU)
+        MODEL_TENSOR.FFN_DOWN,    # Down projection
+        MODEL_TENSOR.FFN_UP,      # Up projection
+
+        # Final layernorm and output
+        MODEL_TENSOR.OUTPUT_NORM, # Final layer norm
+        MODEL_TENSOR.OUTPUT,      # Output projection (lm_head)
+    ],
 }

 #
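
The GGUF name for each tensor in this per-architecture list comes from the TENSOR_NAMES templates updated above; a minimal sketch, using block 0 for illustration:

    from gguf.constants import MODEL_ARCH, MODEL_TENSORS, TENSOR_NAMES

    # enumerate the GGUF tensor names Falcon-H1 may use, formatted for block 0
    for tensor in MODEL_TENSORS[MODEL_ARCH.FALCON_H1]:
        print(TENSOR_NAMES[tensor].format(bid=0))
    # includes the new "blk.0.ssm_mup_vec" and the renamed "blk.0.ffn_pre_norm"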

gguf-py/gguf/gguf_writer.py

Lines changed: 10 additions & 0 deletions
@@ -849,6 +849,16 @@ def add_ssm_group_count(self, value: int) -> None:
     def add_ssm_dt_b_c_rms(self, value: bool) -> None:
         self.add_bool(Keys.SSM.DT_B_C_RMS.format(arch=self.arch), value)

+    def add_ssm_head_dim(self, value: int) -> None:
+        self.add_uint32(Keys.SSM.HEAD_DIM.format(arch=self.arch), value)
+
+    def add_attn_head_count(self, count: int) -> None:
+        self.add_uint32(Keys.Attention.HEAD_COUNT.format(arch=self.arch), count)
+
+    def add_key_value_head_count(self, count: int) -> None:
+        self.add_uint32(Keys.Attention.HEAD_COUNT_KV.format(arch=self.arch), count)
+
     def add_tokenizer_model(self, model: str) -> None:
         self.add_string(Keys.Tokenizer.MODEL, model)
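
A minimal sketch of how a conversion script might call the new setters (the file name and the head counts below are made up):

    from gguf import GGUFWriter

    writer = GGUFWriter("falcon-h1.gguf", arch="falcon-h1")
    writer.add_ssm_head_dim(64)         # falcon-h1.ssm.head_dim
    writer.add_attn_head_count(32)      # falcon-h1.attention.head_count
    writer.add_key_value_head_count(8)  # falcon-h1.attention.head_count_kv

Note that the two attention setters write the same keys as the existing add_head_count and add_head_count_kv helpers; the genuinely new metadata key is the SSM head dimension.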

gguf-py/gguf/tensor_mapping.py

Lines changed: 17 additions & 0 deletions
@@ -286,6 +286,7 @@ class TensorNameMap:
         # Pre feed-forward norm
         MODEL_TENSOR.FFN_PRE_NORM: (
             "model.layers.{bid}.pre_feedforward_layernorm", # gemma2
+            "model.layers.{bid}.pre_ff_layernorm",          # falcon-h1
         ),

         # Post feed-forward norm
@@ -356,6 +357,7 @@ class TensorNameMap:
             "model.layers.{bid}.block_sparse_moe.experts.w3",  # phimoe (merged)
             "model.layers.{bid}.feed_forward.experts.up_proj", # llama4
             "encoder.layers.{bid}.mlp.experts.mlp.w1",         # nomic-bert-moe
+            "model.layers.{bid}.feed_forward.up_proj",         # falcon-h1
         ),

         MODEL_TENSOR.FFN_UP_SHEXP: (
@@ -392,6 +394,7 @@ class TensorNameMap:
             "model.layers.{bid}.mlp.experts.gate_proj",          # qwen2moe olmoe (merged)
             "model.layers.{bid}.block_sparse_moe.experts.w1",    # phimoe (merged)
             "model.layers.{bid}.feed_forward.experts.gate_proj", # llama4
+            "model.layers.{bid}.feed_forward.gate_proj",         # falcon-h1
         ),

         MODEL_TENSOR.FFN_GATE_SHEXP: (
@@ -431,6 +434,14 @@ class TensorNameMap:
             "transformer_encoder.{bid}.ffn.w3", # neobert
         ),

+        MODEL_TENSOR.SSM_MUP_VEC: (
+            "model.layers.{bid}.mamba.mup_vector", # falcon-h1
+        ),
+
+        MODEL_TENSOR.SSM_NORM: (
+            "model.layers.{bid}.mamba.norm",       # falcon-h1
+        ),
+
         MODEL_TENSOR.FFN_DOWN_EXP: (
             "layers.{bid}.feed_forward.experts.w2",         # mixtral (merged)
             "transformer.decoder_layer.{bid}.moe.linear_1", # Grok (merged)
@@ -483,11 +494,13 @@ class TensorNameMap:
         MODEL_TENSOR.SSM_IN: (
             "model.layers.{bid}.in_proj",
             "backbone.layers.{bid}.mixer.in_proj",
+            "model.layers.{bid}.mamba.in_proj", # falcon-h1
         ),

         MODEL_TENSOR.SSM_CONV1D: (
             "model.layers.{bid}.conv1d",
             "backbone.layers.{bid}.mixer.conv1d",
+            "model.layers.{bid}.mamba.conv1d", # falcon-h1
         ),

         MODEL_TENSOR.SSM_X: (
@@ -498,16 +511,19 @@ class TensorNameMap:
         MODEL_TENSOR.SSM_DT: (
             "model.layers.{bid}.dt_proj",
             "backbone.layers.{bid}.mixer.dt_proj",
+            "model.layers.{bid}.mamba.dt_proj", # falcon-h1
         ),

         MODEL_TENSOR.SSM_A: (
             "model.layers.{bid}.A_log",
             "backbone.layers.{bid}.mixer.A_log",
+            "model.layers.{bid}.mamba.A_log", # falcon-h1
         ),

         MODEL_TENSOR.SSM_D: (
             "model.layers.{bid}.D",
             "backbone.layers.{bid}.mixer.D",
+            "model.layers.{bid}.mamba.D", # falcon-h1
         ),

         MODEL_TENSOR.SSM_NORM: (
@@ -517,6 +533,7 @@
         MODEL_TENSOR.SSM_OUT: (
             "model.layers.{bid}.out_proj",
             "backbone.layers.{bid}.mixer.out_proj",
+            "model.layers.{bid}.mamba.out_proj", # falcon-h1
         ),

         MODEL_TENSOR.TIME_MIX_W0: (