Skip to content

Commit cc39800

Browse files
Add support for bitnet2b_2501 model (#337)
* add support for bitnet2b_2501 model
* Fixes
* Support both model names

---------

Co-authored-by: potassiummmm <[email protected]>
1 parent 93cd77b commit cc39800

File tree

4 files changed

+330
-1
lines changed

4 files changed

+330
-1
lines changed

convert_hf_to_gguf.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1598,6 +1598,7 @@ def prepare_tensors(self):
15981598

15991599

16001600
@Model.register("BitnetForCausalLM")
1601+
@Model.register("BitNetForCausalLM")
16011602
class BitnetModel(Model):
16021603
model_arch = gguf.MODEL_ARCH.BITNET
16031604

gguf-py/gguf/constants.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,7 @@ class MODEL_ARCH(IntEnum):
219219
DEEPSEEK2 = auto()
220220
CHATGLM = auto()
221221
BITNET = auto()
222+
BITNET_25 = auto()
222223
T5 = auto()
223224
T5ENCODER = auto()
224225
JAIS = auto()
@@ -351,6 +352,7 @@ class MODEL_TENSOR(IntEnum):
351352
MODEL_ARCH.DEEPSEEK2: "deepseek2",
352353
MODEL_ARCH.CHATGLM: "chatglm",
353354
MODEL_ARCH.BITNET: "bitnet",
355+
MODEL_ARCH.BITNET_25: "bitnet-25",
354356
MODEL_ARCH.T5: "t5",
355357
MODEL_ARCH.T5ENCODER: "t5encoder",
356358
MODEL_ARCH.JAIS: "jais",
@@ -1019,6 +1021,28 @@ class MODEL_TENSOR(IntEnum):
10191021
MODEL_TENSOR.ATTN_SUB_NORM,
10201022
MODEL_TENSOR.FFN_SUB_NORM,
10211023
],
1024+
MODEL_ARCH.BITNET_25: [
1025+
MODEL_TENSOR.TOKEN_EMBD,
1026+
MODEL_TENSOR.OUTPUT_NORM,
1027+
MODEL_TENSOR.OUTPUT,
1028+
MODEL_TENSOR.ROPE_FREQS,
1029+
MODEL_TENSOR.ATTN_NORM,
1030+
MODEL_TENSOR.ATTN_Q,
1031+
MODEL_TENSOR.ATTN_K,
1032+
MODEL_TENSOR.ATTN_V,
1033+
MODEL_TENSOR.ATTN_OUT,
1034+
MODEL_TENSOR.ATTN_ROT_EMBD,
1035+
MODEL_TENSOR.FFN_GATE_INP,
1036+
MODEL_TENSOR.FFN_NORM,
1037+
MODEL_TENSOR.FFN_GATE,
1038+
MODEL_TENSOR.FFN_DOWN,
1039+
MODEL_TENSOR.FFN_UP,
1040+
MODEL_TENSOR.FFN_GATE_EXP,
1041+
MODEL_TENSOR.FFN_DOWN_EXP,
1042+
MODEL_TENSOR.FFN_UP_EXP,
1043+
MODEL_TENSOR.ATTN_SUB_NORM,
1044+
MODEL_TENSOR.FFN_SUB_NORM,
1045+
],
10221046
MODEL_ARCH.T5: [
10231047
MODEL_TENSOR.TOKEN_EMBD,
10241048
MODEL_TENSOR.OUTPUT,

gguf-py/gguf/tensor_mapping.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ class TensorNameMap:
131131
"model.layers.{bid}.self_attn.qkv_proj", # phi3
132132
"encoder.layers.{bid}.self_attention.query_key_value", # chatglm
133133
"transformer.layers.{bid}.attn.qkv_proj", # openelm
134+
"layers.{bid}.attention.wqkv",
134135
),
135136

136137
# Attention query
@@ -464,10 +465,14 @@ class TensorNameMap:
464465

465466
MODEL_TENSOR.ATTN_SUB_NORM: (
466467
"model.layers.{bid}.self_attn.inner_attn_ln", # bitnet
468+
"layers.{bid}.attention.attn_sub_norm", # bitnet
469+
"model.layers.{bid}.self_attn.attn_sub_norm",
467470
),
468471

469472
MODEL_TENSOR.FFN_SUB_NORM: (
470473
"model.layers.{bid}.mlp.ffn_layernorm", # bitnet
474+
"layers.{bid}.feed_forward.ffn_sub_norm", # bitnet
475+
"model.layers.{bid}.mlp.ffn_sub_norm",
471476
),
472477

473478
MODEL_TENSOR.DEC_ATTN_NORM: (

0 commit comments

Comments
 (0)