Skip to content

Commit 3fd69c5

Browse files
committed
Add AFMOE model support
1 parent 92bb84f commit 3fd69c5

16 files changed

+635
-2
lines changed

convert_hf_to_gguf.py

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1057,6 +1057,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
10571057
if chkhsh == "f4f37b6c8eb9ea29b3eac6bb8c8487c5ab7885f8d8022e67edc1c68ce8403e95":
10581058
# ref: https://huggingface.co/MiniMaxAI/MiniMax-M2
10591059
res = "minimax-m2"
1060+
if chkhsh == "49fc0303c9e0d2c2c565c510f64b2d9b271276acdcdadff733249eda9f7d59df":
1061+
# ref: https://huggingface.co/arcee-ai/Trinity-Tokenizer
1062+
res = "afmoe"
10601063

10611064
if res is None:
10621065
logger.warning("\n")
@@ -2457,6 +2460,100 @@ def set_gguf_parameters(self):
24572460
self.gguf_writer.add_rope_scaling_orig_ctx_len(rope_scaling["original_max_position_embeddings"])
24582461

24592462

2463+
@ModelBase.register("AfmoeForCausalLM")
class AfmoeModel(LlamaModel):
    """Converter for AFMOE (Arcee MoE) checkpoints.

    Extends LlamaModel with MoE hyperparameter export and tensor-name
    remapping for per-layer experts, shared experts, the attention gate,
    the router, and the pre/post FFN norms.
    """

    model_arch = gguf.MODEL_ARCH.AFMOE

    def set_gguf_parameters(self):
        """Write AFMOE-specific hyperparameters to the GGUF header.

        Each parameter is optional in the HF config; only keys present in
        ``self.hparams`` are emitted.
        """
        super().set_gguf_parameters()

        # MoE parameters
        if (n_experts := self.hparams.get("num_experts")) is not None:
            self.gguf_writer.add_expert_count(n_experts)
        if (n_shared_experts := self.hparams.get("num_shared_experts")) is not None:
            self.gguf_writer.add_expert_shared_count(n_shared_experts)
        if (moe_intermediate_size := self.hparams.get("moe_intermediate_size")) is not None:
            self.gguf_writer.add_expert_feed_forward_length(moe_intermediate_size)
        if (n_dense_layers := self.hparams.get("num_dense_layers")) is not None:
            self.gguf_writer.add_leading_dense_block_count(n_dense_layers)

        # Gating function: only sigmoid gating is recognized here; other
        # values fall back to the default written by the base class.
        # NOTE: `None != "sigmoid"`, so a separate None check is redundant.
        if self.hparams.get("score_func") == "sigmoid":
            self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID)

        # Route normalization and scaling
        if (route_norm := self.hparams.get("route_norm")) is not None:
            self.gguf_writer.add_expert_weights_norm(route_norm)
        if (route_scale := self.hparams.get("route_scale")) is not None:
            self.gguf_writer.add_expert_weights_scale(route_scale)

        # Sliding window attention
        if (sliding_window := self.hparams.get("sliding_window")) is not None:
            self.gguf_writer.add_sliding_window(sliding_window)

    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
        """Remap one HF tensor to its GGUF name(s).

        Per-expert weights are buffered in ``self._experts[bid]`` until all
        ``num_experts * 3`` projections (gate/up/down) for the layer have
        arrived, then stacked into one 3D tensor per projection. Until the
        layer is complete an empty list is returned (the tensor is held back).
        """
        # Buffer and merge the routed experts into stacked 3D tensors.
        if name.find("mlp.experts") != -1:
            n_experts = self.hparams["num_experts"]
            assert bid is not None

            if self._experts is None:
                self._experts = [{} for _ in range(self.block_count)]

            self._experts[bid][name] = data_torch

            # Emit nothing until every expert projection for this layer is seen.
            if len(self._experts[bid]) >= n_experts * 3:
                tensors: list[tuple[str, Tensor]] = []

                # merge the experts into a single 3d tensor
                for w_name in ["gate_proj", "up_proj", "down_proj"]:
                    datas: list[Tensor] = []

                    for xid in range(n_experts):
                        ename_to_retrieve = f"model.layers.{bid}.mlp.experts.{xid}.{w_name}.weight"
                        datas.append(self._experts[bid][ename_to_retrieve])
                        del self._experts[bid][ename_to_retrieve]

                    data_torch = torch.stack(datas, dim=0)
                    merged_name = f"model.layers.{bid}.mlp.experts.{w_name}.weight"
                    new_name = self.map_tensor_name(merged_name)
                    tensors.append((new_name, data_torch))

                return tensors
            else:
                return []

        # Map attention gate
        elif ".self_attn.gate_proj." in name and bid is not None:
            return [(self.format_tensor_name(gguf.MODEL_TENSOR.ATTN_GATE, bid), data_torch)]

        # Map shared experts
        elif ".mlp.shared_experts.gate_proj." in name and bid is not None:
            return [(self.format_tensor_name(gguf.MODEL_TENSOR.FFN_GATE_SHEXP, bid), data_torch)]
        elif ".mlp.shared_experts.up_proj." in name and bid is not None:
            return [(self.format_tensor_name(gguf.MODEL_TENSOR.FFN_UP_SHEXP, bid), data_torch)]
        elif ".mlp.shared_experts.down_proj." in name and bid is not None:
            return [(self.format_tensor_name(gguf.MODEL_TENSOR.FFN_DOWN_SHEXP, bid), data_torch)]

        # Pre FFN norm
        elif ".pre_mlp_layernorm." in name and bid is not None:
            return [(self.format_tensor_name(gguf.MODEL_TENSOR.FFN_PRE_NORM, bid), data_torch)]

        # Post FFN norm
        elif ".post_mlp_layernorm." in name and bid is not None:
            return [(self.format_tensor_name(gguf.MODEL_TENSOR.FFN_POST_NORM, bid), data_torch)]

        # Map router
        elif ".mlp.router.gate." in name and bid is not None:
            return [(self.format_tensor_name(gguf.MODEL_TENSOR.FFN_GATE_INP, bid), data_torch)]

        # The router's expert bias maps via the generic ".bias" suffix path.
        if name.endswith(".expert_bias"):
            name = name.replace(".expert_bias", ".expert_bias.bias")

        return [(self.map_tensor_name(name), data_torch)]
2556+
24602557
@ModelBase.register(
24612558
"LlavaForConditionalGeneration", # pixtral
24622559
"Mistral3ForConditionalGeneration", # mistral small 3.1

convert_hf_to_gguf_update.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,7 @@ class TOKENIZER_TYPE(IntEnum):
139139
{"name": "lfm2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LiquidAI/LFM2-Tokenizer"},
140140
{"name": "exaone4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LGAI-EXAONE/EXAONE-4.0-32B", },
141141
{"name": "mellum", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/JetBrains/Mellum-4b-base", },
142+
{"name": "afmoe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/arcee-ai/Trinity-Tokenizer", },
142143
{"name": "bailingmoe2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/inclusionAI/Ling-mini-base-2.0", },
143144
{"name": "granite-docling", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/ibm-granite/granite-docling-258M", },
144145
{"name": "minimax-m2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/MiniMaxAI/MiniMax-M2", },

gguf-py/gguf/constants.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -409,6 +409,7 @@ class MODEL_ARCH(IntEnum):
409409
BAILINGMOE2 = auto()
410410
DOTS1 = auto()
411411
ARCEE = auto()
412+
AFMOE = auto()
412413
ERNIE4_5 = auto()
413414
ERNIE4_5_MOE = auto()
414415
HUNYUAN_MOE = auto()
@@ -464,6 +465,7 @@ class MODEL_TENSOR(IntEnum):
464465
ATTN_POST_NORM = auto()
465466
ATTN_ROT_EMBD = auto()
466467
ATTN_SINKS = auto()
468+
ATTN_GATE = auto()
467469
FFN_GATE_INP = auto()
468470
FFN_GATE_INP_SHEXP = auto()
469471
FFN_NORM = auto()
@@ -776,6 +778,7 @@ class MODEL_TENSOR(IntEnum):
776778
MODEL_ARCH.BAILINGMOE2: "bailingmoe2",
777779
MODEL_ARCH.DOTS1: "dots1",
778780
MODEL_ARCH.ARCEE: "arcee",
781+
MODEL_ARCH.AFMOE: "afmoe",
779782
MODEL_ARCH.ERNIE4_5: "ernie4_5",
780783
MODEL_ARCH.ERNIE4_5_MOE: "ernie4_5-moe",
781784
MODEL_ARCH.FALCON_H1: "falcon-h1",
@@ -828,6 +831,7 @@ class MODEL_TENSOR(IntEnum):
828831
MODEL_TENSOR.ATTN_OUT: "blk.{bid}.attn_output",
829832
MODEL_TENSOR.ATTN_ROT_EMBD: "blk.{bid}.attn_rot_embd",
830833
MODEL_TENSOR.ATTN_SINKS: "blk.{bid}.attn_sinks",
834+
MODEL_TENSOR.ATTN_GATE: "blk.{bid}.attn_gate",
831835
MODEL_TENSOR.ATTN_Q_NORM: "blk.{bid}.attn_q_norm",
832836
MODEL_TENSOR.ATTN_K_NORM: "blk.{bid}.attn_k_norm",
833837
MODEL_TENSOR.ATTN_OUT_NORM: "blk.{bid}.attn_output_norm",
@@ -2693,6 +2697,33 @@ class MODEL_TENSOR(IntEnum):
26932697
MODEL_TENSOR.FFN_DOWN,
26942698
MODEL_TENSOR.FFN_UP,
26952699
],
2700+
MODEL_ARCH.AFMOE: [
2701+
MODEL_TENSOR.TOKEN_EMBD,
2702+
MODEL_TENSOR.OUTPUT_NORM,
2703+
MODEL_TENSOR.OUTPUT,
2704+
MODEL_TENSOR.ATTN_NORM,
2705+
MODEL_TENSOR.ATTN_POST_NORM,
2706+
MODEL_TENSOR.ATTN_Q,
2707+
MODEL_TENSOR.ATTN_K,
2708+
MODEL_TENSOR.ATTN_V,
2709+
MODEL_TENSOR.ATTN_OUT,
2710+
MODEL_TENSOR.ATTN_Q_NORM,
2711+
MODEL_TENSOR.ATTN_K_NORM,
2712+
MODEL_TENSOR.ATTN_GATE,
2713+
MODEL_TENSOR.FFN_GATE,
2714+
MODEL_TENSOR.FFN_DOWN,
2715+
MODEL_TENSOR.FFN_UP,
2716+
MODEL_TENSOR.FFN_GATE_INP,
2717+
MODEL_TENSOR.FFN_GATE_EXP,
2718+
MODEL_TENSOR.FFN_DOWN_EXP,
2719+
MODEL_TENSOR.FFN_UP_EXP,
2720+
MODEL_TENSOR.FFN_GATE_SHEXP,
2721+
MODEL_TENSOR.FFN_UP_SHEXP,
2722+
MODEL_TENSOR.FFN_DOWN_SHEXP,
2723+
MODEL_TENSOR.FFN_PRE_NORM,
2724+
MODEL_TENSOR.FFN_POST_NORM,
2725+
MODEL_TENSOR.FFN_EXP_PROBS_B,
2726+
],
26962727
MODEL_ARCH.ERNIE4_5: [
26972728
MODEL_TENSOR.TOKEN_EMBD,
26982729
MODEL_TENSOR.OUTPUT_NORM,

gguf-py/gguf/tensor_mapping.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -340,11 +340,12 @@ class TensorNameMap:
340340
"model.layers.{bid}.feedforward_layernorm", # apertus
341341
),
342342

343-
# Post feed-forward norm
343+
# Pre feed-forward norm
344344
MODEL_TENSOR.FFN_PRE_NORM: (
345345
"model.layers.{bid}.pre_feedforward_layernorm", # gemma2
346346
"layers.{bid}.pre_feedforward_layernorm", # embeddinggemma
347347
"model.layers.{bid}.pre_ff_layernorm.weight",
348+
"model.layers.{bid}.pre_mlp_layernorm", # afmoe
348349
),
349350

350351
# Post feed-forward norm
@@ -380,6 +381,7 @@ class TensorNameMap:
380381
"model.layers.{bid}.mlp.gate.e_score_correction", # deepseek-v3 dots1
381382
"model.layers.{bid}.mlp.moe_statics.e_score_correction", # ernie4.5-moe
382383
"model.layers.{bid}.mlp.gate.expert_bias", # bailingmoe2
384+
"model.layers.{bid}.mlp.expert_bias", # afmoe
383385
"model.layers.{bid}.feed_forward.expert_bias", # lfm2moe
384386
"model.layers.{bid}.block_sparse_moe.e_score_correction", # minimax-m2
385387
),

models/ggml-vocab-afmoe.gguf

6.99 MB
Binary file not shown.

models/ggml-vocab-afmoe.gguf.inp

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
ied 4 ½ months
2+
__ggml_vocab_test__
3+
Äpfel
4+
__ggml_vocab_test__
5+
6+
__ggml_vocab_test__
7+
8+
__ggml_vocab_test__
9+
10+
__ggml_vocab_test__
11+
12+
__ggml_vocab_test__
13+
14+
__ggml_vocab_test__
15+
16+
17+
__ggml_vocab_test__
18+
19+
20+
21+
__ggml_vocab_test__
22+
23+
24+
25+
26+
__ggml_vocab_test__
27+
28+
29+
__ggml_vocab_test__
30+
Hello world
31+
__ggml_vocab_test__
32+
Hello world
33+
__ggml_vocab_test__
34+
Hello World
35+
__ggml_vocab_test__
36+
Hello World
37+
__ggml_vocab_test__
38+
Hello World!
39+
__ggml_vocab_test__
40+
Hello, world!
41+
__ggml_vocab_test__
42+
Hello, world!
43+
__ggml_vocab_test__
44+
this is 🦙.cpp
45+
__ggml_vocab_test__
46+
w048 7tuijk dsdfhu
47+
__ggml_vocab_test__
48+
нещо на Български
49+
__ggml_vocab_test__
50+
កាន់តែពិសេសអាចខលចេញ
51+
__ggml_vocab_test__
52+
🚀 (normal) 😶‍🌫️ (multiple emojis concatenated) ✅ (only emoji that has its own token)
53+
__ggml_vocab_test__
54+
Hello
55+
__ggml_vocab_test__
56+
Hello
57+
__ggml_vocab_test__
58+
Hello
59+
__ggml_vocab_test__
60+
Hello
61+
__ggml_vocab_test__
62+
Hello
63+
__ggml_vocab_test__
64+
Hello
65+
Hello
66+
__ggml_vocab_test__
67+
(
68+
__ggml_vocab_test__
69+
70+
=
71+
__ggml_vocab_test__
72+
' era
73+
__ggml_vocab_test__
74+
Hello, y'all! How are you 😁 ?我想在apple工作1314151天~
75+
__ggml_vocab_test__
76+
!!!!!!
77+
__ggml_vocab_test__
78+
3
79+
__ggml_vocab_test__
80+
33
81+
__ggml_vocab_test__
82+
333
83+
__ggml_vocab_test__
84+
3333
85+
__ggml_vocab_test__
86+
33333
87+
__ggml_vocab_test__
88+
333333
89+
__ggml_vocab_test__
90+
3333333
91+
__ggml_vocab_test__
92+
33333333
93+
__ggml_vocab_test__
94+
333333333
95+
__ggml_vocab_test__
96+
Cửa Việt
97+
__ggml_vocab_test__
98+
discards
99+
__ggml_vocab_test__
100+
101+
102+
103+
104+
105+
106+
107+
108+
109+
110+
111+
🚀 (normal) 😶‍🌫️ (multiple emojis concatenated) ✅ 🦙🦙 3 33 333 3333 33333 333333 3333333 33333333 3.3 3..3 3...3 កាន់តែពិសេសអាច😁 ?我想在apple工作1314151天~ ------======= нещо на Български ''''''```````""""......!!!!!!?????? I've been 'told he's there, 'RE you sure? 'M not sure I'll make it, 'D you like some tea? We'Ve a'lL
112+
__ggml_vocab_test__

models/ggml-vocab-afmoe.gguf.out

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
1129 252 51 252 20861 3621
2+
49116 25524 343
3+
4+
252
5+
288
6+
344
7+
229
8+
230
9+
327
10+
1866
11+
4402
12+
14795 1117
13+
30197 1117
14+
14795 3295
15+
30197 3295
16+
30197 3295 32
17+
14795 43 1117 32
18+
30197 43 1117 32
19+
483 351 69865 279 45 11112
20+
118 18799 252 54 115 4546 30869 25372 4191 13934
21+
23835 183893 7432 30515 125974 185839 20324
22+
124940 92255 273 160060 191869 44968 256 188211 21207 147 142156 195704 142156 21207 127 92255 259 21207 255 190792 21207 259 195704 21207 263
23+
12479 387 10171 40 22860 146 18932 15540 136 10094 387 49707 77415 91293 40 70574 387 9266 56494 384 651 692 1204 9776 40
24+
14795
25+
30197
26+
252 30197
27+
288 30197
28+
344 30197
29+
344 30197 230 344 30197
30+
387
31+
230 399
32+
38 6260
33+
14795 43 366 76896 32 822 429 383 22860 255 2972 111778 3712 27304 19409 48 23988 18044 13814 73996
34+
183574
35+
50
36+
2158
37+
11805
38+
50 11805
39+
2158 11805
40+
11805 11805
41+
50 11805 11805
42+
2158 11805 11805
43+
11805 11805 11805
44+
66 70789 96 140747
45+
104867
46+
144635 20623 120822 22300 4402 71947 2759 24373 12479 387 10171 40 22860 146 18932 15540 136 10094 387 49707 77415 91293 40 70574 69865 279 63816 279 252 50 252 2158 252 11805 252 50 11805 252 2158 11805 252 11805 11805 252 50 11805 11805 252 2158 11805 11805 252 50 45 50 252 50 634 50 252 50 1472 50 252 124940 92255 273 160060 191869 44968 256 188211 21207 147 142156 195704 142156 21207 127 92255 259 45614 255 2972 111778 3712 27304 19409 48 23988 18044 13814 73996 79520 1235 23427 13373 183893 7432 30515 125974 185839 20324 27123 36632 25121 3124 36057 36678 183574 31148 10446 365 1908 874 578 63490 438 414 765 43 578 1954 383 2259 62 578 76 487 2259 365 2130 960 394 43 578 67 383 679 766 8748 62 1155 38 35185 290 66450 75

src/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ add_library(llama
3535
unicode-data.cpp
3636
unicode.cpp
3737
unicode.h
38+
models/afmoe.cpp
3839
models/apertus.cpp
3940
models/arcee.cpp
4041
models/arctic.cpp

0 commit comments

Comments
 (0)