@@ -6605,9 +6605,9 @@ def set_vocab(self):
         self.gguf_writer.add_token_types(toktypes)

         # Special tokens
-        # BOS should be [gMASK] (151331), EOS should be <|endoftext|> (151329) as per official config
+        # BOS should be [gMASK] (151331), EOS should be <|endoftext|> (151329) as per tokenizer analysis
         special_vocab._set_special_token(
-            "eos", tokenizer.get_added_vocab()["<|endoftext|>"]  # 151329 - official EOS token
+            "eos", tokenizer.get_added_vocab()["<|endoftext|>"]  # 151329 - correct EOS token
         )
         special_vocab._set_special_token(
             "eot", tokenizer.get_added_vocab()["<|endoftext|>"]  # 151329 - same as EOS
@@ -6620,9 +6620,25 @@ def set_vocab(self):
         )
         special_vocab._set_special_token("eom", tokenizer.get_added_vocab()["<|observation|>"])  # 151338

-        if "/nothink" in tokenizer.get_added_vocab():
-            special_vocab._set_special_token("nothink", tokenizer.get_added_vocab()["/nothink"])  # 151360
+        if "<sop>" in tokenizer.get_added_vocab():
+            special_vocab._set_special_token("sop", tokenizer.get_added_vocab()["<sop>"])  # 151333
+        if "<eop>" in tokenizer.get_added_vocab():
+            special_vocab._set_special_token("eop", tokenizer.get_added_vocab()["<eop>"])  # 151334
+        if "[sMASK]" in tokenizer.get_added_vocab():
+            special_vocab._set_special_token("smask", tokenizer.get_added_vocab()["[sMASK]"])  # 151332
+
+        # TODO: clean up once an approach to <think> and /nothink is decided
+        #
+        # Previously:
+        # if "/nothink" in tokenizer.get_added_vocab():
+        #     special_vocab._set_special_token("nothink", tokenizer.get_added_vocab()["/nothink"])  # 151360
         # Note: <think> and </think> are regular tokens (special=false in official config), not special tokens
+        #
+        # Latest thinking is:
+        # NOTE: the /nothink token exists, but it causes generation issues, as noted in
+        # https://huggingface.co/zai-org/GLM-4.5/discussions/9:
+        # "it is a very special token. Even as input, it will be encoded into a special token, causing generation issues."
+        # Therefore we do NOT add it, to avoid generation problems.

         special_vocab.add_to_gguf(self.gguf_writer)

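The three guarded look-ups added above all share the same shape. Here is a minimal sketch (not part of the patch) of how they could be factored into a helper, assuming special_vocab and tokenizer are in scope exactly as they are inside set_vocab(); the helper name is made up for illustration.

def _set_optional_special(special_vocab, tokenizer, name: str, token: str) -> None:
    # Register `token` under the special-token role `name` only if the tokenizer defines it.
    added = tokenizer.get_added_vocab()
    if token in added:
        special_vocab._set_special_token(name, added[token])

_set_optional_special(special_vocab, tokenizer, "sop", "<sop>")       # 151333
_set_optional_special(special_vocab, tokenizer, "eop", "<eop>")       # 151334
_set_optional_special(special_vocab, tokenizer, "smask", "[sMASK]")   # 151332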
@@ -6639,6 +6655,8 @@ def set_gguf_parameters(self):
         # MoE parameters - Use only routed expert count (shared experts handled separately)
         if (n_routed_experts := self.hparams.get("n_routed_experts")) is not None:
             self.gguf_writer.add_expert_count(n_routed_experts)
+        if (num_experts_per_tok := self.hparams.get("num_experts_per_tok")) is not None:
+            self.gguf_writer.add_expert_used_count(num_experts_per_tok)
         if (moe_intermediate_size := self.hparams.get("moe_intermediate_size")) is not None:
             self.gguf_writer.add_expert_feed_forward_length(moe_intermediate_size)
         if (n_shared_experts := self.hparams.get("n_shared_experts")) is not None:
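As a rough, illustration-only sketch of what the expert-count metadata above records, the same gguf-py writer calls can be exercised standalone. The architecture name "glm4moe", the output file name, and the hparam values below are assumptions for illustration, not taken from the GLM-4.5 config.

from gguf import GGUFWriter

# Made-up hparams for illustration; real values come from the model's config.json.
hparams = {"n_routed_experts": 160, "num_experts_per_tok": 8, "moe_intermediate_size": 1536}

writer = GGUFWriter("glm-moe-metadata-demo.gguf", arch="glm4moe")  # arch name is an assumption
if (n_routed_experts := hparams.get("n_routed_experts")) is not None:
    writer.add_expert_count(n_routed_experts)
if (num_experts_per_tok := hparams.get("num_experts_per_tok")) is not None:
    writer.add_expert_used_count(num_experts_per_tok)
if (moe_intermediate_size := hparams.get("moe_intermediate_size")) is not None:
    writer.add_expert_feed_forward_length(moe_intermediate_size)

# Write the header and key/value metadata only; no tensors are added in this sketch.
writer.write_header_to_file()
writer.write_kv_data_to_file()
writer.close()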