@@ -6605,12 +6605,12 @@ def set_vocab(self):
         self.gguf_writer.add_token_types(toktypes)
 
         # Special tokens
-        # BOS should be [gMASK] (151331), EOT should be <|endoftext|> (151329)
+        # BOS should be [gMASK] (151331), EOS should be <|endoftext|> (151329) as per the official config
         special_vocab._set_special_token(
-            "eos", tokenizer.get_added_vocab()["<|endoftext|>"]
+            "eos", tokenizer.get_added_vocab()["<|endoftext|>"]  # 151329 - official EOS token
         )
         special_vocab._set_special_token(
-            "eot", tokenizer.get_added_vocab()["<|endoftext|>"]
+            "eot", tokenizer.get_added_vocab()["<|endoftext|>"]  # 151329 - same as EOS
         )
         special_vocab._set_special_token(
             "unk", tokenizer.get_added_vocab()["<|endoftext|>"]
@@ -6620,6 +6620,9 @@ def set_vocab(self):
         )
         special_vocab._set_special_token("eom", tokenizer.get_added_vocab()["<|observation|>"])  # 151338
 
+        if "/nothink" in tokenizer.get_added_vocab():
+            special_vocab._set_special_token("nothink", tokenizer.get_added_vocab()["/nothink"])  # 151360
+        # Note: <think> and </think> are regular tokens (special=false in the official config), not special tokens
 
         special_vocab.add_to_gguf(self.gguf_writer)
 
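For reference, the hard-coded IDs in the comments above (151329, 151331, 151338, 151360) can be cross-checked against the Hugging Face tokenizer's added vocabulary. A minimal sketch; the checkpoint id is a placeholder, use whichever GLM model is being converted:

```python
# Sketch: verify the special-token IDs against the HF tokenizer's added vocab.
# "zai-org/GLM-4.5" is a placeholder; substitute the model being converted.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("zai-org/GLM-4.5")
added = tokenizer.get_added_vocab()  # dict: added-token string -> token id

for tok in ("<|endoftext|>", "[gMASK]", "<|observation|>", "/nothink"):
    print(f"{tok}: {added.get(tok)}")  # expect 151329, 151331, 151338, 151360
```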
@@ -6654,6 +6657,9 @@ def set_gguf_parameters(self):
         if (norm_topk_prob := self.hparams.get("norm_topk_prob")) is not None:
             self.gguf_writer.add_expert_weights_norm(norm_topk_prob)
 
+        # GLM models should not prepend a BOS token
+        self.gguf_writer.add_add_bos_token(False)
+
     _experts: list[dict[str, Tensor]] | None = None
     _shared_experts: list[dict[str, Tensor]] | None = None
 
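To confirm that the `add_add_bos_token(False)` flag and the token metadata above actually land in the converted file, the GGUF key-value store can be read back with gguf-py. A minimal sketch, assuming gguf-py is installed and with "model.gguf" standing in for a file produced by this converter:

```python
# Sketch: read back tokenizer metadata from a converted file with gguf-py.
# "model.gguf" is a placeholder path.
from gguf import GGUFReader

reader = GGUFReader("model.gguf")
for key in ("tokenizer.ggml.bos_token_id",
            "tokenizer.ggml.eos_token_id",
            "tokenizer.ggml.add_bos_token"):
    field = reader.fields.get(key)
    if field is not None:
        # field.data indexes into field.parts; the indexed part holds the raw value
        print(key, field.parts[field.data[0]])
```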