Skip to content

Commit a631456

Browse files
committed
Add multi-EOS, half-RoPE, and BOS handling to GLM4-0414
1 parent 37b9f0d commit a631456

File tree

1 file changed

+17
-1
lines changed

1 file changed

+17
-1
lines changed

convert_hf_to_gguf.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4934,10 +4934,26 @@ class Glm4Model(Model):
49344934
model_arch = gguf.MODEL_ARCH.GLM4
49354935

49364936
def set_vocab(self):
    """Write the GLM4 tokenizer and its special-token ids to the GGUF writer.

    The base vocabulary comes from ``self.get_vocab_base()`` and is written
    with the ``"gpt2"`` tokenizer model. The special tokens are then
    overridden explicitly from the Hugging Face tokenizer's added vocabulary:

    * ``eos`` and ``unk`` -> ``<|endoftext|>``
    * ``eot``             -> ``<|user|>``
    * ``bos``             -> ``[gMASK]``

    Raises:
        KeyError: if one of the tokens above is missing from the tokenizer's
            added vocabulary (the ids are looked up by direct indexing).
    """
    # Imported lazily so that `transformers` is only required when a GLM4
    # model is actually being converted.
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained(self.dir_model, trust_remote_code=True)

    tokens, toktypes, tokpre = self.get_vocab_base()
    self.gguf_writer.add_tokenizer_model("gpt2")
    self.gguf_writer.add_tokenizer_pre(tokpre)
    self.gguf_writer.add_token_list(tokens)
    self.gguf_writer.add_token_types(toktypes)

    # Build the SpecialVocab once (the original constructed it twice and
    # discarded the first instance) and resolve the added-vocab mapping once
    # instead of re-querying the tokenizer for every special token.
    special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=True)
    added_vocab = tokenizer.get_added_vocab()
    special_vocab._set_special_token("eos", added_vocab["<|endoftext|>"])
    # NOTE(review): "<|user|>" is registered as the end-of-turn token,
    # presumably so generation stops when the model starts a new user turn —
    # confirm against the GLM4 chat template.
    special_vocab._set_special_token("eot", added_vocab["<|user|>"])
    special_vocab._set_special_token("unk", added_vocab["<|endoftext|>"])
    special_vocab._set_special_token("bos", added_vocab["[gMASK]"])
    special_vocab.add_to_gguf(self.gguf_writer)
49384952

49394953
def set_gguf_parameters(self):
49404954
super().set_gguf_parameters()
4955+
rope_dim = self.hparams["head_dim"]
4956+
self.gguf_writer.add_rope_dimension_count(int(rope_dim * self.hparams.get("partial_rotary_factor", 0.5)))
49414957
if self.hparams.get("rope_scaling") is not None and "factor" in self.hparams["rope_scaling"]:
49424958
if self.hparams["rope_scaling"].get("type") == "yarn":
49434959
self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)

0 commit comments

Comments
 (0)