Skip to content

Commit 7b42c07

Browse files
committed
Add compatibility with GLM variant models, supporting both LLaMA-style and GPT-2-style tokenizers.
1 parent 1606e81 commit 7b42c07

File tree

1 file changed

+6
-1
lines changed

1 file changed

+6
-1
lines changed

convert_hf_to_gguf.py

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5058,14 +5058,19 @@ def set_vocab(self):
50585058
self.gguf_writer.add_token_list(tokens)
50595059
self.gguf_writer.add_token_types(toktypes)
50605060
try:
5061+
tokenizer_file = self.dir_model / 'tokenizer.json'
5062+
if not tokenizer_file.is_file():
5063+
raise ValueError("tokenizer.json not found")
5064+
50615065
# for https://huggingface.co/THUDM/glm-4-9b
50625066
special_vocab=gguf.SpecialVocab(
50635067
self.dir_model,
50645068
load_merges=True,
50655069
n_vocab=vocab_size
50665070
)
5067-
5071+
50685072
self.gguf_writer.add_tokenizer_model("gpt2")
5073+
50695074
except Exception as e:
50705075
logger.warning(f'Failed to load special tokens: {e}')
50715076
# for https://huggingface.co/THUDM/glm-4-9b-hf

0 commit comments

Comments
 (0)