Adds compatibility with GLM model variants, covering both LLaMA-style and GPT-2-style tokenizers.

glide-the · glide-the · commit 7b42c07d1c88 · 2025-04-23T13:08:43.000+08:00
diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
@@ -5058,14 +5058,19 @@ def set_vocab(self):
         self.gguf_writer.add_token_list(tokens)
         self.gguf_writer.add_token_types(toktypes)
         try:
+            tokenizer_file = self.dir_model / 'tokenizer.json'
+            if not tokenizer_file.is_file():  
+                raise ValueError("tokenizer.json not found")
+                  
             # for https://huggingface.co/THUDM/glm-4-9b
             special_vocab=gguf.SpecialVocab(
                 self.dir_model, 
                 load_merges=True,
                 n_vocab=vocab_size
             ) 
-            
+        
             self.gguf_writer.add_tokenizer_model("gpt2")
+                
         except Exception as e:
             logger.warning(f'Failed to load special tokens: {e}')
             # for https://huggingface.co/THUDM/glm-4-9b-hf