Skip to content

Commit 45e1884

Browse files
committed
Check config["text_config"] for special token IDs
1 parent caa6e10 commit 45e1884

File tree

2 files changed

+5
-14
lines changed

2 files changed

+5
-14
lines changed

convert_hf_to_gguf.py

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -6113,19 +6113,6 @@ def set_vocab(self):
61136113
self.gguf_writer.add_token_merges(merges)
61146114

61156115
special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=False)
6116-
6117-
# fix for Kimi-VL: Use <|im_end|> as EOS token instead of [EOS]
6118-
# This ensures text generation stops correctly at sentence boundaries, rather than at commas (which would happen with wrong EOS token)
6119-
if self.hf_arch == "KimiVLForConditionalGeneration":
6120-
im_end_id = None
6121-
for i, token in enumerate(tokens):
6122-
if token == "<|im_end|>":
6123-
im_end_id = i
6124-
break
6125-
if im_end_id is not None:
6126-
logger.info(f"Kimi-VL: Overriding EOS token from {special_vocab.special_token_ids.get('eos', 'N/A')} to <|im_end|> (ID: {im_end_id})")
6127-
special_vocab.special_token_ids["eos"] = im_end_id
6128-
61296116
special_vocab.add_to_gguf(self.gguf_writer)
61306117
else:
61316118
raise NotImplementedError(f"Deepseek pre-tokenizer {tokpre!r} is not supported yet!")

gguf-py/gguf/vocab.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -312,7 +312,11 @@ def _try_load_from_config_json(self, path: Path) -> bool:
312312
with open(config_file, encoding = 'utf-8') as f:
313313
config = json.load(f)
314314
for typ in self.special_token_types:
315-
self._set_special_token(typ, config.get(f'{typ}_token_id'))
315+
token_id = config.get(f'{typ}_token_id')
316+
# If not found at root, check in text_config (for multimodal models like Kimi-VL)
317+
if token_id is None and 'text_config' in config:
318+
token_id = config['text_config'].get(f'{typ}_token_id')
319+
self._set_special_token(typ, token_id)
316320
return True
317321

318322

0 commit comments

Comments
 (0)