Skip to content

Commit caa6e10

Browse files
committed
fix EOS
1 parent 1ffa83f commit caa6e10

File tree

1 file changed

+13
-0
lines changed

1 file changed

+13
-0
lines changed

convert_hf_to_gguf.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -6113,6 +6113,19 @@ def set_vocab(self):
61136113
self.gguf_writer.add_token_merges(merges)
61146114

61156115
special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=False)
6116+
6117+
# fix for Kimi-VL: Use <|im_end|> as EOS token instead of [EOS]
6118+
# This ensures text generation stops correctly at sentence boundaries rather than at commas (which would happen with the wrong EOS token)
6119+
if self.hf_arch == "KimiVLForConditionalGeneration":
6120+
im_end_id = None
6121+
for i, token in enumerate(tokens):
6122+
if token == "<|im_end|>":
6123+
im_end_id = i
6124+
break
6125+
if im_end_id is not None:
6126+
logger.info(f"Kimi-VL: Overriding EOS token from {special_vocab.special_token_ids.get('eos', 'N/A')} to <|im_end|> (ID: {im_end_id})")
6127+
special_vocab.special_token_ids["eos"] = im_end_id
6128+
61166129
special_vocab.add_to_gguf(self.gguf_writer)
61176130
else:
61186131
raise NotImplementedError(f"Deepseek pre-tokenizer {tokpre!r} is not supported yet!")

0 commit comments

Comments
 (0)