Skip to content

Commit b6a1896

Browse files
CISCMinh141120
authored and committed
gguf-py : fix Qwen3-Embedding eos token (ggml-org#14314)
1 parent afe7dd5 commit b6a1896

File tree

1 file changed

+10
-0
lines changed

1 file changed

+10
-0
lines changed

gguf-py/gguf/vocab.py

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -198,6 +198,16 @@ def _try_load_from_tokenizer_json(self, path: Path) -> bool:
198198
if special_last := tmpl_single[-1].get('SpecialToken', {}).get('id'):
199199
if not tokenizer_config:
200200
special_eos = special_last
201+
elif special_last != special_eos:
202+
if 'eot' not in self.special_token_types:
203+
self.special_token_types = tuple(self.special_token_types) + ('eot', )
204+
tokenizer_config['eot_token'] = special_eos
205+
elif 'eom' not in self.special_token_types:
206+
self.special_token_types = tuple(self.special_token_types) + ('eom', )
207+
tokenizer_config['eom_token'] = special_eos
208+
else:
209+
logger.warning(f'Overriding EOS token {special_eos!r} with {special_last!r} without EOT/EOM fallback!')
210+
tokenizer_config['eos_token'] = special_eos = special_last
201211
self.add_special_token['eos'] = True if special_last == special_eos else False
202212
if special_last != special_eos:
203213
logger.warning(f'Unknown trailing special token {special_last!r} in TemplateProcessing<single>')

0 commit comments

Comments
 (0)