Skip to content

Commit c469e8a

Browse files
author
liyang
committed
gguf: clean Gemma vision/audio markers to <__media__>
1 parent 65d1ee8 commit c469e8a

File tree

1 file changed

+26
-0
lines changed

1 file changed

+26
-0
lines changed

convert_hf_to_gguf.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5170,6 +5170,19 @@ def set_vocab(self):
51705170
self._set_vocab_sentencepiece()
51715171

51725172
self.gguf_writer.add_add_space_prefix(False)
5173+
# 清洗 Gemma3 的 chat template:将视觉/音频占位符统一为 MTMD 标记
5174+
try:
5175+
from transformers import AutoTokenizer # type: ignore
5176+
tokenizer = AutoTokenizer.from_pretrained(self.dir_model)
5177+
chat_template = getattr(tokenizer, "chat_template", None)
5178+
if isinstance(chat_template, str):
5179+
cleaned = chat_template.replace("<start_of_image>", "<__media__>") .replace("<end_of_image>", "") .replace("<start_of_audio>", "<__media__>") .replace("<end_of_audio>", "")
5180+
if cleaned != chat_template:
5181+
logger.info("gguf: clean Gemma vision/audio markers to <__media__>")
5182+
self.gguf_writer.add_chat_template(cleaned)
5183+
except Exception as e:
5184+
logger.warning(f"gguf: failed to clean chat_template: {e}")
5185+
51735186

51745187
def set_gguf_parameters(self):
51755188
hparams = self.hparams
@@ -5218,6 +5231,19 @@ def set_vocab(self):
52185231
self._set_vocab_sentencepiece()
52195232

52205233
self.gguf_writer.add_add_space_prefix(False)
5234+
# 清洗 Gemma3 的 chat template:将视觉/音频占位符统一为 MTMD 标记
5235+
try:
5236+
from transformers import AutoTokenizer # type: ignore
5237+
tokenizer = AutoTokenizer.from_pretrained(self.dir_model)
5238+
chat_template = getattr(tokenizer, "chat_template", None)
5239+
if isinstance(chat_template, str):
5240+
cleaned = chat_template.replace("<start_of_image>", "<__media__>") .replace("<end_of_image>", "") .replace("<start_of_audio>", "<__media__>") .replace("<end_of_audio>", "")
5241+
if cleaned != chat_template:
5242+
logger.info("gguf: clean Gemma vision/audio markers to <__media__>")
5243+
self.gguf_writer.add_chat_template(cleaned)
5244+
except Exception as e:
5245+
logger.warning(f"gguf: failed to clean chat_template: {e}")
5246+
52215247

52225248
def set_gguf_parameters(self):
52235249
hparams = self.hparams

0 commit comments

Comments
 (0)