From ea44482b2d3b1abe9da6b4d63de624e19a0729e5 Mon Sep 17 00:00:00 2001 From: liyang Date: Thu, 23 Oct 2025 17:10:17 +0800 Subject: [PATCH 1/3] server(chat): inject image_url via MTMD marker --- tools/server/utils.hpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/server/utils.hpp b/tools/server/utils.hpp index cc48f5a9d0ac7..151e713f63b06 100644 --- a/tools/server/utils.hpp +++ b/tools/server/utils.hpp @@ -756,7 +756,13 @@ static json oaicompat_chat_params_parse( } llama_params["chat_format"] = static_cast(chat_params.format); - llama_params["prompt"] = chat_params.prompt; + if (!out_files.empty()) { + std::string prompt_mm = chat_params.prompt; + string_replace_all(prompt_mm, "", mtmd_default_marker()); + llama_params["prompt"] = std::move(prompt_mm); + } else { + llama_params["prompt"] = chat_params.prompt; + } if (!chat_params.grammar.empty()) { llama_params["grammar"] = chat_params.grammar; } From 65d1ee800ec2a3ba8695976f881ba3964ec63d06 Mon Sep 17 00:00:00 2001 From: liyang Date: Thu, 23 Oct 2025 18:34:03 +0800 Subject: [PATCH 2/3] ci: retrigger workflows From c469e8ac1f61a2d0ac5e529d6280e1fa3a754463 Mon Sep 17 00:00:00 2001 From: liyang Date: Fri, 24 Oct 2025 11:23:29 +0800 Subject: [PATCH 3/3] gguf: clean Gemma vision/audio markers to <__media__> --- convert_hf_to_gguf.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index ed99dc8477231..1aa3ffcc2b1b9 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -5170,6 +5170,19 @@ def set_vocab(self): self._set_vocab_sentencepiece() self.gguf_writer.add_add_space_prefix(False) + # 清洗 Gemma3 的 chat template:将视觉/音频占位符统一为 MTMD 标记 + try: + from transformers import AutoTokenizer # type: ignore + tokenizer = AutoTokenizer.from_pretrained(self.dir_model) + chat_template = getattr(tokenizer, "chat_template", None) + if isinstance(chat_template, str): + cleaned = chat_template.replace("", "<__media__>") .replace("", 
"") .replace("", "<__media__>") .replace("", "") + if cleaned != chat_template: + logger.info("gguf: clean Gemma vision/audio markers to <__media__>") + self.gguf_writer.add_chat_template(cleaned) + except Exception as e: + logger.warning(f"gguf: failed to clean chat_template: {e}") + def set_gguf_parameters(self): hparams = self.hparams @@ -5218,6 +5231,19 @@ def set_vocab(self): self._set_vocab_sentencepiece() self.gguf_writer.add_add_space_prefix(False) + # 清洗 Gemma3 的 chat template:将视觉/音频占位符统一为 MTMD 标记 + try: + from transformers import AutoTokenizer # type: ignore + tokenizer = AutoTokenizer.from_pretrained(self.dir_model) + chat_template = getattr(tokenizer, "chat_template", None) + if isinstance(chat_template, str): + cleaned = chat_template.replace("", "<__media__>") .replace("", "") .replace("", "<__media__>") .replace("", "") + if cleaned != chat_template: + logger.info("gguf: clean Gemma vision/audio markers to <__media__>") + self.gguf_writer.add_chat_template(cleaned) + except Exception as e: + logger.warning(f"gguf: failed to clean chat_template: {e}") + def set_gguf_parameters(self): hparams = self.hparams