Commit 94fef76

Optimize 极爱阿狗
1 parent: f8e39fd

3 files changed: +9 -5 lines

gpt_server/model_backend/lmdeploy_backend.py (1 addition, 1 deletion)

@@ -138,7 +138,7 @@ async def stream_chat(self, params: Dict[str, Any]) -> AsyncGenerator:
         temperature = float(params.get("temperature", 0.8))
         top_p = float(params.get("top_p", 0.8))
         top_k = params.get("top_k", 50)
-
+        chat_template = params["chat_template"]
         max_new_tokens = int(params.get("max_new_tokens", 1024 * 8))
         stop_str = params.get("stop", None)
         stop_token_ids = params.get("stop_words_ids", None) or []
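
This one-line change tightens the backend's contract: chat_template is now read unconditionally from params, so every request must already carry one (the worker guarantees this in the next file). A minimal sketch of the extraction, assuming a hypothetical helper name read_generation_params and mirroring the defaults visible in the hunk above:

```python
from typing import Any, Dict


def read_generation_params(params: Dict[str, Any]) -> Dict[str, Any]:
    """Hypothetical helper mirroring stream_chat's parameter extraction."""
    return {
        "temperature": float(params.get("temperature", 0.8)),
        "top_p": float(params.get("top_p", 0.8)),
        "top_k": params.get("top_k", 50),
        # Required key: the worker's preprocess_params injects it before
        # the request ever reaches the backend (see the next file).
        "chat_template": params["chat_template"],
        "max_new_tokens": int(params.get("max_new_tokens", 1024 * 8)),
        "stop": params.get("stop", None),
        "stop_token_ids": params.get("stop_words_ids", None) or [],
    }
```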

gpt_server/model_worker/base/model_worker_base.py (5 additions, 0 deletions)

@@ -124,6 +124,7 @@ def __init__(
         self.model = None
         self.backend = None
         self.chat_template = None
+        self.vl_chat_template = None
         self.tokenizer: PreTrainedTokenizer | None = None
         self.load_model_tokenizer(model_path)
         self.context_len = self.get_context_length()
@@ -137,6 +138,10 @@ def __init__(
     def preprocess_params(self, params: dict) -> dict:
         """Preprocess params"""
         messages = params["messages"]
+        params["chat_template"] = self.chat_template
+        if self.vision_config:
+            params["multimodal"] = True
+            params["chat_template"] = self.vl_chat_template
         if isinstance(messages, str):
             messages = [{"role": "user", "content": messages}]
         params["messages"] = messages

gpt_server/model_worker/qwen.py (3 additions, 4 deletions)

@@ -41,11 +41,11 @@ def __init__(
         logger.warning(f"{model_names[0]} stop words: {self.stop}")
 
         self.chat_template = get_chat_template(model_name="qwen", lang="zh")
+        # from https://github.com/xorbitsai/inference/blob/c70ea74fa820a613f8d577047ef1818da20a96b3/xinference/model/llm/llm_family_modelscope.json
+        self.vl_chat_template = "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
         self.tool_parser = ToolParserManager.module_dict["qwen2_5"](
             tokenizer=self.tokenizer
         )
-        # from https://github.com/xorbitsai/inference/blob/c70ea74fa820a613f8d577047ef1818da20a96b3/xinference/model/llm/llm_family_modelscope.json
-        self.vl_chat_template = "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
 
     async def generate_stream_gate(self, params):
         self.call_ct += 1
@@ -60,13 +60,12 @@ async def generate_stream_gate(self, params):
             text = await asyncio.to_thread(
                 self.tokenizer.apply_chat_template,
                 messages,
-                # chat_template=self.vl_chat_template,
+                chat_template=params["chat_template"],
                 tokenize=False,
                 add_generation_prompt=True,
                 tools=tools,
                 enable_thinking=bool(params.get("enable_thinking", True)),
             )
-
             params["prompt"] = text
             # multimodal requests do not pass input_ids
             # --------------- add extra parameters ------------------------
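
Since the VL template is plain Jinja, it can be sanity-checked outside the server with jinja2 (the engine apply_chat_template uses under the hood). A minimal sketch; the message payload and file URI are made-up placeholders:

```python
from jinja2 import Template

# The one-line Jinja string added in the diff above, verbatim.
vl_chat_template = "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"

# OpenAI-style content list, the shape the template branches on;
# the image URI is a placeholder.
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "image": "file:///tmp/cat.png"},
            {"type": "text", "text": "Describe the image."},
        ],
    }
]

prompt = Template(vl_chat_template).render(
    messages=messages, add_generation_prompt=True
)
print(prompt)
# <|im_start|>system
# You are a helpful assistant.<|im_end|>
# <|im_start|>user
# <|vision_start|><|image_pad|><|vision_end|>Describe the image.<|im_end|>
# <|im_start|>assistant
```

The image item expands to the vision pad tokens while the text item passes through verbatim, which is exactly what the if/elif chain in the template encodes.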
