Commit 687e13b

fix sglang response_prefix (#5125)
1 parent: 7af5b61

6 files changed: +12, -7 lines changed


docs/source/Instruction/命令行参数.md

Lines changed: 1 addition & 1 deletion
@@ -647,7 +647,7 @@ App参数继承于[部署参数](#部署参数), [Web-UI参数](#Web-UI参数)
 ## 特定模型参数
 特定模型参数可以通过`--model_kwargs`或者环境变量进行设置,例如: `--model_kwargs '{"fps_max_frames": 12}'`或者`FPS_MAX_FRAMES=12`
 
-### qwen2_vl, qvq, qwen2_5_vl
+### qwen2_vl, qvq, qwen2_5_vl, mimo_vl, keye_vl
 参数含义同`qwen_vl_utils`或者`qwen_omni_utils`库,可以查看[这里](https://github.com/QwenLM/Qwen2.5-VL/blob/main/qwen-vl-utils/src/qwen_vl_utils/vision_process.py#L24)
 
 - IMAGE_FACTOR: 默认为28。

docs/source_en/Instruction/Command-line-parameters.md

Lines changed: 1 addition & 1 deletion
@@ -664,7 +664,7 @@ Export Arguments include the [basic arguments](#base-arguments) and [merge argum
 
 Specific model arguments can be set using `--model_kwargs` or environment variables, for example: `--model_kwargs '{"fps_max_frames": 12}'` or `FPS_MAX_FRAMES=12`.
 
-### qwen2_vl, qvq, qwen2_5_vl
+### qwen2_vl, qvq, qwen2_5_vl, mimo_vl, keye_vl
 The parameter meanings are the same as in the `qwen_vl_utils` or `qwen_omni_utils` library. You can refer to [here](https://github.com/QwenLM/Qwen2.5-VL/blob/main/qwen-vl-utils/src/qwen_vl_utils/vision_process.py#L24)
 
 - IMAGE_FACTOR: Default is 28
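The two documentation files above describe the same mechanism: a model-specific argument can come from `--model_kwargs` JSON or from an upper-cased environment variable such as `FPS_MAX_FRAMES`. A minimal sketch of one possible resolution order (the `get_model_arg` helper and its precedence are illustrative assumptions, not swift's actual parsing code):

import json
import os
from typing import Any


def get_model_arg(name: str, default: Any, model_kwargs_json: str = '') -> Any:
    # Prefer an explicit --model_kwargs JSON value, then the upper-cased env var, then the default.
    kwargs = json.loads(model_kwargs_json) if model_kwargs_json else {}
    if name in kwargs:
        return kwargs[name]
    env_value = os.environ.get(name.upper())
    return type(default)(env_value) if env_value is not None else default


if __name__ == '__main__':
    os.environ['FPS_MAX_FRAMES'] = '12'
    print(get_model_arg('fps_max_frames', 24))                            # 12 (from the env var)
    print(get_model_arg('fps_max_frames', 24, '{"fps_max_frames": 16}'))  # 16 (from --model_kwargs)
    print(get_model_arg('image_factor', 28))                              # 28 (default, matches IMAGE_FACTOR)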

swift/llm/argument/base_args/base_args.py

Lines changed: 3 additions & 3 deletions
@@ -224,8 +224,6 @@ def load_args_from_ckpt(self) -> None:
             'bnb_4bit_quant_type',
             'bnb_4bit_use_double_quant',
         ]
-        if 'megatron' in self.__class__.__name__.lower():
-            force_load_keys = []
         # If the current value is None or an empty list and it is among the following keys
         load_keys = [
             'custom_register_path',
@@ -252,7 +250,9 @@ def load_args_from_ckpt(self) -> None:
             'use_chat_template',
             'response_prefix',
         ]
-
+        if 'megatron' in self.__class__.__name__.lower():
+            force_load_keys = []
+            load_keys.remove('use_chat_template')
         data_keys = list(f.name for f in fields(DataArguments))
         for key, old_value in old_args.items():
             if old_value is None:
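With the relocated block, Megatron argument classes now skip force-loading entirely and also stop restoring `use_chat_template` from the checkpoint. A standalone sketch of the resulting key selection (the `select_ckpt_keys` helper and the shortened key lists are illustrative, not the real `load_args_from_ckpt`):

from typing import List, Tuple


def select_ckpt_keys(class_name: str) -> Tuple[List[str], List[str]]:
    # Keys always overwritten from the checkpoint (abbreviated list).
    force_load_keys = ['bnb_4bit_quant_type', 'bnb_4bit_use_double_quant']
    # Keys restored only when the current value is None or an empty list (abbreviated list).
    load_keys = ['custom_register_path', 'use_chat_template', 'response_prefix']
    if 'megatron' in class_name.lower():
        force_load_keys = []                   # never force-overwrite Megatron args
        load_keys.remove('use_chat_template')  # and do not restore use_chat_template either
    return force_load_keys, load_keys


if __name__ == '__main__':
    print(select_ckpt_keys('TrainArguments'))
    print(select_ckpt_keys('MegatronTrainArguments'))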

swift/llm/data_loader.py

Lines changed: 2 additions & 1 deletion
@@ -3,6 +3,7 @@
 import torch
 import torch.distributed as dist
 from torch.utils.data import DataLoader
+from tqdm import tqdm
 
 from swift.llm import to_device
 
@@ -104,7 +105,7 @@ def _scatter_object_list(self, inputs):
 
     def _skip_batches(self, base_iter):
         if self.rank == 0 and self.skip_batches > 0:
-            for _ in range(self.skip_batches):
+            for _ in tqdm(range(self.skip_batches), dynamic_ncols=True, desc='Skip Batches: '):
                 [next(base_iter) for _ in range(self.world_size)]
 
     def __iter__(self):
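The only behavioural change here is a progress bar while rank 0 fast-forwards the iterator; each skipped step still consumes `world_size` per-rank batches. A self-contained sketch of the same pattern (the `skip_first_batches` function and the toy iterator are illustrative, not the swift data loader):

from tqdm import tqdm


def skip_first_batches(base_iter, num_batches: int, world_size: int) -> None:
    # Discard the first `num_batches` global steps, one progress tick per step.
    for _ in tqdm(range(num_batches), dynamic_ncols=True, desc='Skip Batches: '):
        # A global step corresponds to `world_size` per-rank batches.
        [next(base_iter) for _ in range(world_size)]


if __name__ == '__main__':
    data = iter(range(100))
    skip_first_batches(data, num_batches=5, world_size=4)
    print(next(data))  # 20: the first 5 * 4 items were skipped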

swift/llm/infer/infer_engine/sglang_engine.py

Lines changed: 4 additions & 0 deletions
@@ -123,6 +123,8 @@ def _create_chat_completion_response(self, output, template):
         meta_info = output['meta_info']
         usage_info = self._get_usage_info(meta_info['prompt_tokens'], meta_info['completion_tokens'])
         response = output['text']
+        if template.template_meta.response_prefix:
+            response = template.template_meta.response_prefix + response
         toolcall = self._get_toolcall(response, template)
         choice = ChatCompletionResponseChoice(
             index=0,
@@ -188,6 +190,8 @@ def _create_chat_completion_stream_response(self, output, template, generation_c
                                                  idx) -> Optional[ChatCompletionStreamResponse]:
         assert output is not None
         response = output['text']
+        if template.template_meta.response_prefix:
+            response = template.template_meta.response_prefix + response
         meta_info = output['meta_info']
         finish_reason = meta_info['finish_reason']
         delta_text = response[idx[0]:]
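Both the non-streaming and the streaming paths now prepend the template's `response_prefix` (when one is defined) before tool-call parsing and delta slicing, so clients receive the full templated response. A minimal sketch of that behaviour with stand-in names (`SimpleTemplateMeta`, `build_response`, and `stream_delta` are assumptions, not the real swift/SGLang classes):

from dataclasses import dataclass
from typing import Optional


@dataclass
class SimpleTemplateMeta:
    # Some templates define a prefix the model's text should start with, e.g. '<think>\n'.
    response_prefix: Optional[str] = None


def build_response(raw_text: str, meta: SimpleTemplateMeta) -> str:
    # Mirror the fix: prepend the prefix before any further parsing.
    if meta.response_prefix:
        return meta.response_prefix + raw_text
    return raw_text


def stream_delta(full_response: str, already_sent: int) -> str:
    # Streaming emits only the not-yet-sent tail, like response[idx[0]:] above.
    return full_response[already_sent:]


if __name__ == '__main__':
    meta = SimpleTemplateMeta(response_prefix='<think>\n')
    full = build_response('The answer is 42.', meta)
    print(repr(full))                   # '<think>\nThe answer is 42.'
    print(repr(stream_delta(full, 0)))  # first chunk includes the prefix
    print(repr(stream_delta(full, 8)))  # later chunks contain only new text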

swift/llm/template/base.py

Lines changed: 1 addition & 1 deletion
@@ -129,7 +129,7 @@ def __init__(
         self.init_processor(processor)
 
     def init_processor(self, processor: Processor) -> None:
-        if processor is None:
+        if processor is None or self._processor_inited:
             return
         self._processor_inited = True
         self.processor = processor
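The added `self._processor_inited` check makes `init_processor` idempotent: once a processor is attached, later calls become no-ops instead of overwriting it. A toy illustration of the same guard (the `ToyTemplate` class is an assumption, not swift's real `Template`):

class ToyTemplate:
    # Toy stand-in for the real Template; only the init guard is modelled.

    def __init__(self):
        self._processor_inited = False
        self.processor = None

    def init_processor(self, processor) -> None:
        # Skip when there is nothing to attach or initialization already happened.
        if processor is None or self._processor_inited:
            return
        self._processor_inited = True
        self.processor = processor


if __name__ == '__main__':
    t = ToyTemplate()
    t.init_processor(None)            # ignored: nothing to attach
    t.init_processor('tokenizer-A')   # attaches
    t.init_processor('tokenizer-B')   # ignored: already initialized
    print(t.processor)                # tokenizer-A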
