Commit 687e13b

fix sglang response_prefix (#5125)
1 parent: 7af5b61

6 files changed: +12, -7 lines changed


docs/source/Instruction/命令行参数.md

Lines changed: 1 addition & 1 deletion
@@ -647,7 +647,7 @@ App参数继承于[部署参数](#部署参数), [Web-UI参数](#Web-UI参数)
 ## 特定模型参数
 特定模型参数可以通过`--model_kwargs`或者环境变量进行设置,例如: `--model_kwargs '{"fps_max_frames": 12}'`或者`FPS_MAX_FRAMES=12`
 
-### qwen2_vl, qvq, qwen2_5_vl
+### qwen2_vl, qvq, qwen2_5_vl, mimo_vl, keye_vl
 参数含义同`qwen_vl_utils`或者`qwen_omni_utils`库,可以查看[这里](https://github.com/QwenLM/Qwen2.5-VL/blob/main/qwen-vl-utils/src/qwen_vl_utils/vision_process.py#L24)
 
 - IMAGE_FACTOR: 默认为28。

docs/source_en/Instruction/Command-line-parameters.md

Lines changed: 1 addition & 1 deletion
@@ -664,7 +664,7 @@ Export Arguments include the [basic arguments](#base-arguments) and [merge argum
 
 Specific model arguments can be set using `--model_kwargs` or environment variables, for example: `--model_kwargs '{"fps_max_frames": 12}'` or `FPS_MAX_FRAMES=12`.
 
-### qwen2_vl, qvq, qwen2_5_vl
+### qwen2_vl, qvq, qwen2_5_vl, mimo_vl, keye_vl
 The parameter meanings are the same as in the `qwen_vl_utils` or `qwen_omni_utils` library. You can refer to [here](https://github.com/QwenLM/Qwen2.5-VL/blob/main/qwen-vl-utils/src/qwen_vl_utils/vision_process.py#L24)
 
 - IMAGE_FACTOR: Default is 28
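The two documentation files above describe the same mechanism: a model-specific argument can come from `--model_kwargs` JSON or from an upper-cased environment variable such as `FPS_MAX_FRAMES`. A minimal sketch of one possible resolution order (the `get_model_arg` helper and its precedence are illustrative assumptions, not swift's actual parsing code):

import json
import os
from typing import Any


def get_model_arg(name: str, default: Any, model_kwargs_json: str = '') -> Any:
    # Prefer an explicit --model_kwargs JSON value, then the upper-cased env var, then the default.
    kwargs = json.loads(model_kwargs_json) if model_kwargs_json else {}
    if name in kwargs:
        return kwargs[name]
    env_value = os.environ.get(name.upper())
    return type(default)(env_value) if env_value is not None else default


if __name__ == '__main__':
    os.environ['FPS_MAX_FRAMES'] = '12'
    print(get_model_arg('fps_max_frames', 24))                            # 12 (from the env var)
    print(get_model_arg('fps_max_frames', 24, '{"fps_max_frames": 16}'))  # 16 (from --model_kwargs)
    print(get_model_arg('image_factor', 28))                              # 28 (default, matches IMAGE_FACTOR)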

swift/llm/argument/base_args/base_args.py

Lines changed: 3 additions & 3 deletions
@@ -224,8 +224,6 @@ def load_args_from_ckpt(self) -> None:
             'bnb_4bit_quant_type',
             'bnb_4bit_use_double_quant',
         ]
-        if 'megatron' in self.__class__.__name__.lower():
-            force_load_keys = []
         # If the current value is None or an empty list and it is among the following keys
         load_keys = [
             'custom_register_path',
@@ -252,7 +250,9 @@ def load_args_from_ckpt(self) -> None:
             'use_chat_template',
             'response_prefix',
         ]
-
+        if 'megatron' in self.__class__.__name__.lower():
+            force_load_keys = []
+            load_keys.remove('use_chat_template')
         data_keys = list(f.name for f in fields(DataArguments))
         for key, old_value in old_args.items():
             if old_value is None:
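With the relocated block, Megatron argument classes now skip force-loading entirely and also stop restoring `use_chat_template` from the checkpoint. A standalone sketch of the resulting key selection (the `select_ckpt_keys` helper and the shortened key lists are illustrative, not the real `load_args_from_ckpt`):

from typing import List, Tuple


def select_ckpt_keys(class_name: str) -> Tuple[List[str], List[str]]:
    # Keys always overwritten from the checkpoint (abbreviated list).
    force_load_keys = ['bnb_4bit_quant_type', 'bnb_4bit_use_double_quant']
    # Keys restored only when the current value is None or an empty list (abbreviated list).
    load_keys = ['custom_register_path', 'use_chat_template', 'response_prefix']
    if 'megatron' in class_name.lower():
        force_load_keys = []                   # never force-overwrite Megatron args
        load_keys.remove('use_chat_template')  # and do not restore use_chat_template either
    return force_load_keys, load_keys


if __name__ == '__main__':
    print(select_ckpt_keys('TrainArguments'))
    print(select_ckpt_keys('MegatronTrainArguments'))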

swift/llm/data_loader.py

Lines changed: 2 additions & 1 deletion
@@ -3,6 +3,7 @@
 import torch
 import torch.distributed as dist
 from torch.utils.data import DataLoader
+from tqdm import tqdm
 
 from swift.llm import to_device
 
@@ -104,7 +105,7 @@ def _scatter_object_list(self, inputs):
 
     def _skip_batches(self, base_iter):
         if self.rank == 0 and self.skip_batches > 0:
-            for _ in range(self.skip_batches):
+            for _ in tqdm(range(self.skip_batches), dynamic_ncols=True, desc='Skip Batches: '):
                 [next(base_iter) for _ in range(self.world_size)]
 
     def __iter__(self):
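The only behavioural change here is a progress bar while rank 0 fast-forwards the iterator; each skipped step still consumes `world_size` per-rank batches. A self-contained sketch of the same pattern (the `skip_first_batches` function and the toy iterator are illustrative, not the swift data loader):

from tqdm import tqdm


def skip_first_batches(base_iter, num_batches: int, world_size: int) -> None:
    # Discard the first `num_batches` global steps, one progress tick per step.
    for _ in tqdm(range(num_batches), dynamic_ncols=True, desc='Skip Batches: '):
        # A global step corresponds to `world_size` per-rank batches.
        [next(base_iter) for _ in range(world_size)]


if __name__ == '__main__':
    data = iter(range(100))
    skip_first_batches(data, num_batches=5, world_size=4)
    print(next(data))  # 20: the first 5 * 4 items were skipped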

swift/llm/infer/infer_engine/sglang_engine.py

Lines changed: 4 additions & 0 deletions
@@ -123,6 +123,8 @@ def _create_chat_completion_response(self, output, template):
         meta_info = output['meta_info']
         usage_info = self._get_usage_info(meta_info['prompt_tokens'], meta_info['completion_tokens'])
         response = output['text']
+        if template.template_meta.response_prefix:
+            response = template.template_meta.response_prefix + response
         toolcall = self._get_toolcall(response, template)
         choice = ChatCompletionResponseChoice(
             index=0,
@@ -188,6 +190,8 @@ def _create_chat_completion_stream_response(self, output, template, generation_c
                                                  idx) -> Optional[ChatCompletionStreamResponse]:
         assert output is not None
         response = output['text']
+        if template.template_meta.response_prefix:
+            response = template.template_meta.response_prefix + response
         meta_info = output['meta_info']
         finish_reason = meta_info['finish_reason']
         delta_text = response[idx[0]:]
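Both the non-streaming and the streaming paths now prepend the template's `response_prefix` (when one is defined) before tool-call parsing and delta slicing, so clients receive the full templated response. A minimal sketch of that behaviour with stand-in names (`SimpleTemplateMeta`, `build_response`, and `stream_delta` are assumptions, not the real swift/SGLang classes):

from dataclasses import dataclass
from typing import Optional


@dataclass
class SimpleTemplateMeta:
    # Some templates define a prefix the model's text should start with, e.g. '<think>\n'.
    response_prefix: Optional[str] = None


def build_response(raw_text: str, meta: SimpleTemplateMeta) -> str:
    # Mirror the fix: prepend the prefix before any further parsing.
    if meta.response_prefix:
        return meta.response_prefix + raw_text
    return raw_text


def stream_delta(full_response: str, already_sent: int) -> str:
    # Streaming emits only the not-yet-sent tail, like response[idx[0]:] above.
    return full_response[already_sent:]


if __name__ == '__main__':
    meta = SimpleTemplateMeta(response_prefix='<think>\n')
    full = build_response('The answer is 42.', meta)
    print(repr(full))                   # '<think>\nThe answer is 42.'
    print(repr(stream_delta(full, 0)))  # first chunk includes the prefix
    print(repr(stream_delta(full, 8)))  # later chunks contain only new text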

swift/llm/template/base.py

Lines changed: 1 addition & 1 deletion
@@ -129,7 +129,7 @@ def __init__(
         self.init_processor(processor)
 
     def init_processor(self, processor: Processor) -> None:
-        if processor is None:
+        if processor is None or self._processor_inited:
             return
         self._processor_inited = True
         self.processor = processor
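The added `self._processor_inited` check makes `init_processor` idempotent: once a processor is attached, later calls become no-ops instead of overwriting it. A toy illustration of the same guard (the `ToyTemplate` class is an assumption, not swift's real `Template`):

class ToyTemplate:
    # Toy stand-in for the real Template; only the init guard is modelled.

    def __init__(self):
        self._processor_inited = False
        self.processor = None

    def init_processor(self, processor) -> None:
        # Skip when there is nothing to attach or initialization already happened.
        if processor is None or self._processor_inited:
            return
        self._processor_inited = True
        self.processor = processor


if __name__ == '__main__':
    t = ToyTemplate()
    t.init_processor(None)            # ignored: nothing to attach
    t.init_processor('tokenizer-A')   # attaches
    t.init_processor('tokenizer-B')   # ignored: already initialized
    print(t.processor)                # tokenizer-A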
