@@ -222,13 +222,16 @@ async def _infer_embedding_async(self, template: Template, inputs: Dict[str, Any
 
     async def _infer_full_async(self, template: Template, inputs: Dict[str, Any], generation_config: Dict[str, Any],
                                 request_config: RequestConfig) -> ChatCompletionResponse:
-        output = await self.engine.async_generate(**inputs, sampling_params=generation_config)
+        engine_inputs = {k: v for k, v in inputs.items() if k != 'template_inputs'}
+        output = await self.engine.async_generate(**engine_inputs, sampling_params=generation_config)
         output['prompt_token_ids'] = inputs['input_ids']
         return self._create_chat_completion_response(output, inputs, template, request_config.return_details)
 
     async def _infer_stream_async(self, template: Template, inputs: Dict[str, Any], generation_config: Dict[str, Any],
                                   **kwargs) -> AsyncIterator[ChatCompletionStreamResponse]:
-        result_generator = await self.engine.async_generate(**inputs, sampling_params=generation_config, stream=True)
+        engine_inputs = {k: v for k, v in inputs.items() if k != 'template_inputs'}
+        result_generator = await self.engine.async_generate(
+            **engine_inputs, sampling_params=generation_config, stream=True)
         infer_streamer = InferStreamer(template)
         async for output in result_generator:
             res = self._create_chat_completion_stream_response(output, template, infer_streamer)
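
For context, here is a minimal, self-contained sketch of the failure mode this diff addresses: `inputs` carries a bookkeeping key (`template_inputs`) that the engine's generate call does not declare as a parameter, so unpacking the whole dict with `**inputs` raises a `TypeError`; filtering the dict first avoids that. The `fake_async_generate` coroutine below is a hypothetical stand-in for the engine API, not its real signature.

```python
# Sketch only: `fake_async_generate` is a hypothetical stand-in for the
# engine's async_generate; it accepts only the keyword arguments it declares.
import asyncio
from typing import Any, Dict, List


async def fake_async_generate(input_ids: List[int], sampling_params: Dict[str, Any]) -> Dict[str, Any]:
    # A strict-signature engine entry point: any unexpected kwarg is a TypeError.
    return {'prompt_token_ids': input_ids, 'params': sampling_params}


async def main() -> None:
    # `template_inputs` is extra bookkeeping state kept alongside the model inputs.
    inputs = {'input_ids': [1, 2, 3], 'template_inputs': object()}

    # `await fake_async_generate(**inputs, sampling_params={})` would raise:
    #   TypeError: fake_async_generate() got an unexpected keyword argument 'template_inputs'
    # Dropping the extra key before unpacking, as in the diff, avoids this:
    engine_inputs = {k: v for k, v in inputs.items() if k != 'template_inputs'}
    output = await fake_async_generate(**engine_inputs, sampling_params={'max_new_tokens': 8})
    print(output)


asyncio.run(main())
```

The same filter is applied in both the full and streaming paths because both forward `inputs` via `**` unpacking.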