Commit 96ddca4

vllm multimodal: support enable_thinking

1 parent 58d73d1 commit 96ddca4

File tree: 4 files changed, +10 -11 lines


README.md (5 additions, 10 deletions)

@@ -262,14 +262,9 @@ Chat UI interface:
 | :-------------------: | :--------: | :---: | :---: | :----------------: | :--------------: | :----: |
 | chatglm4-9b | chatglm ||||||
 | chatglm3-6b | chatglm ||| × |||
-| Qwen (7B, 14B, etc.)) | qwen ||||||
-| Qwen-1.5 (0.5B--72B) | qwen ||||||
-| Qwen-2 | qwen ||||||
-| Qwen-2.5 | qwen ||||||
-| Qwen-3 | qwen ||||||
+| Qwen-1.0--3.0 | qwen ||||||
 | Yi-34B | yi ||||||
-| Internlm-1.0 | internlm ||||||
-| Internlm-2.0 | internlm ||||||
+| Internlm-1.0--2.0 | internlm ||||||
 | Deepseek | deepseek ||||||
 | Llama-3 | llama ||||||
 | Baichuan-2 | baichuan ||||||

@@ -281,9 +276,9 @@ Chat UI interface:
 | :--------------: | :--------: | :---: | :---: | :----------------: | :--------------: | :----: |
 | glm-4v-9b | chatglm | × | × | × || × |
 | InternVL2 | internvl | × | × ||| × |
-| InternVL2.5 | internvl | × | × ||| × |
-| InternVL3 | internvl | × ||| | × |
-| MiniCPM-V-2_6 | minicpmv | × || | × | × |
+| InternVL2.5--3.5 | internvl | × | × ||| × |
+| MiniCPM-V-2.6 | minicpmv | × ||| × | × |
+| MiniCPM-V-4.5 | minicpmv | × || × | × | × |
 | Qwen2-VL | qwen | × || × |||
 | Qwen2.5-VL | qwen | × || × |||
 | QVQ | qwen | × || × | × | × |

gpt_server/model_backend/vllm_backend.py (2 additions, 0 deletions)

@@ -72,6 +72,7 @@ async def stream_chat(self, params: Dict[str, Any]) -> AsyncGenerator:
         presence_penalty = float(params.get("presence_penalty", 0.0))
         frequency_penalty = float(params.get("frequency_penalty", 0.0))
         repetition_penalty = float(params.get("repetition_penalty", 1.0))
+        enable_thinking = bool(params.get("enable_thinking", True))
         request = params.get("request", None)
         # Handle stop_str
         stop = set()

@@ -96,6 +97,7 @@ async def stream_chat(self, params: Dict[str, Any]) -> AsyncGenerator:
             add_generation_prompt=True,
             tools=tools,
             model_config=await self.engine.get_model_config(),
+            enable_thinking=enable_thinking,
         )
         mm_data = await mm_data_future
         inputs = {"multi_modal_data": mm_data, "prompt": prompt}

tests/test_openai_chat.py (1 addition, 0 deletions)

@@ -8,6 +8,7 @@
     model="qwen",  # internlm chatglm3 qwen llama3 chatglm4 qwen-72b
     messages=[{"role": "user", "content": "你是谁"}],
     stream=stream,
+    extra_body={"enable_thinking": True},  # controls whether to think; supported by some models
 )
 if stream:
     for chunk in output:
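Client side, extra_body is the openai-python escape hatch for non-standard fields: the client merges them into the request JSON, where gpt_server can read them. A self-contained sketch of the call above; the base_url and api_key are assumed values for a local deployment, not taken from the repo:

```python
# Hedged sketch: base_url and api_key are assumptions, adjust to your deployment.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8082/v1", api_key="EMPTY")

output = client.chat.completions.create(
    model="qwen",
    messages=[{"role": "user", "content": "你是谁"}],
    stream=True,
    # extra_body fields are not part of the OpenAI schema; the client
    # merges them into the request body, where the server picks them up.
    extra_body={"enable_thinking": False},
)
for chunk in output:
    # each streamed chunk carries an incremental delta
    print(chunk.choices[0].delta.content or "", end="", flush=True)
```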

tests/test_openai_vl_chat.py (2 additions, 1 deletion)

@@ -23,7 +23,7 @@ def image_to_base64(image_path):

 stream = True
 output = client.chat.completions.create(
-    model="glm4.1v",  # internlm chatglm3 qwen llama3 chatglm4
+    model="minicpmv",  # internlm chatglm3 qwen llama3 chatglm4
     messages=[
         {
             "role": "user",

@@ -42,6 +42,7 @@ def image_to_base64(image_path):
         }
     ],
     stream=stream,
+    extra_body={"enable_thinking": True},  # controls whether to think; supported by some models
 )
 if stream:
     for chunk in output:
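The hunk above elides the multimodal message body. For orientation, a typical OpenAI-style image payload looks like the sketch below; it assumes image_to_base64 returns a raw base64 string (as its name suggests) and uses a hypothetical demo.jpg path:

```python
# Sketch of a typical multimodal message; the actual test body is elided above.
import base64


def image_to_base64(image_path: str) -> str:
    # Read the image file and encode its bytes as base64 text.
    with open(image_path, "rb") as f:
        return base64.b64encode(f.read()).decode("utf-8")


messages = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "Describe this image."},
            {
                "type": "image_url",
                # data: URL carrying the base64-encoded image bytes
                "image_url": {"url": f"data:image/jpeg;base64,{image_to_base64('demo.jpg')}"},
            },
        ],
    }
]
```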
