Skip to content

Commit fdd8e92

Browse files
committed
升级 vllm版本
1 parent 36cdde1 commit fdd8e92

File tree

4 files changed

+154
-295
lines changed

4 files changed

+154
-295
lines changed

gpt_server/model_backend/vllm_backend.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ async def stream_chat(self, params: Dict[str, Any]) -> AsyncGenerator:
8787
if multimodal: # 多模态模型
8888
# ----------------------------------------------------------------
8989
model_config = await self.engine.get_model_config()
90-
conversation, mm_data_future = parse_chat_messages_futures(
90+
conversation, mm_data_future, _ = parse_chat_messages_futures(
9191
messages, model_config, tokenizer, content_format="string"
9292
)
9393

pyproject.toml

Lines changed: 13 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "gpt_server"
3-
version = "0.6.3"
3+
version = "0.6.4"
44
description = "gpt_server是一个用于生产级部署LLMs、Embedding、Reranker、ASR和TTS的开源框架。"
55
readme = "README.md"
66
license = { text = "Apache 2.0" }
@@ -11,57 +11,50 @@ dependencies = [
1111
"fastapi==0.115.0",
1212
"ffmpy",
1313
"fschat==0.2.36",
14-
"infinity-emb[all]==0.0.76",
14+
"infinity-emb[all]==0.0.77",
1515
"lmdeploy==0.10.0",
1616
"loguru>=0.7.2",
1717
"openai==1.99.1",
1818
"setuptools==75.2.0",
1919
"streamlit==1.39.0",
20-
"torch==2.6.0",
20+
"torch==2.8.0",
2121
"torchvision==0.20.1",
22-
"vllm",
22+
"vllm==0.10.2",
2323
"qwen_vl_utils",
2424
"evalscope[perf,rag]==0.16.1",
2525
"modelscope==1.26.0",
2626
"edge-tts>=7.0.0",
2727
"funasr>=1.2.6",
28-
"sglang[all]>=0.5.1.post3",
28+
"sglang[all]>=0.5.2",
2929
"flashinfer-python",
3030
"flashtts>=0.1.7",
3131
"diffusers>=0.35.1",
3232
#"sqlmodel>=0.0.24",
3333
"autoawq>=0.2.9",
34+
"flash-attn",
3435
]
3536

3637
[tool.uv]
3738
default-groups = [] # 默认只安装dependencies中的库
3839
override-dependencies = [
3940
"setuptools==77.0.3",
4041
"torchvision==0.23.0",
41-
"torchaudio==2.8.0",
42-
"torch==2.8.0",
43-
"triton==3.4.0",
44-
"transformers==4.56.1", # infinity-emb
45-
"soundfile==0.13.1", # infinity
46-
"xgrammar==0.1.23", # sglang[all]==0.4.5 depends on xgrammar==0.1.17
47-
"flashinfer-python==0.2.10",
48-
"outlines-core==0.2.10", # sglang 和 vllm 的冲突
49-
"peft>=0.17.0", # 和 lmdeploy 冲突
42+
"transformers==4.56.1", # infinity-emb
43+
"soundfile==0.13.1", # infinity
44+
"xgrammar==0.1.24", # sglang[all]==0.4.5 depends on xgrammar==0.1.17
45+
"outlines-core==0.2.11", # sglang 和 vllm 的冲突
46+
"peft>=0.17.0", # 和 lmdeploy 冲突
5047
]
5148

5249
[project.scripts]
5350
gpt_server = "gpt_server.cli:main"
5451

55-
[tool.uv.sources]
56-
vllm = { index = "vllm-custom" }
52+
[tool.uv.extra-build-dependencies]
53+
flash-attn = ["torch"]
5754

5855
[[tool.uv.index]]
5956
url = "https://pypi.tuna.tsinghua.edu.cn/simple"
60-
default = true
6157

62-
[[tool.uv.index]]
63-
name = "vllm-custom"
64-
url = "https://wheels.vllm.ai/006e7a34aeb3e905ca4131a3251fe079f0511e2f"
6558

6659
[build-system]
6760
requires = ["setuptools", "wheel"]

0 commit comments

Comments
 (0)