Skip to content

Commit 8fad613

Browse files
committed
升级 vllm lmdeploy sglang 为最新, 升级版本为0.6.9
1 parent 4296e37 commit 8fad613

File tree

3 files changed

+527
-290
lines changed

3 files changed

+527
-290
lines changed

gpt_server/model_backend/vllm_backend.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
apply_hf_chat_template,
1212
parse_chat_messages_futures,
1313
)
14+
from vllm.config.structured_outputs import StructuredOutputsConfig
1415
from gpt_server.settings import get_model_config
1516

1617

@@ -46,11 +47,13 @@ def __init__(self, model_path, tokenizer: PreTrainedTokenizer) -> None:
4647
enable_prefix_caching=model_config.enable_prefix_caching,
4748
dtype=model_config.dtype,
4849
max_model_len=model_config.max_model_len,
49-
guided_decoding_backend="xgrammar",
50+
# guided_decoding_backend="xgrammar",
5051
# 支持LMCache的KV传输
5152
kv_transfer_config=KVTransferConfig(
5253
kv_connector="LMCacheConnectorV1", kv_role="kv_both"
5354
),
55+
prefix_caching_hash_algo="xxhash",
56+
structured_outputs_config=StructuredOutputsConfig(backend="xgrammar"),
5457
)
5558
self.engine = AsyncLLMEngine.from_engine_args(self.engine_args)
5659
self.tokenizer = tokenizer
@@ -86,11 +89,11 @@ async def stream_chat(self, params: Dict[str, Any]) -> AsyncGenerator:
8689

8790
multimodal = params.get("multimodal", False)
8891
tokenizer = await self.engine.get_tokenizer()
92+
model_config = self.engine.model_config
8993
if multimodal: # 多模态模型
9094
# ----------------------------------------------------------------
91-
model_config = await self.engine.get_model_config()
9295
conversation, mm_data_future, _ = parse_chat_messages_futures(
93-
messages, model_config, tokenizer, content_format="string"
96+
messages, model_config, content_format="string"
9497
)
9598

9699
prompt = apply_hf_chat_template(
@@ -101,7 +104,7 @@ async def stream_chat(self, params: Dict[str, Any]) -> AsyncGenerator:
101104
),
102105
add_generation_prompt=True,
103106
tools=tools,
104-
model_config=await self.engine.get_model_config(),
107+
model_config=model_config,
105108
enable_thinking=enable_thinking,
106109
)
107110
mm_data = await mm_data_future
@@ -116,7 +119,7 @@ async def stream_chat(self, params: Dict[str, Any]) -> AsyncGenerator:
116119
),
117120
add_generation_prompt=True,
118121
tools=tools,
119-
model_config=await self.engine.get_model_config(),
122+
model_config=model_config,
120123
enable_thinking=enable_thinking,
121124
)
122125
input_ids = params.get("input_ids", None)

pyproject.toml

Lines changed: 23 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "gpt_server"
3-
version = "0.6.8.1"
3+
version = "0.6.9"
44
description = "gpt_server是一个用于生产级部署LLMs、Embedding、Reranker、ASR和TTS的开源框架。"
55
readme = "README.md"
66
license = { text = "Apache 2.0" }
@@ -12,38 +12,45 @@ dependencies = [
1212
"ffmpy",
1313
"fschat==0.2.36",
1414
"infinity-emb[all]==0.0.77",
15-
"lmdeploy==0.10.2",
15+
"lmdeploy==0.11.0",
1616
"loguru>=0.7.2",
1717
"openai==2.6.1",
1818
"setuptools==75.2.0",
1919
"streamlit>=1.50.0",
20-
"torch==2.8.0",
21-
"torchvision==0.23.0",
22-
"vllm==0.11.0",
20+
"torch==2.9.0",
21+
"torchvision==0.24.0",
22+
"vllm",
2323
"qwen_vl_utils",
2424
"evalscope[perf,rag]>=1.1.1",
2525
"modelscope>=1.31.0",
2626
"edge-tts>=7.0.0",
2727
"funasr>=1.2.6",
28-
"sglang[all]>=0.5.5",
28+
"sglang[all]>=0.5.6.post2",
2929
"flashinfer-python",
3030
"flashtts>=0.1.7",
31-
"diffusers>=0.35.2",
31+
"diffusers>=0.36.0",
3232
"sqlmodel>=0.0.27",
3333
"autoawq>=0.2.9",
34-
"lmcache>=0.3.9.post1",
34+
"lmcache>=0.3.11",
3535
]
3636

3737
[tool.uv]
38-
default-groups = [] # 默认只安装dependencies中的库
39-
prerelease = "allow"
4038
override-dependencies = [
4139
"setuptools==77.0.3",
42-
"transformers==4.57.1", # infinity-emb
40+
"transformers==4.57.3", # infinity-emb
4341
"soundfile==0.13.1", # infinity
4442
"outlines-core==0.2.11", # sglang 和 vllm 的冲突
4543
"peft>=0.17.0", # 和 lmdeploy 冲突
44+
"torchvision==0.24.0",
45+
"torchaudio==2.9.1",
46+
"torch==2.9.0",
47+
"llguidance==1.3.0",
48+
"starlette==0.49.1",
49+
"triton==3.5.1",
50+
4651
]
52+
default-groups = [] # 默认只安装dependencies中的库
53+
prerelease = "allow"
4754

4855
[project.scripts]
4956
gpt_server = "gpt_server.cli:main"
@@ -55,12 +62,12 @@ gpt_server = "gpt_server.cli:main"
5562
url = "https://pypi.tuna.tsinghua.edu.cn/simple"
5663
default = true
5764

58-
[tool.uv.sources]
59-
diffusers = { git = "https://gitee.com/liuyu_1997/diffusers.git" }
65+
# [tool.uv.sources]
66+
# diffusers = { git = "https://gitee.com/liuyu_1997/diffusers.git" }
6067

61-
# [[tool.uv.index]]
62-
# name = "vllm-custom"
63-
# url = "https://wheels.vllm.ai/006e7a34aeb3e905ca4131a3251fe079f0511e2f"
68+
[[tool.uv.index]]
69+
name = "vllm-custom"
70+
url = "https://wheels.vllm.ai/9e67c4ce985b0b8852603cfe3fcaf8f37de137ed"
6471

6572
[build-system]
6673
requires = ["setuptools", "wheel"]

0 commit comments

Comments (0)