Skip to content

Commit ad92b40

Browse files
committed
升级 vllm sglang版本
1 parent a97a5fb commit ad92b40

File tree

6 files changed

+446
-168
lines changed

6 files changed

+446
-168
lines changed

gpt_server/model_worker/base/model_worker_base.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -309,6 +309,8 @@ def run(cls):
309309
parser.add_argument("--limit_worker_concurrency", type=int, default=1024)
310310
# port
311311
parser.add_argument("--port", type=int, default=None)
312+
# model_type
313+
parser.add_argument("--model_type", type=str, default="auto")
312314
args = parser.parse_args()
313315
os.environ["num_gpus"] = str(args.num_gpus)
314316
if args.backend == "vllm":
@@ -331,6 +333,7 @@ def run(cls):
331333
if args.punc_model:
332334
os.environ["punc_model"] = args.punc_model
333335

336+
os.environ["model_type"] = args.model_type
334337
os.environ["enable_prefix_caching"] = args.enable_prefix_caching
335338
os.environ["gpu_memory_utilization"] = args.gpu_memory_utilization
336339
os.environ["kv_cache_quant_policy"] = args.kv_cache_quant_policy

gpt_server/serving/main.py

Lines changed: 2 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -23,38 +23,11 @@
2323
start_api_server,
2424
start_model_worker,
2525
delete_log,
26+
pre_processing,
2627
)
2728

2829

29-
def delete_flash_attn():
30-
"删除 flash_attn,避免报错"
31-
import shutil
32-
import os
33-
from pathlib import Path
34-
from loguru import logger
35-
36-
root_path = Path(__file__).parent.parent.parent
37-
flash_attn_path = root_path.joinpath(
38-
".venv/lib/python3.11/site-packages/flash_attn"
39-
)
40-
41-
try:
42-
# 检查路径是否存在
43-
if os.path.exists(flash_attn_path):
44-
# 删除整个目录树
45-
shutil.rmtree(flash_attn_path)
46-
logger.info(f"成功删除: {flash_attn_path}")
47-
48-
except PermissionError:
49-
logger.error("权限不足,无法删除 flash_attn")
50-
except Exception as e:
51-
logger.error(f"删除 flash_attn 失败: {e}")
52-
53-
54-
# 删除日志
55-
delete_log()
56-
57-
delete_flash_attn()
30+
pre_processing()
5831

5932
config_path = os.path.join(root_dir, "gpt_server/script/config.yaml")
6033
env = os.getenv("ENV")

gpt_server/utils.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,45 @@
1818
STATIC_DIR = root_dir / "static"
1919

2020

21+
def clear_flashinfer_cache():
    """Run the ``flashinfer clear-cache`` shell command via ``os.system``.

    The exit status of the command is ignored.
    """
    command = "flashinfer clear-cache"
    os.system(command)
23+
24+
25+
def delete_flash_attn():
    """Delete the ``flash_attn`` package from the project's ``.venv``.

    The original note states the package is removed to avoid errors.
    Every ``.venv/lib/python*/site-packages/flash_attn`` path under the
    directory two levels above this file is removed, so the cleanup does not
    depend on the virtualenv having been created with Python 3.11.

    Failures are logged through loguru and never raised: this is a
    best-effort cleanup step. Nothing is logged when no matching path exists.
    """
    import shutil
    from pathlib import Path
    from loguru import logger

    root_path = Path(__file__).parent.parent
    # Glob over the interpreter directory instead of hard-coding python3.11;
    # sorted() only makes the processing order deterministic.
    candidates = sorted(
        root_path.glob(".venv/lib/python*/site-packages/flash_attn")
    )

    for flash_attn_path in candidates:
        try:
            # Remove the whole directory tree.
            shutil.rmtree(flash_attn_path)
            logger.info(f"成功删除: {flash_attn_path}")
        except PermissionError:
            logger.error("权限不足,无法删除 flash_attn")
        except Exception as e:
            logger.error(f"删除 flash_attn 失败: {e}")
48+
49+
50+
def pre_processing():
    """Run the pre-start cleanup steps, one after another."""
    steps = (
        delete_log,  # delete logs
        delete_flash_attn,  # delete the unwanted flash_attn package
        clear_flashinfer_cache,  # clear the flashinfer cache
    )
    for step in steps:
        step()
58+
59+
2160
def kill_child_processes(parent_pid, including_parent=False):
2261
"杀死子进程/僵尸进程"
2362
try:
@@ -263,6 +302,7 @@ def start_model_worker(config: dict):
263302
+ f" --log_level {log_level}" # 日志水平
264303
+ f" --task_type {task_type}" # task type
265304
+ f" --limit_worker_concurrency {limit_worker_concurrency}" # 限制worker并发数
305+
+ f" --model_type {model_type}" # 默认类型
266306
)
267307
# 处理为 None的情况
268308
if port:

pyproject.toml

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "gpt_server"
3-
version = "0.6.4"
3+
version = "0.6.5"
44
description = "gpt_server是一个用于生产级部署LLMs、Embedding、Reranker、ASR和TTS的开源框架。"
55
readme = "README.md"
66
license = { text = "Apache 2.0" }
@@ -16,16 +16,16 @@ dependencies = [
1616
"loguru>=0.7.2",
1717
"openai==1.99.1",
1818
"setuptools==75.2.0",
19-
"streamlit==1.39.0",
19+
"streamlit>=1.50.0",
2020
"torch==2.8.0",
2121
"torchvision==0.23.0",
22-
"vllm==0.10.2",
22+
"vllm==0.11.0",
2323
"qwen_vl_utils",
2424
"evalscope[perf,rag]==0.16.1",
2525
"modelscope==1.26.0",
2626
"edge-tts>=7.0.0",
2727
"funasr>=1.2.6",
28-
"sglang[all]>=0.5.2",
28+
"sglang[all]>=0.5.3.post1",
2929
"flashinfer-python",
3030
"flashtts>=0.1.7",
3131
"diffusers>=0.35.1",
@@ -35,9 +35,10 @@ dependencies = [
3535

3636
[tool.uv]
3737
default-groups = [] # 默认只安装dependencies中的库
38+
prerelease = "allow"
3839
override-dependencies = [
3940
"setuptools==77.0.3",
40-
"transformers==4.56.1", # infinity-emb
41+
"transformers==4.57.0", # infinity-emb
4142
"soundfile==0.13.1", # infinity
4243
"xgrammar==0.1.24", # sglang[all]==0.4.5 depends on xgrammar==0.1.17
4344
"outlines-core==0.2.11", # sglang 和 vllm 的冲突

0 commit comments

Comments
 (0)