Skip to content

Commit 288c67e

Browse files
committed
python 升级为 3.11 升级 推理引擎版本 版本设置为 0.6.2
1 parent 6206484 commit 288c67e

File tree

7 files changed

+341
-1130
lines changed

7 files changed

+341
-1130
lines changed

.python-version

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
3.10
1+
3.11

gpt_server/model_handler/pitch.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
from typing import Optional
2+
from flashtts.llm.vllm_generator import VllmGenerator
3+
import flashtts
4+
from loguru import logger
5+
6+
7+
class VllmGenerator_(VllmGenerator):
    """Drop-in subclass of flashtts' ``VllmGenerator`` that constructs the
    vLLM async engine itself.

    The constructor deliberately bypasses ``VllmGenerator.__init__`` (via
    ``super(VllmGenerator, self).__init__``) so that only the common base
    class is initialized, while the engine is built here with our own
    ``AsyncEngineArgs``.
    """

    def __init__(
        self,
        model_path: str,
        max_length: int = 32768,
        gpu_memory_utilization: float = 0.6,
        device: str = "cuda",
        stop_tokens: Optional[list[str]] = None,
        stop_token_ids: Optional[list[int]] = None,
        **kwargs,
    ):
        """Build the vLLM async engine and initialize the generator base.

        Args:
            model_path: Path (or model id) passed to vLLM as ``model`` and to
                the base class as ``tokenizer``.
            max_length: Maximum model context length (``max_model_len``).
            gpu_memory_utilization: Fraction of GPU memory vLLM may use.
            device: Accepted for interface compatibility but NOT forwarded to
                vLLM (it was commented out upstream) — presumably because the
                engine args no longer accept it; TODO confirm.
            stop_tokens: Optional stop strings forwarded to the base class.
            stop_token_ids: Optional stop token ids forwarded to the base class.
            **kwargs: Extra keyword arguments forwarded to ``AsyncEngineArgs``.
        """
        # Imported lazily so importing this module does not require vLLM.
        from vllm import AsyncEngineArgs, AsyncLLMEngine

        engine_args = AsyncEngineArgs(
            model=model_path,
            max_model_len=max_length,
            gpu_memory_utilization=gpu_memory_utilization,
            disable_log_stats=True,
            **kwargs,
        )
        self.model = AsyncLLMEngine.from_engine_args(engine_args)

        # Skip VllmGenerator.__init__ on purpose: initialize its parent
        # directly so the upstream engine construction does not run.
        super(VllmGenerator, self).__init__(
            tokenizer=model_path,
            max_length=max_length,
            stop_tokens=stop_tokens,
            stop_token_ids=stop_token_ids,
        )
39+
40+
41+
def pitch_flashtts():
    """Monkey-patch flashtts to use our ``VllmGenerator_`` subclass.

    Replaces ``flashtts.llm.vllm_generator.VllmGenerator`` at module level so
    any later flashtts code picks up the patched class. The function name keeps
    the original spelling ("pitch" for "patch") because callers import it under
    this exact name.
    """
    flashtts.llm.vllm_generator.VllmGenerator = VllmGenerator_
    logger.info("patch flashtts.llm.vllm_generator.VllmGenerator")

gpt_server/model_worker/base/model_worker_base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -285,7 +285,7 @@ def run(cls):
285285
logger.remove(0)
286286
log_level = os.getenv("log_level", "WARNING")
287287
logger.add(sys.stderr, level=log_level)
288-
os.environ["VLLM_USE_V1"] = "0"
288+
289289

290290
host = args.host
291291
controller_address = args.controller_address

gpt_server/model_worker/spark_tts.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,16 @@
33
import os
44
from typing import List
55
from loguru import logger
6+
from gpt_server.model_handler.pitch import pitch_flashtts
7+
8+
pitch_flashtts()
69
from gpt_server.model_worker.base.model_worker_base import ModelWorkerBase
710
from gpt_server.model_worker.utils import load_base64_or_url
811
from flashtts.engine import AutoEngine
912
from flashtts.server.utils.audio_writer import StreamingAudioWriter
1013

1114
root_dir = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
15+
# os.environ["VLLM_USE_V1"] = "0"
1216

1317

1418
class SparkTTSWorker(ModelWorkerBase):

pyproject.toml

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
[project]
22
name = "gpt_server"
3-
version = "0.6.0"
3+
version = "0.6.1"
44
description = "gpt_server是一个用于生产级部署LLMs或Embedding的开源框架。"
55
readme = "README.md"
66
license = { text = "Apache 2.0" }
77
authors = [{ name = "Yu Liu", email = "[email protected]" }]
8-
requires-python = ">=3.10"
8+
requires-python = ">=3.11"
99
dependencies = [
1010
"accelerate>=1.0.1",
1111
"fastapi==0.115.0",
@@ -14,18 +14,18 @@ dependencies = [
1414
"infinity-emb[all]==0.0.76",
1515
"lmdeploy==0.9.2",
1616
"loguru>=0.7.2",
17-
"openai==1.86.0",
17+
"openai==1.99.1",
1818
"setuptools==75.2.0",
1919
"streamlit==1.39.0",
2020
"torch==2.6.0",
2121
"torchvision==0.20.1",
22-
"vllm==0.9.2",
22+
"vllm==0.10.1",
2323
"qwen_vl_utils",
2424
"evalscope[perf,rag]==0.16.1",
2525
"modelscope==1.26.0",
2626
"edge-tts>=7.0.0",
2727
"funasr>=1.2.6",
28-
"sglang[all]>=0.4.10",
28+
"sglang[all]>=0.4.10.post2",
2929
"flashinfer-python",
3030
"flashtts>=0.1.7",
3131
"diffusers>=0.33.1",
@@ -41,8 +41,9 @@ override-dependencies = [
4141
"triton",
4242
"transformers==4.53.3", # infinity-emb
4343
"soundfile==0.13.1", # infinity
44-
"xgrammar==0.1.21", # sglang[all]==0.4.5 depends on xgrammar==0.1.17
44+
"xgrammar==0.1.23", # sglang[all]==0.4.5 depends on xgrammar==0.1.17
4545
"flashinfer-python==0.2.10",
46+
"outlines-core==0.2.10", # sglang 和 vllm 的冲突
4647
]
4748

4849
[project.scripts]

0 commit comments

Comments
 (0)