File tree Expand file tree Collapse file tree 7 files changed +341
-1130
lines changed
Expand file tree Collapse file tree 7 files changed +341
-1130
lines changed Original file line number Diff line number Diff line change 1- 3.10
1+ 3.11
Original file line number Diff line number Diff line change 1+ from typing import Optional
2+ from flashtts .llm .vllm_generator import VllmGenerator
3+ import flashtts
4+ from loguru import logger
5+
6+
class VllmGenerator_(VllmGenerator):
    """Drop-in replacement for flashtts's ``VllmGenerator``.

    Builds the vLLM ``AsyncLLMEngine`` itself and then deliberately skips
    ``VllmGenerator.__init__``: ``super(VllmGenerator, self)`` resolves to
    VllmGenerator's *parent* in the MRO, so only the base tokenizer /
    stop-token setup runs.  This keeps flashtts's original engine
    construction — written against an older vLLM API — from executing.
    """

    def __init__(
        self,
        model_path: str,
        max_length: int = 32768,
        gpu_memory_utilization: float = 0.6,
        device: str = "cuda",
        stop_tokens: Optional[list[str]] = None,
        stop_token_ids: Optional[list[int]] = None,
        **kwargs,
    ):
        # Imported lazily so this module can be imported without vLLM present.
        from vllm import AsyncEngineArgs, AsyncLLMEngine

        # NOTE(review): `device` is intentionally NOT forwarded to
        # AsyncEngineArgs (it was commented out in the original); the
        # parameter is kept only for signature compatibility with flashtts.
        engine_args = AsyncEngineArgs(
            model=model_path,
            max_model_len=max_length,
            gpu_memory_utilization=gpu_memory_utilization,
            disable_log_stats=True,
            **kwargs,
        )
        self.model = AsyncLLMEngine.from_engine_args(engine_args)

        # Bypass VllmGenerator.__init__ on purpose — see class docstring.
        super(VllmGenerator, self).__init__(
            tokenizer=model_path,
            max_length=max_length,
            stop_tokens=stop_tokens,
            stop_token_ids=stop_token_ids,
        )
39+
40+
def pitch_flashtts():
    """Monkey-patch flashtts to use the vLLM-compatible generator above.

    Rebinds ``flashtts.llm.vllm_generator.VllmGenerator`` to
    ``VllmGenerator_``.  Must be called *before* any flashtts engine is
    constructed, otherwise the original class is already bound.

    NOTE(review): the name looks like a typo of "patch"; the public name is
    kept for existing callers, and ``patch_flashtts`` is provided as a
    correctly spelled alias.
    """
    flashtts.llm.vllm_generator.VllmGenerator = VllmGenerator_
    logger.info("patch flashtts.llm.vllm_generator.VllmGenerator")


# Correctly spelled, backward-compatible alias for the misspelled public name.
patch_flashtts = pitch_flashtts
Original file line number Diff line number Diff line change @@ -285,7 +285,7 @@ def run(cls):
285285 logger .remove (0 )
286286 log_level = os .getenv ("log_level" , "WARNING" )
287287 logger .add (sys .stderr , level = log_level )
288- os . environ [ "VLLM_USE_V1" ] = "0"
288+
289289
290290 host = args .host
291291 controller_address = args .controller_address
Original file line number Diff line number Diff line change 33import os
44from typing import List
55from loguru import logger
6+ from gpt_server .model_handler .pitch import pitch_flashtts
7+
8+ pitch_flashtts ()
69from gpt_server .model_worker .base .model_worker_base import ModelWorkerBase
710from gpt_server .model_worker .utils import load_base64_or_url
811from flashtts .engine import AutoEngine
912from flashtts .server .utils .audio_writer import StreamingAudioWriter
1013
1114root_dir = os .path .dirname (os .path .dirname (os .path .dirname (__file__ )))
15+ # os.environ["VLLM_USE_V1"] = "0"
1216
1317
1418class SparkTTSWorker (ModelWorkerBase ):
Original file line number Diff line number Diff line change 11[project ]
22name = " gpt_server"
3- version = " 0.6.0 "
3+ version = " 0.6.1 "
44description = " gpt_server是一个用于生产级部署LLMs或Embedding的开源框架。"
55readme = " README.md"
66license = { text = " Apache 2.0" }
77authors = [{
name =
" Yu Liu" ,
email =
" [email protected] " }]
8- requires-python = " >=3.10 "
8+ requires-python = " >=3.11 "
99dependencies = [
1010 " accelerate>=1.0.1" ,
1111 " fastapi==0.115.0" ,
@@ -14,18 +14,18 @@ dependencies = [
1414 " infinity-emb[all]==0.0.76" ,
1515 " lmdeploy==0.9.2" ,
1616 " loguru>=0.7.2" ,
17- " openai==1.86.0 " ,
17+ " openai==1.99.1 " ,
1818 " setuptools==75.2.0" ,
1919 " streamlit==1.39.0" ,
2020 " torch==2.6.0" ,
2121 " torchvision==0.20.1" ,
22- " vllm==0.9.2 " ,
22+ " vllm==0.10.1 " ,
2323 " qwen_vl_utils" ,
2424 " evalscope[perf,rag]==0.16.1" ,
2525 " modelscope==1.26.0" ,
2626 " edge-tts>=7.0.0" ,
2727 " funasr>=1.2.6" ,
28- " sglang[all]>=0.4.10" ,
28+ " sglang[all]>=0.4.10.post2 " ,
2929 " flashinfer-python" ,
3030 " flashtts>=0.1.7" ,
3131 " diffusers>=0.33.1" ,
@@ -41,8 +41,9 @@ override-dependencies = [
4141 " triton" ,
4242 " transformers==4.53.3" , # infinity-emb
4343 " soundfile==0.13.1" , # infinity
44- " xgrammar==0.1.21 " , # sglang[all]==0.4.5 depends on xgrammar==0.1.17
44+ " xgrammar==0.1.23 " , # sglang[all]==0.4.5 depends on xgrammar==0.1.17
4545 " flashinfer-python==0.2.10" ,
46+ " outlines-core==0.2.10" , # sglang 和 vllm 的冲突
4647]
4748
4849[project .scripts ]
You can’t perform that action at this time.
0 commit comments