3 files changed: +4 -5 lines changed
Original file line number | Diff line number | Diff line change
19
19
# 解决vllm中 ray集群在 TP>1时死的Bug
20
20
import ray
21
21
22
- ray .init (ignore_reinit_error = True , num_cpus = 4 )
23
-
24
- os .environ ["VLLM_USE_V1" ] = "1"
22
+ ray .init (ignore_reinit_error = True , num_cpus = 8 )
25
23
26
24
27
25
class VllmBackend (ModelBackend ):
Original file line number Diff line number Diff line change @@ -259,6 +259,7 @@ def run(cls):
259
259
logger .remove (0 )
260
260
log_level = os .getenv ("log_level" , "WARNING" )
261
261
logger .add (sys .stderr , level = log_level )
262
+ os .environ ["VLLM_USE_V1" ] = "0"
262
263
263
264
host = args .host
264
265
controller_address = args .controller_address
Original file line number Diff line number Diff line change 10
10
11
11
root_dir = os .path .dirname (os .path .dirname (os .path .dirname (__file__ )))
12
12
13
-
14
- os .environ ["VLLM_USE_V1" ] = "1"
15
13
import httpx
16
14
from fastapi import HTTPException
17
15
import base64
@@ -69,6 +67,7 @@ def __init__(
69
67
model_type = "tts" ,
70
68
)
71
69
backend = os .environ ["backend" ]
70
+ gpu_memory_utilization = float (os .getenv ("gpu_memory_utilization" , 0.6 ))
72
71
self .engine = AutoEngine (
73
72
model_path = model_path ,
74
73
max_length = 32768 ,
@@ -79,6 +78,7 @@ def __init__(
79
78
wav2vec_attn_implementation = "sdpa" , # 使用flash attn加速wav2vec
80
79
llm_gpu_memory_utilization = 0.6 ,
81
80
seed = 0 ,
81
+ llm_gpu_memory_utilization = gpu_memory_utilization ,
82
82
)
83
83
loop = asyncio .get_running_loop ()
84
84
# ------------- 添加声音 -------------
You can’t perform that action at this time.
0 commit comments