File tree: 3 files changed, +4 -5 lines changed
Original file line number Diff line number Diff line change
1919# 解决vllm中 ray集群在 TP>1时死的Bug
2020import ray
2121
22- ray .init (ignore_reinit_error = True , num_cpus = 4 )
23-
24- os .environ ["VLLM_USE_V1" ] = "1"
22+ ray .init (ignore_reinit_error = True , num_cpus = 8 )
2523
2624
2725class VllmBackend (ModelBackend ):
Original file line number Diff line number Diff line change
@@ -259,6 +259,7 @@ def run(cls):
259259 logger .remove (0 )
260260 log_level = os .getenv ("log_level" , "WARNING" )
261261 logger .add (sys .stderr , level = log_level )
262+ os .environ ["VLLM_USE_V1" ] = "0"
262263
263264 host = args .host
264265 controller_address = args .controller_address
Original file line number Diff line number Diff line change
1010
1111root_dir = os .path .dirname (os .path .dirname (os .path .dirname (__file__ )))
1212
13-
14- os .environ ["VLLM_USE_V1" ] = "1"
1513import httpx
1614from fastapi import HTTPException
1715import base64
@@ -69,6 +67,7 @@ def __init__(
6967 model_type = "tts" ,
7068 )
7169 backend = os .environ ["backend" ]
70+ gpu_memory_utilization = float (os .getenv ("gpu_memory_utilization" , 0.6 ))
7271 self .engine = AutoEngine (
7372 model_path = model_path ,
7473 max_length = 32768 ,
@@ -79,6 +78,7 @@ def __init__(
7978 wav2vec_attn_implementation = "sdpa" , # 使用flash attn加速wav2vec
8079 llm_gpu_memory_utilization = 0.6 ,
8180 seed = 0 ,
81+ llm_gpu_memory_utilization = gpu_memory_utilization ,
8282 )
8383 loop = asyncio .get_running_loop ()
8484 # ------------- 添加声音 -------------
You can’t perform that action at this time.
0 commit comments