@@ -203,7 +203,7 @@ def get_worker(
203203 controller_addr : str = "http://localhost:21001" ,
204204 worker_id : str = str (uuid .uuid4 ())[:8 ],
205205 model_names : List [str ] = ["" ],
206- limit_worker_concurrency : int = 10000 ,
206+ limit_worker_concurrency : int = 512 ,
207207 conv_template : str = None , # type: ignore
208208 ):
209209 worker = cls (
@@ -318,14 +318,9 @@ def acquire_worker_semaphore():
318318
319319
320320def create_background_tasks (request_id ):
321- async def abort_request () -> None :
322- await worker .backend .engine .abort (request_id )
323-
324321 background_tasks = BackgroundTasks ()
325322 background_tasks .add_task (release_worker_semaphore )
326- #
327- if os .getenv ("backend" ) == "vllm" :
328- background_tasks .add_task (abort_request )
323+
329324 return background_tasks
330325
331326
@@ -348,8 +343,8 @@ async def api_generate_stream(request: Request):
348343 params .pop ("prompt" )
349344 logger .debug (f"params { params } " )
350345 generator = worker .generate_stream_gate (params )
351- # background_tasks = create_background_tasks(request_id)
352- return StreamingResponse (generator , background = None )
346+ background_tasks = create_background_tasks (request_id )
347+ return StreamingResponse (generator , background = background_tasks )
353348
354349
355350@app .post ("/worker_generate_voice_stream" )
@@ -361,7 +356,7 @@ async def api_generate_stream(request: Request):
361356 params ["request" ] = request
362357 logger .debug (f"params { params } " )
363358 generator = worker .generate_voice_stream (params )
364- # background_tasks = create_background_tasks(request_id)
359+ background_tasks = create_background_tasks (request_id )
365360 response_format = params ["response_format" ]
366361 content_type = {
367362 "mp3" : "audio/mpeg" ,
@@ -373,7 +368,7 @@ async def api_generate_stream(request: Request):
373368 }.get (response_format , f"audio/{ response_format } " )
374369 return StreamingResponse (
375370 generator ,
376- background = None ,
371+ background = background_tasks ,
377372 media_type = content_type ,
378373 headers = {
379374 "Content-Disposition" : f"attachment; filename=speech.{ response_format } " ,
@@ -395,8 +390,7 @@ async def api_generate(request: Request):
395390 logger .debug (f"params { params } " )
396391 output = await worker .generate_gate (params )
397392 release_worker_semaphore ()
398- # if os.getenv("backend") == "vllm":
399- # await worker.backend.engine.abort(request_id)
393+
400394 return JSONResponse (output )
401395
402396
0 commit comments