@@ -203,7 +203,7 @@ def get_worker(
203
203
controller_addr : str = "http://localhost:21001" ,
204
204
worker_id : str = str (uuid .uuid4 ())[:8 ],
205
205
model_names : List [str ] = ["" ],
206
- limit_worker_concurrency : int = 10000 ,
206
+ limit_worker_concurrency : int = 512 ,
207
207
conv_template : str = None , # type: ignore
208
208
):
209
209
worker = cls (
@@ -318,14 +318,9 @@ def acquire_worker_semaphore():
318
318
319
319
320
320
def create_background_tasks (request_id ):
321
- async def abort_request () -> None :
322
- await worker .backend .engine .abort (request_id )
323
-
324
321
background_tasks = BackgroundTasks ()
325
322
background_tasks .add_task (release_worker_semaphore )
326
- #
327
- if os .getenv ("backend" ) == "vllm" :
328
- background_tasks .add_task (abort_request )
323
+
329
324
return background_tasks
330
325
331
326
@@ -348,8 +343,8 @@ async def api_generate_stream(request: Request):
348
343
params .pop ("prompt" )
349
344
logger .debug (f"params { params } " )
350
345
generator = worker .generate_stream_gate (params )
351
- # background_tasks = create_background_tasks(request_id)
352
- return StreamingResponse (generator , background = None )
346
+ background_tasks = create_background_tasks (request_id )
347
+ return StreamingResponse (generator , background = background_tasks )
353
348
354
349
355
350
@app .post ("/worker_generate_voice_stream" )
@@ -361,7 +356,7 @@ async def api_generate_stream(request: Request):
361
356
params ["request" ] = request
362
357
logger .debug (f"params { params } " )
363
358
generator = worker .generate_voice_stream (params )
364
- # background_tasks = create_background_tasks(request_id)
359
+ background_tasks = create_background_tasks (request_id )
365
360
response_format = params ["response_format" ]
366
361
content_type = {
367
362
"mp3" : "audio/mpeg" ,
@@ -373,7 +368,7 @@ async def api_generate_stream(request: Request):
373
368
}.get (response_format , f"audio/{ response_format } " )
374
369
return StreamingResponse (
375
370
generator ,
376
- background = None ,
371
+ background = background_tasks ,
377
372
media_type = content_type ,
378
373
headers = {
379
374
"Content-Disposition" : f"attachment; filename=speech.{ response_format } " ,
@@ -395,8 +390,7 @@ async def api_generate(request: Request):
395
390
logger .debug (f"params { params } " )
396
391
output = await worker .generate_gate (params )
397
392
release_worker_semaphore ()
398
- # if os.getenv("backend") == "vllm":
399
- # await worker.backend.engine.abort(request_id)
393
+
400
394
return JSONResponse (output )
401
395
402
396
0 commit comments