Commit f104ba8

Fix bug where the worker semaphore was not released

1 parent fbeec3c commit f104ba8
2 files changed: 11 additions & 18 deletions


gpt_server/model_worker/base/model_worker_base.py

Lines changed: 7 additions & 13 deletions
@@ -203,7 +203,7 @@ def get_worker(
     controller_addr: str = "http://localhost:21001",
     worker_id: str = str(uuid.uuid4())[:8],
     model_names: List[str] = [""],
-    limit_worker_concurrency: int = 10000,
+    limit_worker_concurrency: int = 512,
     conv_template: str = None,  # type: ignore
 ):
     worker = cls(
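limit_worker_concurrency typically sizes the asyncio.Semaphore that caps in-flight requests on a worker, so lowering the default from 10000 (effectively unbounded) to 512 keeps a worker from accepting far more concurrent generations than its backend can serve. A minimal sketch of that pattern, with assumed names rather than the project's exact code:

import asyncio

# Assumed wiring: one semaphore per worker, sized by the new default.
limit_worker_concurrency = 512
semaphore = asyncio.Semaphore(limit_worker_concurrency)

async def acquire_worker_semaphore():
    # Blocks once 512 requests are already in flight.
    await semaphore.acquire()

def release_worker_semaphore():
    # Every acquire must be matched by exactly one release;
    # that pairing is the invariant this commit repairs.
    semaphore.release()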
@@ -318,14 +318,9 @@ def acquire_worker_semaphore():
 
 
 def create_background_tasks(request_id):
-    async def abort_request() -> None:
-        await worker.backend.engine.abort(request_id)
-
     background_tasks = BackgroundTasks()
     background_tasks.add_task(release_worker_semaphore)
-    #
-    if os.getenv("backend") == "vllm":
-        background_tasks.add_task(abort_request)
+
     return background_tasks
 
 
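Starlette runs a response's BackgroundTasks only after the body has been fully sent, so attaching release_worker_semaphore there guarantees the semaphore is freed even for long-lived streams; the vllm-specific abort_request task is dropped along with the old code. A self-contained sketch of the pattern, using hypothetical names:

import asyncio

from fastapi import FastAPI
from fastapi.responses import StreamingResponse
from starlette.background import BackgroundTasks

app = FastAPI()
semaphore = asyncio.Semaphore(512)

def release_worker_semaphore():
    semaphore.release()

def create_background_tasks():
    # Tasks registered here fire after the last chunk is sent,
    # the only safe point to release for a streaming response.
    tasks = BackgroundTasks()
    tasks.add_task(release_worker_semaphore)
    return tasks

@app.post("/worker_generate_stream")
async def api_generate_stream():
    await semaphore.acquire()

    async def generator():
        yield b"chunk\0"  # placeholder for real model output

    # Passing background=None here was the bug: nothing ever released
    # the semaphore, so the worker slowly ran out of request slots.
    return StreamingResponse(generator(), background=create_background_tasks())

The plain /worker_generate endpoint (last hunk in this file) needs none of this machinery; it can call release_worker_semaphore() inline once generation returns.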
@@ -348,8 +343,8 @@ async def api_generate_stream(request: Request):
     params.pop("prompt")
     logger.debug(f"params {params}")
     generator = worker.generate_stream_gate(params)
-    # background_tasks = create_background_tasks(request_id)
-    return StreamingResponse(generator, background=None)
+    background_tasks = create_background_tasks(request_id)
+    return StreamingResponse(generator, background=background_tasks)
 
 
 @app.post("/worker_generate_voice_stream")
@@ -361,7 +356,7 @@ async def api_generate_stream(request: Request):
     params["request"] = request
     logger.debug(f"params {params}")
     generator = worker.generate_voice_stream(params)
-    # background_tasks = create_background_tasks(request_id)
+    background_tasks = create_background_tasks(request_id)
     response_format = params["response_format"]
     content_type = {
         "mp3": "audio/mpeg",
@@ -373,7 +368,7 @@
     }.get(response_format, f"audio/{response_format}")
     return StreamingResponse(
         generator,
-        background=None,
+        background=background_tasks,
         media_type=content_type,
         headers={
             "Content-Disposition": f"attachment; filename=speech.{response_format}",
@@ -395,8 +390,7 @@ async def api_generate(request: Request):
     logger.debug(f"params {params}")
     output = await worker.generate_gate(params)
     release_worker_semaphore()
-    # if os.getenv("backend") == "vllm":
-    #     await worker.backend.engine.abort(request_id)
+
     return JSONResponse(output)
 

gpt_server/serving/openai_api_server.py

Lines changed: 4 additions & 5 deletions
@@ -508,7 +508,8 @@ async def chat_completion_stream_generator(
     try:
         error_code = content["error_code"]
     except Exception as e:
-        print(content)
+        logger.exception(f"发生异常 content:{content}")
+        content["error_code"] = ErrorCode.INTERNAL_ERROR
     if content["error_code"] != 0:
         yield f"data: {json.dumps(content, ensure_ascii=False)}\n\n"
     yield "data: [DONE]\n\n"
@@ -678,16 +679,14 @@ async def generate_completion_stream_generator(
 
 
 async def generate_completion_stream(payload: Dict[str, Any], worker_addr: str):
-    async with httpx.AsyncClient(
-        limits=httpx.Limits(max_connections=1000, max_keepalive_connections=100)
-    ) as client:
+    async with httpx.AsyncClient() as client:
         delimiter = b"\0"
         async with client.stream(
             "POST",
             worker_addr + "/worker_generate_stream",
             headers=headers,
             json=payload,
-            timeout=WORKER_API_TIMEOUT,
+            timeout=30,
         ) as response:
             # content = await response.aread()
             buffer = b""

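On the client side the commit drops the hand-tuned connection limits in favor of httpx's defaults and replaces the WORKER_API_TIMEOUT constant with a literal 30 seconds. A minimal sketch of consuming this NUL-delimited stream (the endpoint path and framing follow the diff; the rest is assumed):

from typing import Any, AsyncIterator, Dict

import httpx

async def generate_completion_stream(
    payload: Dict[str, Any], worker_addr: str
) -> AsyncIterator[bytes]:
    async with httpx.AsyncClient() as client:
        delimiter = b"\0"
        async with client.stream(
            "POST",
            worker_addr + "/worker_generate_stream",
            json=payload,
            timeout=30,
        ) as response:
            buffer = b""
            async for chunk in response.aiter_raw():
                buffer += chunk
                # The worker frames each JSON message with a trailing NUL.
                while delimiter in buffer:
                    message, buffer = buffer.split(delimiter, 1)
                    if message:
                        yield message

One caveat of the hardcoded value: httpx applies a single number to connect, read, write, and pool timeouts alike, so a generation that pauses longer than 30 seconds between chunks will raise ReadTimeout.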