Skip to content

Commit 565650b

Browse files
committed
Fix log spew on drain exit; fix Ctrl-C killing workers immediately when TP>1 by isolating the process with setpgrp()
Signed-off-by: Will Eaton <weaton@redhat.com>
1 parent 2d06d95 commit 565650b

File tree

2 files changed

+26
-12
lines changed

2 files changed

+26
-12
lines changed

vllm/entrypoints/serve/middleware.py

Lines changed: 21 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
# SPDX-License-Identifier: Apache-2.0
22
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
33

4-
from collections.abc import Awaitable
4+
import asyncio
55

66
from fastapi.responses import JSONResponse
77
from starlette.types import ASGIApp, Receive, Scope, Send
@@ -26,23 +26,34 @@ class ServiceUnavailableMiddleware:
2626
"""
2727
Middleware that checks if the server is currently unavailable
2828
(e.g., scaling or draining) and returns a 503 Service Unavailable.
29-
3029
"""
3130

3231
def __init__(self, app: ASGIApp) -> None:
3332
self.app = app
3433

35-
def __call__(self, scope: Scope, receive: Receive, send: Send) -> Awaitable[None]:
34+
async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None:
3635
if scope["type"] != "http":
37-
return self.app(scope, receive, send)
36+
await self.app(scope, receive, send)
37+
return
3838

39-
path = scope.get("path", "")
40-
rejecting = is_rejecting_requests()
41-
if rejecting and path not in _EXEMPT_PATHS:
39+
if is_rejecting_requests() and scope.get("path", "") not in _EXEMPT_PATHS:
4240
response = JSONResponse(
4341
content={"error": "Server is unavailable. Please try again later."},
4442
status_code=503,
4543
)
46-
return response(scope, receive, send)
47-
48-
return self.app(scope, receive, send)
44+
await response(scope, receive, send)
45+
return
46+
47+
try:
48+
await self.app(scope, receive, send)
49+
except asyncio.CancelledError:
50+
if not is_rejecting_requests():
51+
raise
52+
try:
53+
response = JSONResponse(
54+
content={"error": "Server is shutting down."},
55+
status_code=503,
56+
)
57+
await response(scope, receive, send)
58+
except (Exception, asyncio.CancelledError):
59+
pass

vllm/v1/engine/core.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -991,9 +991,12 @@ def signal_handler(*_):
991991
(EngineCoreRequestType.SHUTDOWN, None)
992992
)
993993

994-
# Either SIGTERM or SIGINT will terminate the engine_core
995994
signal.signal(signal.SIGTERM, signal_handler)
996-
signal.signal(signal.SIGINT, signal_handler)
995+
if shutdown_pipe is not None:
996+
# Isolate from terminal Ctrl-C; parent uses shutdown pipe.
997+
os.setpgrp()
998+
else:
999+
signal.signal(signal.SIGINT, signal_handler)
9971000
try:
9981001
vllm_config: VllmConfig = kwargs["vllm_config"]
9991002
parallel_config: ParallelConfig = vllm_config.parallel_config

0 commit comments

Comments
 (0)