Skip to content

Commit 6be5f50

Browse files
committed
Fix log spew on drain exit; fix Ctrl-C killing workers immediately when TP>1 by isolating the engine core process in its own process group with os.setpgrp()
Signed-off-by: Will Eaton <[email protected]>
1 parent 2d06d95 commit 6be5f50

File tree

2 files changed: +27 additions, -16 deletions (lines changed)

2 files changed

+27
-16
lines changed
Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# SPDX-License-Identifier: Apache-2.0
22
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
33

4-
from collections.abc import Awaitable
4+
import asyncio
55

66
from fastapi.responses import JSONResponse
77
from starlette.types import ASGIApp, Receive, Scope, Send
@@ -23,26 +23,34 @@ def set_rejecting_requests(value: bool) -> None:
2323

2424

2525
class ServiceUnavailableMiddleware:
26-
"""
27-
Middleware that checks if the server is currently unavailable
28-
(e.g., scaling or draining) and returns a 503 Service Unavailable.
29-
30-
"""
26+
"""Returns 503 during drain; suppresses CancelledError on force-exit."""
3127

3228
def __init__(self, app: ASGIApp) -> None:
3329
self.app = app
3430

35-
def __call__(self, scope: Scope, receive: Receive, send: Send) -> Awaitable[None]:
31+
async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None:
3632
if scope["type"] != "http":
37-
return self.app(scope, receive, send)
33+
await self.app(scope, receive, send)
34+
return
3835

39-
path = scope.get("path", "")
40-
rejecting = is_rejecting_requests()
41-
if rejecting and path not in _EXEMPT_PATHS:
36+
if is_rejecting_requests() and scope.get("path", "") not in _EXEMPT_PATHS:
4237
response = JSONResponse(
4338
content={"error": "Server is unavailable. Please try again later."},
4439
status_code=503,
4540
)
46-
return response(scope, receive, send)
47-
48-
return self.app(scope, receive, send)
41+
await response(scope, receive, send)
42+
return
43+
44+
try:
45+
await self.app(scope, receive, send)
46+
except asyncio.CancelledError:
47+
if not is_rejecting_requests():
48+
raise
49+
try:
50+
response = JSONResponse(
51+
content={"error": "Server is shutting down."},
52+
status_code=503,
53+
)
54+
await response(scope, receive, send)
55+
except (Exception, asyncio.CancelledError):
56+
pass

vllm/v1/engine/core.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -991,9 +991,12 @@ def signal_handler(*_):
991991
(EngineCoreRequestType.SHUTDOWN, None)
992992
)
993993

994-
# Either SIGTERM or SIGINT will terminate the engine_core
995994
signal.signal(signal.SIGTERM, signal_handler)
996-
signal.signal(signal.SIGINT, signal_handler)
995+
if shutdown_pipe is not None:
996+
# Isolate from terminal Ctrl-C; parent uses shutdown pipe.
997+
os.setpgrp()
998+
else:
999+
signal.signal(signal.SIGINT, signal_handler)
9971000
try:
9981001
vllm_config: VllmConfig = kwargs["vllm_config"]
9991002
parallel_config: ParallelConfig = vllm_config.parallel_config

0 commit comments

Comments
 (0)