
Commit 178d4ba

jberkhahn authored and dtrifiro committed
create socket before starting the engine and pass socket into the vllm server on startup
1 parent 197f101 commit 178d4ba

File tree

2 files changed: +14 -1 lines changed


src/vllm_tgis_adapter/__main__.py

Lines changed: 8 additions & 1 deletion
@@ -11,6 +11,7 @@
 import vllm
 from vllm.entrypoints.openai.api_server import (
     build_async_engine_client,
+    create_server_socket,
 )
 from vllm.entrypoints.openai.cli_args import make_arg_parser
 from vllm.utils import FlexibleArgumentParser
@@ -32,12 +33,18 @@
 async def start_servers(args: argparse.Namespace) -> None:
     loop = asyncio.get_running_loop()
 
+    # workaround to make sure that we bind the port before the engine is set up.
+    # This avoids race conditions with ray.
+    # see https://github.com/vllm-project/vllm/issues/8204
+    sock_addr = (args.host or "", args.port)
+    sock = create_server_socket(sock_addr)
+
     tasks: list[asyncio.Task] = []
     async with build_async_engine_client(args) as engine:
         add_logging_wrappers(engine)
 
         http_server_task = loop.create_task(
-            run_http_server(args, engine),
+            run_http_server(args, engine, sock),
             name="http_server",
         )
         # The http server task will catch interrupt signals for us
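
The comment in the hunk above captures the rationale: if the port is only bound after build_async_engine_client finishes, another process launched by Ray can claim it first (see vllm-project/vllm#8204). Binding the socket before engine start-up and handing it to the HTTP server closes that window. Below is a minimal, stdlib-only sketch of the same pattern; the names and the asyncio server are stand-ins, not the adapter's actual code, which uses vLLM's create_server_socket and serve_http.

import asyncio
import socket


def bind_early(host: str, port: int) -> socket.socket:
    # Reserve the port immediately; connections are accepted later.
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    sock.bind((host, port))
    return sock


async def handle(reader: asyncio.StreamReader, writer: asyncio.StreamWriter) -> None:
    # Trivial connection handler, only here to make the sketch runnable.
    writer.close()
    await writer.wait_closed()


async def main() -> None:
    sock = bind_early("", 8000)   # the port is held from this point on
    await asyncio.sleep(5)        # stand-in for slow engine start-up
    # The server adopts the pre-bound socket instead of binding its own.
    server = await asyncio.start_server(handle, sock=sock)
    async with server:
        await server.serve_forever()


if __name__ == "__main__":
    asyncio.run(main())

The important property is that sock.bind() runs before any slow initialization, while the server that later accepts connections simply adopts the already-bound socket.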

src/vllm_tgis_adapter/http.py

Lines changed: 6 additions & 0 deletions
@@ -11,6 +11,7 @@
 
 if TYPE_CHECKING:
     import argparse
+    import socket
 
     from fastapi import Request, Response
     from vllm.engine.async_llm_engine import AsyncLLMEngine
@@ -24,6 +25,7 @@
 async def run_http_server(
     args: argparse.Namespace,
     engine: AsyncLLMEngine | AsyncEngineClient,
+    sock: socket.socket | None = None,
     **uvicorn_kwargs,  # noqa: ANN003
 ) -> None:
     # modified copy of vllm.entrypoints.openai.api_server.run_server that
@@ -63,6 +65,10 @@ async def set_correlation_id(request: Request, call_next: Callable) -> Response:
     }
     serve_kwargs.update(uvicorn_kwargs)
 
+    # should only be used in versions of vllm >= 0.7.3
+    if "sock" in inspect.getfullargspec(serve_http).args:
+        serve_kwargs["sock"] = sock
+
     shutdown_coro = await serve_http(app, **serve_kwargs)
 
     # launcher.serve_http returns a shutdown coroutine to await
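
The guard above keys off the installed vLLM's serve_http signature rather than a version string, so the sock keyword is only forwarded when the callee actually declares it (vLLM >= 0.7.3 according to the comment). Here is a small self-contained sketch of that feature-detection idiom; serve_old and serve_new are hypothetical stand-ins for the two serve_http signatures, not real vLLM functions.

import inspect
from typing import Any, Callable


def forward_supported_kwargs(func: Callable[..., Any], *args: Any, **optional: Any) -> Any:
    # Drop any keyword argument the callee does not declare, so one call
    # site works across library versions with different signatures.
    accepted = inspect.getfullargspec(func).args
    kwargs = {name: value for name, value in optional.items() if name in accepted}
    return func(*args, **kwargs)


def serve_old(app: str) -> str:
    # Stand-in for an older serve_http that knows nothing about `sock`.
    return f"serving {app}"


def serve_new(app: str, sock: str | None = None) -> str:
    # Stand-in for a newer serve_http that accepts a pre-bound socket.
    return f"serving {app} via {sock}"


print(forward_supported_kwargs(serve_old, "api", sock="pre-bound"))  # `sock` dropped
print(forward_supported_kwargs(serve_new, "api", sock="pre-bound"))  # `sock` forwarded

Note that inspect.getfullargspec(...).args only lists positional-or-keyword parameters; a keyword-only sock would appear in .kwonlyargs instead, and inspect.signature() is an alternative that covers both uniformly.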
