
Commit baece8c

[Frontend] Add unix domain socket support (#18097)
Signed-off-by: <[email protected]>
Signed-off-by: yyw <[email protected]>
1 parent 2fcf6b2 commit baece8c

File tree

5 files changed: +86 −16 lines

docs/cli/README.md
tests/entrypoints/openai/test_uds.py
tests/utils.py
vllm/entrypoints/openai/api_server.py
vllm/entrypoints/openai/cli_args.py

docs/cli/README.md

Lines changed: 3 additions & 0 deletions

```diff
@@ -29,6 +29,9 @@ Start the vLLM OpenAI Compatible API server.
 # Specify the port
 vllm serve meta-llama/Llama-2-7b-hf --port 8100
 
+# Serve over a Unix domain socket
+vllm serve meta-llama/Llama-2-7b-hf --uds /tmp/vllm.sock
+
 # Check with --help for more options
 # To list all groups
 vllm serve --help=listgroup
```
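For context, a minimal client sketch (not part of this commit) for reaching a server started as above. It assumes the `/tmp/vllm.sock` path from the docs example; `httpx` transports accept a `uds` path, and since no TCP connection is made, the URL's host only fills the `Host` header.

```python
# Client sketch (illustrative): query a vLLM server listening on
# /tmp/vllm.sock, as started by the docs example above.
import httpx

# All connections are routed through the socket file; the URL's host
# is only used for the Host header.
transport = httpx.HTTPTransport(uds="/tmp/vllm.sock")

with httpx.Client(transport=transport) as client:
    response = client.get("http://localhost/v1/models")
    response.raise_for_status()
    print(response.json())
```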

tests/entrypoints/openai/test_uds.py

Lines changed: 43 additions & 0 deletions (new file)

```python
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

from tempfile import TemporaryDirectory

import httpx
import pytest

from vllm.version import __version__ as VLLM_VERSION

from ...utils import RemoteOpenAIServer

MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"


@pytest.fixture(scope="module")
def server():
    with TemporaryDirectory() as tmpdir:
        args = [
            # use half precision for speed and memory savings in CI environment
            "--dtype",
            "bfloat16",
            "--max-model-len",
            "8192",
            "--enforce-eager",
            "--max-num-seqs",
            "128",
            "--uds",
            f"{tmpdir}/vllm.sock",
        ]

        with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
            yield remote_server


@pytest.mark.asyncio
async def test_show_version(server: RemoteOpenAIServer):
    transport = httpx.HTTPTransport(uds=server.uds)
    client = httpx.Client(transport=transport)
    response = client.get(server.url_for("version"))
    response.raise_for_status()

    assert response.json() == {"version": VLLM_VERSION}
```
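Note the test is marked `@pytest.mark.asyncio` yet uses a synchronous `httpx.Client`. An async equivalent is sketched below (not part of the diff); it assumes a server is already listening on the given socket path and uses `httpx.AsyncHTTPTransport`, the async counterpart of the transport used above.

```python
# Async variant of the version check (a sketch, not from this commit).
import asyncio

import httpx


async def check_version(uds_path: str) -> None:
    # AsyncHTTPTransport also accepts a `uds` path.
    transport = httpx.AsyncHTTPTransport(uds=uds_path)
    async with httpx.AsyncClient(transport=transport) as client:
        response = await client.get("http://localhost/version")
        response.raise_for_status()
        print(response.json())


# Assumes a server was started with `--uds /tmp/vllm.sock`.
asyncio.run(check_version("/tmp/vllm.sock"))
```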

tests/utils.py

Lines changed: 19 additions & 8 deletions

```diff
@@ -17,6 +17,7 @@
 from typing import Any, Callable, Literal, Optional, Union
 
 import cloudpickle
+import httpx
 import openai
 import pytest
 import requests
@@ -88,10 +89,12 @@ def __init__(self,
                 raise ValueError("You have manually specified the port "
                                  "when `auto_port=True`.")
 
-            # Don't mutate the input args
-            vllm_serve_args = vllm_serve_args + [
-                "--port", str(get_open_port())
-            ]
+            # No need for a port if using unix sockets
+            if "--uds" not in vllm_serve_args:
+                # Don't mutate the input args
+                vllm_serve_args = vllm_serve_args + [
+                    "--port", str(get_open_port())
+                ]
         if seed is not None:
             if "--seed" in vllm_serve_args:
                 raise ValueError("You have manually specified the seed "
@@ -104,8 +107,13 @@ def __init__(self,
         subparsers = parser.add_subparsers(required=False, dest="subparser")
         parser = ServeSubcommand().subparser_init(subparsers)
         args = parser.parse_args(["--model", model, *vllm_serve_args])
-        self.host = str(args.host or 'localhost')
-        self.port = int(args.port)
+        self.uds = args.uds
+        if args.uds:
+            self.host = None
+            self.port = None
+        else:
+            self.host = str(args.host or 'localhost')
+            self.port = int(args.port)
 
         self.show_hidden_metrics = \
             args.show_hidden_metrics_for_version is not None
@@ -150,9 +158,11 @@ def __exit__(self, exc_type, exc_value, traceback):
     def _wait_for_server(self, *, url: str, timeout: float):
         # run health check
         start = time.time()
+        client = (httpx.Client(transport=httpx.HTTPTransport(
+            uds=self.uds)) if self.uds else requests)
         while True:
             try:
-                if requests.get(url).status_code == 200:
+                if client.get(url).status_code == 200:
                     break
             except Exception:
                 # this exception can only be raised by requests.get,
@@ -170,7 +180,8 @@ def _wait_for_server(self, *, url: str, timeout: float):
 
     @property
     def url_root(self) -> str:
-        return (f"http://{self.uds.split('/')[-1]}"
-        return f"http://{self.host}:{self.port}"
+        return (f"http://{self.uds.split('/')[-1]}"
+                if self.uds else f"http://{self.host}:{self.port}")
 
     def url_for(self, *parts: str) -> str:
         return self.url_root + "/" + "/".join(parts)
```
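The new `url_root` uses the socket's filename (`self.uds.split('/')[-1]`) as a stand-in authority. This works because a UDS transport never resolves the URL's host to a network address; the authority only has to be syntactically valid. A small sketch of that property, with a hypothetical socket path and a server assumed to be listening there:

```python
# Sketch: with a UDS transport, the URL authority is a placeholder
# (hypothetical socket path; assumes a vLLM server is listening on it).
import httpx

client = httpx.Client(
    transport=httpx.HTTPTransport(uds="/tmp/vllm.sock"))

# Both requests reach the same server over the same socket file; only
# the Host header differs between them.
print(client.get("http://vllm.sock/health").status_code)
print(client.get("http://example.invalid/health").status_code)
```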

vllm/entrypoints/openai/api_server.py

Lines changed: 19 additions & 8 deletions

```diff
@@ -1777,6 +1777,12 @@ def create_server_socket(addr: tuple[str, int]) -> socket.socket:
     return sock
 
 
+def create_server_unix_socket(path: str) -> socket.socket:
+    sock = socket.socket(family=socket.AF_UNIX, type=socket.SOCK_STREAM)
+    sock.bind(path)
+    return sock
+
+
 def validate_api_server_args(args):
     valid_tool_parses = ToolParserManager.tool_parsers.keys()
     if args.enable_auto_tool_choice \
@@ -1807,8 +1813,11 @@ def setup_server(args):
     # workaround to make sure that we bind the port before the engine is set up.
     # This avoids race conditions with ray.
     # see https://github.com/vllm-project/vllm/issues/8204
-    sock_addr = (args.host or "", args.port)
-    sock = create_server_socket(sock_addr)
+    if args.uds:
+        sock = create_server_unix_socket(args.uds)
+    else:
+        sock_addr = (args.host or "", args.port)
+        sock = create_server_socket(sock_addr)
 
     # workaround to avoid footguns where uvicorn drops requests with too
     # many concurrent requests active
@@ -1820,12 +1829,14 @@ def signal_handler(*_) -> None:
 
     signal.signal(signal.SIGTERM, signal_handler)
 
-    addr, port = sock_addr
-    is_ssl = args.ssl_keyfile and args.ssl_certfile
-    host_part = f"[{addr}]" if is_valid_ipv6_address(
-        addr) else addr or "0.0.0.0"
-    listen_address = f"http{'s' if is_ssl else ''}://{host_part}:{port}"
-
+    if args.uds:
+        listen_address = f"unix:{args.uds}"
+    else:
+        addr, port = sock_addr
+        is_ssl = args.ssl_keyfile and args.ssl_certfile
+        host_part = f"[{addr}]" if is_valid_ipv6_address(
+            addr) else addr or "0.0.0.0"
+        listen_address = f"http{'s' if is_ssl else ''}://{host_part}:{port}"
     return listen_address, sock
```

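One property of `AF_UNIX` worth keeping in mind when using `--uds`: `bind()` creates the socket file, fails if the path already exists, and the file outlives the process. The sketch below illustrates those semantics in isolation; the path and the unlink handling are illustrative, not behavior this commit adds.

```python
# Standalone illustration of AF_UNIX bind semantics (not vLLM code).
import os
import socket

path = "/tmp/demo.sock"  # hypothetical path

# bind() raises "Address already in use" if a previous run left the
# socket file behind, so demos commonly unlink a stale path first.
if os.path.exists(path):
    os.unlink(path)

sock = socket.socket(family=socket.AF_UNIX, type=socket.SOCK_STREAM)
sock.bind(path)  # creates the socket file at `path`
sock.listen()
print(f"listening on unix:{path}")

sock.close()
os.unlink(path)  # the file persists after close; remove it explicitly
```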
vllm/entrypoints/openai/cli_args.py

Lines changed: 2 additions & 0 deletions

```diff
@@ -72,6 +72,8 @@ class FrontendArgs:
     """Host name."""
     port: int = 8000
     """Port number."""
+    uds: Optional[str] = None
+    """Unix domain socket path. If set, host and port arguments are ignored."""
     uvicorn_log_level: Literal["debug", "info", "warning", "error", "critical",
                                "trace"] = "info"
     """Log level for uvicorn."""
```
