Skip to content
This repository was archived by the owner on Sep 4, 2025. It is now read-only.

Commit 7eebe8c

Browse files
authored
[distributed][misc] error on same VLLM_HOST_IP setting (vllm-project#7756)
1 parent 8678a69 commit 7eebe8c

File tree

2 files changed, with 17 additions and 1 deletion.

vllm/envs.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -137,7 +137,10 @@ def get_default_config_root():
137137
os.path.join(get_default_cache_root(), "vllm"),
138138
)),
139139

140-
# used in distributed environment to determine the master address
140+
# used in distributed environment to determine the ip address
141+
# of the current node, when the node has multiple network interfaces.
142+
# If you are using multi-node inference, you should set this differently
143+
# on each node.
141144
'VLLM_HOST_IP':
142145
lambda: os.getenv('VLLM_HOST_IP', "") or os.getenv("HOST_IP", ""),
143146

vllm/executor/ray_gpu_executor.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -218,6 +218,19 @@ def sort_by_driver_then_worker_ip(worker):
218218
for node_id, gpu_ids in node_gpus.items():
219219
node_gpus[node_id] = sorted(gpu_ids)
220220

221+
all_ips = set(worker_ips + [driver_ip])
222+
n_ips = len(all_ips)
223+
n_nodes = len(node_workers)
224+
225+
if n_nodes != n_ips:
226+
raise RuntimeError(
227+
f"Every node should have a unique IP address. Got {n_nodes}"
228+
f" nodes with node ids {list(node_workers.keys())} and "
229+
f"{n_ips} unique IP addresses {all_ips}. Please check your"
230+
" network configuration. If you set `VLLM_HOST_IP` or "
231+
"`HOST_IP` environment variable, make sure it is unique for"
232+
" each node.")
233+
221234
VLLM_INSTANCE_ID = get_vllm_instance_id()
222235

223236
# Set environment variables for the driver and workers.

0 commit comments

Comments
 (0)