16
16
from vllm .sequence import ExecuteModelRequest , IntermediateTensors
17
17
from vllm .utils import get_ip
18
18
from vllm .worker .worker_base import WorkerWrapperBase
19
+ import vllm .envs as envs
19
20
20
21
if TYPE_CHECKING :
21
22
from vllm .v1 .core .sched .output import SchedulerOutput
@@ -338,6 +339,7 @@ def initialize_ray_cluster(
338
339
else :
339
340
logger .info ("No current placement group found. "
340
341
"Creating a new placement group." )
342
+ device_resource_request = envs .VLLM_RAY_PER_WORKER_GPUS
341
343
num_devices_in_cluster = ray .cluster_resources ().get (device_str , 0 )
342
344
# Log a warning message and delay resource allocation failure response.
343
345
# Avoid immediate rejection to allow user-initiated placement group
@@ -349,7 +351,7 @@ def initialize_ray_cluster(
349
351
device_str )
350
352
# Create a new placement group
351
353
placement_group_specs : List [Dict [str , float ]] = ([{
352
- device_str : 1.0
354
+ device_str : device_resource_request
353
355
} for _ in range (parallel_config .world_size )])
354
356
355
357
# vLLM engine is also a worker to execute model with an accelerator,
@@ -358,11 +360,11 @@ def initialize_ray_cluster(
358
360
current_ip = get_ip ()
359
361
current_node_id = ray .get_runtime_context ().get_node_id ()
360
362
current_node_resource = available_resources_per_node ()[current_node_id ]
361
- if current_node_resource .get (device_str , 0 ) < 1 :
363
+ if current_node_resource .get (device_str , 0 ) < device_resource_request :
362
364
raise ValueError (
363
365
f"Current node has no { device_str } available. "
364
366
f"{ current_node_resource = } . vLLM engine cannot start without "
365
- f"{ device_str } . Make sure you have at least 1 { device_str } "
367
+ f"{ device_str } . Make sure you have at least { device_resource_request } { device_str } "
366
368
f"available in a node { current_node_id = } { current_ip = } ." )
367
369
# This way, at least bundle is required to be created in a current
368
370
# node.
0 commit comments