
Fix: Ray placement group allocation is not respecting env VLLM_RAY_PER_WORKER_GPUS (fractional GPU) #22577


Open · wants to merge 4 commits into main
12 changes: 8 additions & 4 deletions vllm/executor/ray_utils.py
@@ -8,6 +8,7 @@

 import msgspec

+import vllm.envs as envs
 import vllm.platforms
 from vllm.config import ParallelConfig
 from vllm.executor.msgspec_utils import decode_hook, encode_hook
@@ -338,6 +339,7 @@
     else:
         logger.info("No current placement group found. "
                     "Creating a new placement group.")
+        device_resource_request = envs.VLLM_RAY_PER_WORKER_GPUS
Contributor (severity: high)

Using a fractional VLLM_RAY_PER_WORKER_GPUS when world_size > 1 can lead to multiple workers from the same tensor-parallel group being scheduled on the same GPU. This is generally not supported and can cause failures.

While you've noted this is for a specific use case, this change could unintentionally affect users with multi-GPU setups. The previous implementation requested a full GPU (1.0) for each worker in the placement group, which prevented this co-location scenario. This PR changes that behavior.

To mitigate this risk for other users, please add a warning when world_size > 1 and a fractional GPU value is used, for example:

        device_resource_request = envs.VLLM_RAY_PER_WORKER_GPUS
        if parallel_config.world_size > 1 and device_resource_request < 1.0:
            logger.warning(
                "VLLM_RAY_PER_WORKER_GPUS is set to %f, which is less than 1.0. "
                "When using multi-GPU inference (world_size > 1), this can "
                "cause multiple workers to be placed on the same GPU, which "
                "is not supported and may lead to unexpected behavior or "
                "failures. Please ensure that each worker is placed on a "
                "separate GPU.", device_resource_request)

         num_devices_in_cluster = ray.cluster_resources().get(device_str, 0)
         # Log a warning message and delay resource allocation failure response.
         # Avoid immediate rejection to allow user-initiated placement group
@@ -349,7 +351,8 @@
                 device_str)
         # Create a new placement group
         placement_group_specs: List[Dict[str, float]] = ([{
-            device_str: 1.0
+            device_str:
+            device_resource_request
         } for _ in range(parallel_config.world_size)])

         # vLLM engine is also a worker to execute model with an accelerator,
@@ -358,12 +361,13 @@
         current_ip = get_ip()
         current_node_id = ray.get_runtime_context().get_node_id()
         current_node_resource = available_resources_per_node()[current_node_id]
-        if current_node_resource.get(device_str, 0) < 1:
+        if current_node_resource.get(device_str, 0) < device_resource_request:
             raise ValueError(
                 f"Current node has no {device_str} available. "
                 f"{current_node_resource=}. vLLM engine cannot start without "
-                f"{device_str}. Make sure you have at least 1 {device_str} "
-                f"available in a node {current_node_id=} {current_ip=}.")
+                f"{device_str}. Make sure you have at least {device_resource_request} "
+                f"{device_str} available in a node {current_node_id=} {current_ip=}."
+            )
         # This way, at least bundle is required to be created in a current
         # node.
         placement_group_specs[0][f"node:{current_ip}"] = 0.001

Check failure: GitHub Actions / pre-commit, Ruff (E501): vllm/executor/ray_utils.py:368:81: Line too long (87 > 80)
Check failure: GitHub Actions / pre-commit, Ruff (E501): vllm/executor/ray_utils.py:369:81: Line too long (85 > 80)
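
For users who intentionally want fractional allocation (e.g. packing several small, single-GPU engines onto one device), a hypothetical usage sketch; only VLLM_RAY_PER_WORKER_GPUS itself comes from this diff, and the model name, memory fraction, and other keyword values are illustrative:

# Hypothetical usage sketch: ask Ray for half a GPU per vLLM worker. The env
# var must be set before vLLM reads it; all other values are illustrative.
import os

os.environ["VLLM_RAY_PER_WORKER_GPUS"] = "0.5"

from vllm import LLM

llm = LLM(
    model="facebook/opt-125m",           # small model that fits in half a GPU
    tensor_parallel_size=1,              # world_size == 1 avoids co-location
    distributed_executor_backend="ray",  # use the Ray executor path patched here
    gpu_memory_utilization=0.4,          # keep the KV cache within the GPU share
)
print(llm.generate("Hello, my name is")[0].outputs[0].text)

Whether two such engines can actually share one GPU also depends on available GPU memory, so treat this purely as a sketch.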