From fb38d2b4d650ffed4fb0586aae02a6bdee81c369 Mon Sep 17 00:00:00 2001 From: eric-higgins-ai Date: Thu, 17 Jul 2025 14:34:15 -0700 Subject: [PATCH 1/3] [Misc] pass Ray runtime env to engine core Signed-off-by: eric-higgins-ai --- vllm/config.py | 5 +++++ vllm/engine/arg_utils.py | 3 +++ vllm/executor/ray_utils.py | 6 +++--- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/vllm/config.py b/vllm/config.py index 22f740171369..865be138fd0b 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -57,6 +57,7 @@ if TYPE_CHECKING: from _typeshed import DataclassInstance from ray.util.placement_group import PlacementGroup + from ray.runtime_env import RuntimeEnv from transformers.configuration_utils import PretrainedConfig import vllm.model_executor.layers.quantization as me_quant @@ -73,6 +74,7 @@ else: DataclassInstance = Any PlacementGroup = Any + RuntimeEnv = Any PretrainedConfig = Any ExecutorBase = Any QuantizationConfig = Any @@ -1902,6 +1904,9 @@ class ParallelConfig: placement_group: Optional["PlacementGroup"] = None """ray distributed model workers placement group.""" + runtime_env: Optional["RuntimeEnv"] = None + """ray runtime environment for distributed workers""" + distributed_executor_backend: Optional[Union[DistributedExecutorBackend, type["ExecutorBase"]]] = None """Backend to use for distributed model diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index ae5eb46fa967..d1667e78b87f 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -1088,12 +1088,14 @@ def create_engine_config( # we are in a Ray actor. If so, then the placement group will be # passed to spawned processes. placement_group = None + runtime_env = None if is_in_ray_actor(): import ray # This call initializes Ray automatically if it is not initialized, # but we should not do this here. placement_group = ray.util.get_current_placement_group() + runtime_env = ray.get_runtime_context().runtime_env data_parallel_external_lb = self.data_parallel_rank is not None if data_parallel_external_lb: @@ -1170,6 +1172,7 @@ def create_engine_config( disable_custom_all_reduce=self.disable_custom_all_reduce, ray_workers_use_nsight=self.ray_workers_use_nsight, placement_group=placement_group, + runtime_env=runtime_env, distributed_executor_backend=self.distributed_executor_backend, worker_cls=self.worker_cls, worker_extension_cls=self.worker_extension_cls, diff --git a/vllm/executor/ray_utils.py b/vllm/executor/ray_utils.py index c222f1609096..2bb75a5ffc77 100644 --- a/vllm/executor/ray_utils.py +++ b/vllm/executor/ray_utils.py @@ -288,14 +288,14 @@ def initialize_ray_cluster( elif current_platform.is_rocm() or current_platform.is_xpu(): # Try to connect existing ray instance and create a new one if not found try: - ray.init("auto") + ray.init("auto", runtime_env=parallel_config.runtime_env) except ConnectionError: logger.warning( "No existing RAY instance detected. " "A new instance will be launched with current node resources.") - ray.init(address=ray_address, num_gpus=parallel_config.world_size) + ray.init(address=ray_address, num_gpus=parallel_config.world_size, runtime_env=parallel_config.runtime_env) else: - ray.init(address=ray_address) + ray.init(address=ray_address, runtime_env=parallel_config.runtime_env) device_str = current_platform.ray_device_key if not device_str: From e098713e8e7bc2d8bf14ef9c015bc8e24e23448e Mon Sep 17 00:00:00 2001 From: eric-higgins-ai Date: Thu, 17 Jul 2025 14:58:24 -0700 Subject: [PATCH 2/3] fix pre-commit Signed-off-by: eric-higgins-ai --- vllm/config.py | 2 +- vllm/executor/ray_utils.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/vllm/config.py b/vllm/config.py index 865be138fd0b..80d7d4e17602 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -56,8 +56,8 @@ if TYPE_CHECKING: from _typeshed import DataclassInstance - from ray.util.placement_group import PlacementGroup from ray.runtime_env import RuntimeEnv + from ray.util.placement_group import PlacementGroup from transformers.configuration_utils import PretrainedConfig import vllm.model_executor.layers.quantization as me_quant diff --git a/vllm/executor/ray_utils.py b/vllm/executor/ray_utils.py index 2bb75a5ffc77..cec3729c4cce 100644 --- a/vllm/executor/ray_utils.py +++ b/vllm/executor/ray_utils.py @@ -293,7 +293,9 @@ def initialize_ray_cluster( logger.warning( "No existing RAY instance detected. " "A new instance will be launched with current node resources.") - ray.init(address=ray_address, num_gpus=parallel_config.world_size, runtime_env=parallel_config.runtime_env) + ray.init(address=ray_address, + num_gpus=parallel_config.world_size, + runtime_env=parallel_config.runtime_env) else: ray.init(address=ray_address, runtime_env=parallel_config.runtime_env) From 5cb10cb6080e7e5a8e12692a39d1e7acbec817ae Mon Sep 17 00:00:00 2001 From: eric-higgins-ai Date: Thu, 17 Jul 2025 15:14:27 -0700 Subject: [PATCH 3/3] oops Signed-off-by: eric-higgins-ai --- vllm/executor/ray_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/executor/ray_utils.py b/vllm/executor/ray_utils.py index cec3729c4cce..64f736d525ab 100644 --- a/vllm/executor/ray_utils.py +++ b/vllm/executor/ray_utils.py @@ -294,8 +294,8 @@ def initialize_ray_cluster( "No existing RAY instance detected. " "A new instance will be launched with current node resources.") ray.init(address=ray_address, - num_gpus=parallel_config.world_size, - runtime_env=parallel_config.runtime_env) + num_gpus=parallel_config.world_size, + runtime_env=parallel_config.runtime_env) else: ray.init(address=ray_address, runtime_env=parallel_config.runtime_env)