Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions vllm/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@

if TYPE_CHECKING:
from _typeshed import DataclassInstance
from ray.runtime_env import RuntimeEnv
from ray.util.placement_group import PlacementGroup
from transformers.configuration_utils import PretrainedConfig

Expand All @@ -73,6 +74,7 @@
else:
DataclassInstance = Any
PlacementGroup = Any
RuntimeEnv = Any
PretrainedConfig = Any
ExecutorBase = Any
QuantizationConfig = Any
Expand Down Expand Up @@ -1902,6 +1904,9 @@ class ParallelConfig:
placement_group: Optional["PlacementGroup"] = None
"""ray distributed model workers placement group."""

runtime_env: Optional["RuntimeEnv"] = None
"""ray runtime environment for distributed workers"""

distributed_executor_backend: Optional[Union[DistributedExecutorBackend,
type["ExecutorBase"]]] = None
"""Backend to use for distributed model
Expand Down
3 changes: 3 additions & 0 deletions vllm/engine/arg_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1088,12 +1088,14 @@ def create_engine_config(
# we are in a Ray actor. If so, then the placement group will be
# passed to spawned processes.
placement_group = None
runtime_env = None
if is_in_ray_actor():
import ray

# This call initializes Ray automatically if it is not initialized,
# but we should not do this here.
placement_group = ray.util.get_current_placement_group()
runtime_env = ray.get_runtime_context().runtime_env
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is passed in the runtime env?


data_parallel_external_lb = self.data_parallel_rank is not None
if data_parallel_external_lb:
Expand Down Expand Up @@ -1170,6 +1172,7 @@ def create_engine_config(
disable_custom_all_reduce=self.disable_custom_all_reduce,
ray_workers_use_nsight=self.ray_workers_use_nsight,
placement_group=placement_group,
runtime_env=runtime_env,
distributed_executor_backend=self.distributed_executor_backend,
worker_cls=self.worker_cls,
worker_extension_cls=self.worker_extension_cls,
Expand Down
8 changes: 5 additions & 3 deletions vllm/executor/ray_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,14 +288,16 @@ def initialize_ray_cluster(
elif current_platform.is_rocm() or current_platform.is_xpu():
# Try to connect to an existing Ray instance; create a new one if none is found
try:
ray.init("auto")
ray.init("auto", runtime_env=parallel_config.runtime_env)
except ConnectionError:
logger.warning(
"No existing RAY instance detected. "
"A new instance will be launched with current node resources.")
ray.init(address=ray_address, num_gpus=parallel_config.world_size)
ray.init(address=ray_address,
num_gpus=parallel_config.world_size,
runtime_env=parallel_config.runtime_env)
else:
ray.init(address=ray_address)
ray.init(address=ray_address, runtime_env=parallel_config.runtime_env)

device_str = current_platform.ray_device_key
if not device_str:
Expand Down