|
36 | 36 | from vllm.logger import init_logger
|
37 | 37 | from vllm.platforms import CpuArchEnum, current_platform
|
38 | 38 | from vllm.plugins import load_general_plugins
|
| 39 | +from vllm.ray.lazy_utils import is_ray_initialized |
39 | 40 | from vllm.reasoning import ReasoningParserManager
|
40 | 41 | from vllm.test_utils import MODEL_WEIGHTS_S3_BUCKET, MODELS_ON_S3
|
41 | 42 | from vllm.transformers_utils.utils import check_gguf_file
|
@@ -1099,6 +1100,15 @@ def create_engine_config(
|
1099 | 1100 | kv_sharing_fast_prefill=self.kv_sharing_fast_prefill,
|
1100 | 1101 | )
|
1101 | 1102 |
|
| 1103 | + ray_runtime_env = None |
| 1104 | + if is_ray_initialized(): |
| 1105 | + # Ray Serve LLM calls `create_engine_config` in the context |
| 1106 | + # of a Ray task, therefore we check is_ray_initialized() |
| 1107 | + # as opposed to is_in_ray_actor(). |
| 1108 | + import ray |
| 1109 | + ray_runtime_env = ray.get_runtime_context().runtime_env |
| 1110 | + logger.info("Using ray runtime env: %s", ray_runtime_env) |
| 1111 | + |
1102 | 1112 | # Get the current placement group if Ray is initialized and
|
1103 | 1113 | # we are in a Ray actor. If so, then the placement group will be
|
1104 | 1114 | # passed to spawned processes.
|
@@ -1211,6 +1221,7 @@ def create_engine_config(
|
1211 | 1221 | max_parallel_loading_workers=self.max_parallel_loading_workers,
|
1212 | 1222 | disable_custom_all_reduce=self.disable_custom_all_reduce,
|
1213 | 1223 | ray_workers_use_nsight=self.ray_workers_use_nsight,
|
| 1224 | + ray_runtime_env=ray_runtime_env, |
1214 | 1225 | placement_group=placement_group,
|
1215 | 1226 | distributed_executor_backend=self.distributed_executor_backend,
|
1216 | 1227 | worker_cls=self.worker_cls,
|
|
0 commit comments