Skip to content

Error when using in Ray cluster #36

@eugr

Description

@eugr

When running vLLM in a Ray cluster across two nodes with -tp 2, I get the following error (regular safetensors loading works):

(EngineCore_DP0 pid=2442) Process EngineCore_DP0:
(EngineCore_DP0 pid=2442) Traceback (most recent call last):
(EngineCore_DP0 pid=2442)   File "/usr/lib/python3.12/multiprocessing/process.py", line 314, in _bootstrap
(EngineCore_DP0 pid=2442)     self.run()
(EngineCore_DP0 pid=2442)   File "/usr/lib/python3.12/multiprocessing/process.py", line 108, in run
(EngineCore_DP0 pid=2442)     self._target(*self._args, **self._kwargs)
(EngineCore_DP0 pid=2442)   File "/usr/local/lib/python3.12/dist-packages/vllm/v1/engine/core.py", line 846, in run_engine_core
(EngineCore_DP0 pid=2442)     raise e
(EngineCore_DP0 pid=2442)   File "/usr/local/lib/python3.12/dist-packages/vllm/v1/engine/core.py", line 833, in run_engine_core
(EngineCore_DP0 pid=2442)     engine_core = EngineCoreProc(*args, **kwargs)
(EngineCore_DP0 pid=2442)                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=2442)   File "/usr/local/lib/python3.12/dist-packages/vllm/v1/engine/core.py", line 609, in __init__
(EngineCore_DP0 pid=2442)     super().__init__(
(EngineCore_DP0 pid=2442)   File "/usr/local/lib/python3.12/dist-packages/vllm/v1/engine/core.py", line 102, in __init__
(EngineCore_DP0 pid=2442)     self.model_executor = executor_class(vllm_config)
(EngineCore_DP0 pid=2442)                           ^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=2442)   File "/usr/local/lib/python3.12/dist-packages/vllm/v1/executor/abstract.py", line 101, in __init__
(EngineCore_DP0 pid=2442)     self._init_executor()
(EngineCore_DP0 pid=2442)   File "/usr/local/lib/python3.12/dist-packages/vllm/v1/executor/ray_executor.py", line 97, in _init_executor
(EngineCore_DP0 pid=2442)     self._init_workers_ray(placement_group)
(EngineCore_DP0 pid=2442)   File "/usr/local/lib/python3.12/dist-packages/vllm/v1/executor/ray_executor.py", line 371, in _init_workers_ray
(EngineCore_DP0 pid=2442)     self.collective_rpc("load_model")
(EngineCore_DP0 pid=2442)   File "/usr/local/lib/python3.12/dist-packages/vllm/v1/executor/ray_executor.py", line 493, in collective_rpc
(EngineCore_DP0 pid=2442)     return ray.get(ray_worker_outputs, timeout=timeout)
(EngineCore_DP0 pid=2442)            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=2442)   File "/usr/local/lib/python3.12/dist-packages/ray/_private/auto_init_hook.py", line 22, in auto_init_wrapper
(EngineCore_DP0 pid=2442)     return fn(*args, **kwargs)
(EngineCore_DP0 pid=2442)            ^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=2442)   File "/usr/local/lib/python3.12/dist-packages/ray/_private/client_mode_hook.py", line 104, in wrapper
(EngineCore_DP0 pid=2442)     return func(*args, **kwargs)
(EngineCore_DP0 pid=2442)            ^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=2442)   File "/usr/local/lib/python3.12/dist-packages/ray/_private/worker.py", line 2972, in get
(EngineCore_DP0 pid=2442)     values, debugger_breakpoint = worker.get_objects(
(EngineCore_DP0 pid=2442)                                   ^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=2442)   File "/usr/local/lib/python3.12/dist-packages/ray/_private/worker.py", line 1031, in get_objects
(EngineCore_DP0 pid=2442)     raise value.as_instanceof_cause()
(EngineCore_DP0 pid=2442) ray.exceptions.RayTaskError(AcceleratorError): ray::RayWorkerWrapper.execute_method() (pid=877, ip=192.168.177.12, actor_id=20efcf2913ef05e274cb90e503000000, repr=<vllm.v1.executor.ray_utils.RayWorkerWrapper object at 0xe2e315e5a4e0>)
(EngineCore_DP0 pid=2442)            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=2442)            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=2442)   File "/usr/local/lib/python3.12/dist-packages/vllm/v1/worker/worker_base.py", line 345, in execute_method
(EngineCore_DP0 pid=2442)     raise e
(EngineCore_DP0 pid=2442)   File "/usr/local/lib/python3.12/dist-packages/vllm/v1/worker/worker_base.py", line 334, in execute_method
(EngineCore_DP0 pid=2442)     return run_method(self, method, args, kwargs)
(EngineCore_DP0 pid=2442)            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=2442)   File "/usr/local/lib/python3.12/dist-packages/vllm/v1/serial_utils.py", line 479, in run_method
(EngineCore_DP0 pid=2442)     return func(*args, **kwargs)
(EngineCore_DP0 pid=2442)            ^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=2442)   File "/usr/local/lib/python3.12/dist-packages/vllm/v1/worker/gpu_worker.py", line 262, in load_model
(EngineCore_DP0 pid=2442)     self.model_runner.load_model(eep_scale_up=eep_scale_up)
(EngineCore_DP0 pid=2442)   File "/usr/local/lib/python3.12/dist-packages/vllm/v1/worker/gpu_model_runner.py", line 3276, in load_model
(EngineCore_DP0 pid=2442)     self.model = model_loader.load_model(
(EngineCore_DP0 pid=2442)                  ^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=2442)   File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/model_loader/base_loader.py", line 55, in load_model
(EngineCore_DP0 pid=2442)     self.load_weights(model, model_config)
(EngineCore_DP0 pid=2442)   File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/model_loader/default_loader.py", line 305, in load_weights
(EngineCore_DP0 pid=2442)     loaded_weights = model.load_weights(self.get_all_weights(model_config, model))
(EngineCore_DP0 pid=2442)                      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=2442)   File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/qwen3_vl.py", line 1670, in load_weights
(EngineCore_DP0 pid=2442)     return loader.load_weights(weights, mapper=self.hf_to_vllm_mapper)
(EngineCore_DP0 pid=2442)            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=2442)   File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/model_loader/online_quantization.py", line 173, in patched_model_load_weights
(EngineCore_DP0 pid=2442)     return original_load_weights(auto_weight_loader, weights, mapper=mapper)
(EngineCore_DP0 pid=2442)            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=2442)   File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/utils.py", line 336, in load_weights
(EngineCore_DP0 pid=2442)     autoloaded_weights = set(self._load_module("", self.module, weights))
(EngineCore_DP0 pid=2442)                          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=2442)   File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/utils.py", line 280, in _load_module
(EngineCore_DP0 pid=2442)     for child_prefix, child_weights in self._groupby_prefix(weights):
(EngineCore_DP0 pid=2442)   File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/utils.py", line 164, in _groupby_prefix
(EngineCore_DP0 pid=2442)     for prefix, group in itertools.groupby(weights_by_parts, key=lambda x: x[0][0]):
(EngineCore_DP0 pid=2442)   File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/utils.py", line 159, in <genexpr>
(EngineCore_DP0 pid=2442)     weights_by_parts = (
(EngineCore_DP0 pid=2442)                        ^
(EngineCore_DP0 pid=2442)   File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/utils.py", line 332, in <genexpr>
(EngineCore_DP0 pid=2442)     weights = (
(EngineCore_DP0 pid=2442)               ^
(EngineCore_DP0 pid=2442)   File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/utils.py", line 90, in <genexpr>
(EngineCore_DP0 pid=2442)     return (
(EngineCore_DP0 pid=2442)            ^
(EngineCore_DP0 pid=2442)   File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/model_loader/default_loader.py", line 277, in get_all_weights
(EngineCore_DP0 pid=2442)     yield from self._get_weights_iterator(primary_weights)
(EngineCore_DP0 pid=2442)   File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/model_loader/default_loader.py", line 263, in <genexpr>
(EngineCore_DP0 pid=2442)     return ((source.prefix + name, tensor) for (name, tensor) in weights_iterator)
(EngineCore_DP0 pid=2442)            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=2442)   File "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/model_loader/weight_utils.py", line 748, in fastsafetensors_weights_iterator
(EngineCore_DP0 pid=2442)     fb = loader.copy_files_to_device()
(EngineCore_DP0 pid=2442)          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=2442)   File "/usr/local/lib/python3.12/dist-packages/fastsafetensors/loader.py", line 135, in copy_files_to_device
(EngineCore_DP0 pid=2442)     self.framework.set_device(self.device)
(EngineCore_DP0 pid=2442)   File "/usr/local/lib/python3.12/dist-packages/fastsafetensors/frameworks/_torch.py", line 148, in set_device
(EngineCore_DP0 pid=2442)     torch.cuda.set_device(device.as_str())
(EngineCore_DP0 pid=2442)   File "/usr/local/lib/python3.12/dist-packages/torch/cuda/__init__.py", line 567, in set_device
(EngineCore_DP0 pid=2442)     torch._C._cuda_setDevice(device)
(EngineCore_DP0 pid=2442) torch.AcceleratorError: CUDA error: invalid device ordinal
(EngineCore_DP0 pid=2442) GPU device may be out of range, do you have enough GPUs?
(EngineCore_DP0 pid=2442) CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
(EngineCore_DP0 pid=2442) For debugging consider passing CUDA_LAUNCH_BLOCKING=1
(EngineCore_DP0 pid=2442) Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.
(EngineCore_DP0 pid=2442) INFO 11-25 00:52:57 [ray_executor.py:121] Shutting down Ray distributed executor. If you see error log from logging.cc regarding SIGTERM received, please ignore because this is the expected termination process in Ray.

Metadata

Metadata

Labels

No labels
No labels

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions