Skip to content

Commit cd9e5b8

Browse files
authored
Fix V1 engine serialization error with Ray distributed executor (vllm-project#26148)
Signed-off-by: Nikhil Ghosh <[email protected]>
1 parent 300a59c commit cd9e5b8

File tree

1 file changed

+6
-0
lines changed

1 file changed

+6
-0
lines changed

vllm/executor/ray_utils.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@
1616
from vllm.platforms import current_platform
1717
from vllm.sequence import ExecuteModelRequest, IntermediateTensors
1818
from vllm.utils import get_ip
19+
from vllm.v1.outputs import AsyncModelRunnerOutput
1920
from vllm.v1.worker.worker_base import WorkerWrapperBase
2021

2122
if TYPE_CHECKING:
@@ -142,6 +143,11 @@ def execute_model_ray(
142143
# but may still be finished requests.
143144
assert not output or not output.req_ids
144145
output = scheduler_output, None
146+
# Ensure outputs crossing Ray compiled DAG are serializable.
147+
# AsyncModelRunnerOutput holds CUDA events and cannot be
148+
# pickled.
149+
if isinstance(output, AsyncModelRunnerOutput):
150+
output = output.get_output()
145151
return output
146152

147153
def override_env_vars(self, vars: Dict[str, str]):

0 commit comments

Comments
 (0)