File tree Expand file tree Collapse file tree 1 file changed +6
-0
lines changed Expand file tree Collapse file tree 1 file changed +6
-0
lines changed Original file line number Diff line number Diff line change 16
16
from vllm .platforms import current_platform
17
17
from vllm .sequence import ExecuteModelRequest , IntermediateTensors
18
18
from vllm .utils import get_ip
19
+ from vllm .v1 .outputs import AsyncModelRunnerOutput
19
20
from vllm .v1 .worker .worker_base import WorkerWrapperBase
20
21
21
22
if TYPE_CHECKING :
@@ -142,6 +143,11 @@ def execute_model_ray(
142
143
# but may still be finished requests.
143
144
assert not output or not output .req_ids
144
145
output = scheduler_output , None
146
+ # Ensure outputs crossing Ray compiled DAG are serializable.
147
+ # AsyncModelRunnerOutput holds CUDA events and cannot be
148
+ # pickled.
149
+ if isinstance (output , AsyncModelRunnerOutput ):
150
+ output = output .get_output ()
145
151
return output
146
152
147
153
def override_env_vars (self , vars : Dict [str , str ]):
You can’t perform that action at this time.
0 commit comments