Skip to content

Commit dc2f159

Browse files
authored
Dump input metadata on crash for async scheduling (#21258)
Signed-off-by: Woosuk Kwon <[email protected]>
1 parent d5b981f commit dc2f159

File tree

1 file changed, with 14 additions and 4 deletions.

vllm/v1/engine/core.py

Lines changed: 14 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -234,9 +234,14 @@ def abort_requests(self, request_ids: list[str]):
234234
self.scheduler.finish_requests(request_ids,
235235
RequestStatus.FINISHED_ABORTED)
236236

237-
def execute_model(self, scheduler_output: SchedulerOutput):
237+
def execute_model_with_error_logging(
238+
self,
239+
model_fn: Callable[[SchedulerOutput], ModelRunnerOutput],
240+
scheduler_output: SchedulerOutput,
241+
) -> ModelRunnerOutput:
242+
"""Execute the model and log detailed info on failure."""
238243
try:
239-
return self.model_executor.execute_model(scheduler_output)
244+
return model_fn(scheduler_output)
240245
except Exception as err:
241246
# We do not want to catch BaseException here since we're only
242247
# interested in dumping info when the exception is due to an
@@ -259,7 +264,9 @@ def step(self) -> tuple[dict[int, EngineCoreOutputs], bool]:
259264
if not self.scheduler.has_requests():
260265
return {}, False
261266
scheduler_output = self.scheduler.schedule()
262-
model_output = self.execute_model(scheduler_output)
267+
model_output = self.execute_model_with_error_logging(
268+
self.model_executor.execute_model, # type: ignore
269+
scheduler_output)
263270
engine_core_outputs = self.scheduler.update_from_output(
264271
scheduler_output, model_output) # type: ignore
265272

@@ -306,8 +313,11 @@ def step_with_batch_queue(
306313
# so we need more work.
307314
if not scheduled_batch and not self.batch_queue.empty():
308315
future, scheduler_output = self.batch_queue.get_nowait()
316+
309317
# Blocking until the first result is available.
310-
model_output = future.result()
318+
model_output = self.execute_model_with_error_logging(
319+
lambda _: future.result(), scheduler_output)
320+
311321
self.batch_queue.task_done()
312322
engine_core_outputs = (self.scheduler.update_from_output(
313323
scheduler_output, model_output))

0 commit comments

Comments
 (0)