Skip to content
This repository was archived by the owner on Sep 4, 2025. It is now read-only.

Commit a65cb16

Browse files
authored
[MISC] Dump model runner inputs when crashing (vllm-project#8305)
1 parent 3fd2b0d commit a65cb16

File tree

4 files changed

+75
-1
lines changed

4 files changed

+75
-1
lines changed

.github/ISSUE_TEMPLATE/400-bug report.yml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,15 @@ body:
3030
</details>
3131
validations:
3232
required: true
33+
- type: textarea
34+
attributes:
35+
label: Model Input Dumps
36+
description: |
37+
If you are facing crashing due to illegal memory access or other issues with model execution, vLLM may dump the problematic input of the model. In this case, you will see the message `Error in model execution (input dumped to /tmp/err_xxx.pkl)`. If you see this message, please zip the file (because GitHub doesn't support .pkl file format) and upload it here. This will help us to reproduce the issue and facilitate the debugging process.
38+
placeholder: |
39+
Upload the dumped input file.
40+
validations:
41+
required: false
3342
- type: textarea
3443
attributes:
3544
label: 🐛 Describe the bug

tests/basic_correctness/test_basic_correctness.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,16 @@
33
Run `pytest tests/basic_correctness/test_basic_correctness.py`.
44
"""
55
import os
6+
import pickle
7+
import re
68
import weakref
9+
from unittest.mock import patch
710

811
import pytest
912

1013
from vllm import LLM
1114
from vllm.utils import is_hip
15+
from vllm.worker.model_runner import ModelInputForGPUWithSamplingMetadata
1216

1317
from ..models.utils import check_outputs_equal
1418

@@ -64,3 +68,29 @@ def test_models(
6468
name_0="hf",
6569
name_1="vllm",
6670
)
71+
72+
73+
def test_model_with_failure(vllm_runner) -> None:
    """Force a model-execution failure and verify the input-dump machinery.

    Patches ``OPTForCausalLM.forward`` to raise, then checks that:
    * the surfaced error message names a ``.pkl`` dump file,
    * the pickle contains the expected ``arg_1``..``arg_3`` keys,
    * ``arg_1`` is the ``ModelInputForGPUWithSamplingMetadata`` object.

    The dump file is removed in ``finally`` so repeated runs stay clean.
    """
    # Sentinel so the cleanup in `finally` cannot raise NameError when an
    # assertion fails before the dump filename has been extracted.
    filename = None
    try:
        with patch("vllm.model_executor.models.opt.OPTForCausalLM.forward",
                   side_effect=ValueError()):
            with pytest.raises(ValueError) as exc_info:
                vllm_runner("facebook/opt-125m",
                            dtype="half",
                            enforce_eager=False,
                            gpu_memory_utilization=0.7)
        matches = re.search(r"input dumped to (.+).pkl",
                            str(exc_info.value))
        assert matches is not None
        filename = f"{matches.group(1)}.pkl"

        with open(filename, "rb") as filep:
            inputs = pickle.load(filep)

        if any(key not in inputs for key in ("arg_1", "arg_2", "arg_3")):
            raise AssertionError("Missing keys in dumped inputs. Dumped keys: "
                                 f"{list(inputs.keys())}")
        assert isinstance(inputs["arg_1"],
                          ModelInputForGPUWithSamplingMetadata)
    finally:
        # Only remove the dump if we actually located it; otherwise the
        # original assertion error would be masked by a cleanup failure.
        if filename is not None:
            os.remove(filename)

vllm/worker/model_runner.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@
5353
_add_attn_metadata_broadcastable_dict,
5454
_add_sampling_metadata_broadcastable_dict,
5555
_init_attn_metadata_from_tensor_dict,
56-
_init_sampling_metadata_from_tensor_dict)
56+
_init_sampling_metadata_from_tensor_dict, dump_input_when_exception)
5757

5858
if TYPE_CHECKING:
5959
from vllm.attention.backends.abstract import AttentionBackend
@@ -1489,6 +1489,7 @@ def prepare_model_input(
14891489
virtual_engine=virtual_engine)
14901490

14911491
@torch.inference_mode()
1492+
@dump_input_when_exception(exclude_args=[0], exclude_kwargs=["self"])
14921493
def execute_model(
14931494
self,
14941495
model_input: ModelInputForGPUWithSamplingMetadata,

vllm/worker/model_runner_base.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
import dataclasses
2+
import pickle
23
from abc import ABC, abstractmethod
4+
from datetime import datetime
5+
from functools import wraps
36
from typing import (TYPE_CHECKING, Any, Dict, Generic, List, Optional, Type,
47
TypeVar)
58

@@ -98,6 +101,37 @@ def _init_frozen_model_input_from_tensor_dict(
98101
return tensor_dict
99102

100103

104+
def dump_input_when_exception(exclude_args: Optional[List[int]] = None,
                              exclude_kwargs: Optional[List[str]] = None):
    """Decorator factory: pickle a function's inputs to /tmp when it raises.

    Intended for ``ModelRunner.execute_model`` so that crashes (e.g. illegal
    memory access) leave a reproducible input dump behind.

    Args:
        exclude_args: positional indices to omit from the dump
            (e.g. ``[0]`` to skip ``self``).
        exclude_kwargs: keyword names to omit from the dump.

    The wrapper re-raises the original exception type with a message that
    names the dump file, chaining the original via ``from err``.
    """

    def _inner(func):

        @wraps(func)
        def _wrapper(*args, **kwargs):
            try:
                return func(*args, **kwargs)
            except Exception as err:
                # Timestamped filename keeps dumps from successive crashes
                # from overwriting each other.
                timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
                filename = f"/tmp/err_{func.__name__}_input_{timestamp}.pkl"
                with open(filename, "wb") as filep:
                    dumped_inputs = {
                        k: v
                        for k, v in kwargs.items()
                        if k not in (exclude_kwargs or [])
                    }
                    for i, arg in enumerate(args):
                        if i not in (exclude_args or []):
                            # Positional args keyed as arg_<index> since their
                            # parameter names are not recoverable here.
                            dumped_inputs[f"arg_{i}"] = arg
                    pickle.dump(dumped_inputs, filep)
                # Fix: interpolate the actual dump path so users (and the
                # regression test matching "input dumped to (.+).pkl") can
                # locate the file; the original string printed a literal
                # placeholder instead of the filename.
                raise type(err)(
                    f"Error in model execution (input dumped to {filename}): "
                    f"{str(err)}") from err

        return _wrapper

    return _inner
133+
134+
101135
class BroadcastableModelInput(ABC):
102136

103137
@abstractmethod

0 commit comments

Comments
 (0)