
Commit 3e70e3d

Authored by huijjj
add(v1): RequestStatesStats to RequestOutput (vllm-project#24947)
Signed-off-by: huijjj <[email protected]>
1 parent eb0fa43 commit 3e70e3d

3 files changed, +24 -11 lines changed

tests/entrypoints/llm/test_generate.py

Lines changed: 13 additions & 0 deletions
@@ -86,3 +86,16 @@ def test_max_model_len():
     # It can be less if generation finishes due to other reasons (e.g., EOS)
     # before reaching the absolute model length limit.
     assert num_total_tokens <= max_model_len
+
+
+def test_log_stats():
+    llm = LLM(
+        model=MODEL_NAME,
+        disable_log_stats=False,
+        gpu_memory_utilization=0.10,
+        enforce_eager=True,  # reduce test time
+    )
+    outputs = llm.generate(PROMPTS, sampling_params=None)
+
+    # disable_log_stats is False, every output should have metrics
+    assert all(output.metrics is not None for output in outputs)
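For context, a minimal usage sketch (not part of this diff) of the behavior the new test exercises: with disable_log_stats=False, each returned RequestOutput is expected to carry a non-None metrics object. The model name and prompt below are placeholders, not values from this commit.

# Minimal sketch of the behavior checked by test_log_stats; model and prompt
# are placeholders chosen for illustration.
from vllm import LLM, SamplingParams

llm = LLM(
    model="facebook/opt-125m",   # any small model works for illustration
    disable_log_stats=False,     # keep per-request stat collection enabled
    enforce_eager=True,
)

outputs = llm.generate(["Hello, my name is"], SamplingParams(max_tokens=8))
for out in outputs:
    # After this change, `metrics` on the v1 engine is expected to hold the
    # per-request stats object rather than None.
    print(out.request_id, out.metrics)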

vllm/outputs.py

Lines changed: 2 additions & 1 deletion
@@ -14,6 +14,7 @@
 from vllm.lora.request import LoRARequest
 from vllm.multimodal.inputs import MultiModalPlaceholderDict
 from vllm.sequence import RequestMetrics
+from vllm.v1.metrics.stats import RequestStateStats

 logger = init_logger(__name__)

@@ -108,7 +109,7 @@ def __init__(
         prompt_logprobs: Optional[PromptLogprobs],
         outputs: list[CompletionOutput],
         finished: bool,
-        metrics: Optional[RequestMetrics] = None,
+        metrics: Optional[Union[RequestMetrics, RequestStateStats]] = None,
         lora_request: Optional[LoRARequest] = None,
         encoder_prompt: Optional[str] = None,
         encoder_prompt_token_ids: Optional[list[int]] = None,
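Since metrics can now be either type, downstream code that inspects it may want to branch on the concrete class. A small sketch under that assumption; the helper name is made up, while both imports come from this diff.

# Hypothetical helper, not part of this commit: distinguish the two metrics
# types now allowed on RequestOutput.metrics.
from typing import Optional, Union

from vllm.sequence import RequestMetrics
from vllm.v1.metrics.stats import RequestStateStats


def describe_metrics(
        metrics: Optional[Union[RequestMetrics, RequestStateStats]]) -> str:
    if metrics is None:
        return "no metrics (stat logging disabled)"
    if isinstance(metrics, RequestStateStats):
        return "v1 per-request state stats"
    if isinstance(metrics, RequestMetrics):
        return "v0 request metrics"
    return f"unexpected metrics type: {type(metrics).__name__}"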

vllm/v1/engine/output_processor.py

Lines changed: 9 additions & 10 deletions
@@ -248,16 +248,15 @@ def _new_request_output(
         if prompt_token_ids is None and self.prompt_embeds is not None:
             prompt_token_ids = [0] * len(self.prompt_embeds)

-        return RequestOutput(
-            request_id=request_id,
-            prompt=self.prompt,
-            prompt_token_ids=prompt_token_ids,
-            prompt_logprobs=prompt_logprobs,
-            outputs=cast(list[CompletionOutput], outputs),
-            finished=finished,
-            kv_transfer_params=kv_transfer_params,
-            num_cached_tokens=self.num_cached_tokens,
-        )
+        return RequestOutput(request_id=request_id,
+                             prompt=self.prompt,
+                             prompt_token_ids=prompt_token_ids,
+                             prompt_logprobs=prompt_logprobs,
+                             outputs=cast(list[CompletionOutput], outputs),
+                             finished=finished,
+                             kv_transfer_params=kv_transfer_params,
+                             num_cached_tokens=self.num_cached_tokens,
+                             metrics=self.stats)

     def _new_completion_output(
         self,
