
Commit 8e8e0b6

feat: Add --enable-log-outputs flag for logging model generations (#20707)
Signed-off-by: Adrian Garcia <[email protected]>
1 parent 82216dc commit 8e8e0b6

5 files changed, +412 -26 lines changed


tests/test_logger.py

Lines changed: 248 additions & 4 deletions
@@ -10,11 +10,12 @@
 from json.decoder import JSONDecodeError
 from tempfile import NamedTemporaryFile
 from typing import Any
-from unittest.mock import patch
+from unittest.mock import MagicMock, patch
 from uuid import uuid4

 import pytest

+from vllm.entrypoints.logger import RequestLogger
 from vllm.logger import (_DATE_FORMAT, _FORMAT, _configure_vllm_root_logger,
                          enable_trace_function_call, init_logger)
 from vllm.logging_utils import NewLineFormatter
@@ -228,9 +229,10 @@ def test_prepare_object_to_dump():
     list_obj = [1, 2, 3]
     assert prepare_object_to_dump(list_obj) == '[1, 2, 3]'

-    dict_obj = {'a': 1, 'b': 'b'}
+    dict_obj = {"a": 1, "b": "b"}
     assert prepare_object_to_dump(dict_obj) in [
-        "{a: 1, b: 'b'}", "{b: 'b', a: 1}"
+        "{a: 1, b: 'b'}",
+        "{b: 'b', a: 1}",
     ]

     set_obj = {1, 2, 3}
@@ -252,4 +254,246 @@ class CustomClass:
         b: str

     assert (prepare_object_to_dump(CustomClass(
-        1, 'b')) == "CustomClass(a=1, b='b')")
+        1, "b")) == "CustomClass(a=1, b='b')")
+
+
+def test_request_logger_log_outputs():
+    """Test the new log_outputs functionality."""
+    # Create a mock logger to capture log calls
+    mock_logger = MagicMock()
+
+    with patch("vllm.entrypoints.logger.logger", mock_logger):
+        request_logger = RequestLogger(max_log_len=None)
+
+        # Test basic output logging
+        request_logger.log_outputs(
+            request_id="test-123",
+            outputs="Hello, world!",
+            output_token_ids=[1, 2, 3, 4],
+            finish_reason="stop",
+            is_streaming=False,
+            delta=False,
+        )
+
+        mock_logger.info.assert_called_once()
+        call_args = mock_logger.info.call_args.args
+        assert "Generated response %s%s" in call_args[0]
+        assert call_args[1] == "test-123"
+        assert call_args[3] == "Hello, world!"
+        assert call_args[4] == [1, 2, 3, 4]
+        assert call_args[5] == "stop"
+
+
+def test_request_logger_log_outputs_streaming_delta():
+    """Test log_outputs with streaming delta mode."""
+    mock_logger = MagicMock()
+
+    with patch("vllm.entrypoints.logger.logger", mock_logger):
+        request_logger = RequestLogger(max_log_len=None)
+
+        # Test streaming delta logging
+        request_logger.log_outputs(
+            request_id="test-456",
+            outputs="Hello",
+            output_token_ids=[1],
+            finish_reason=None,
+            is_streaming=True,
+            delta=True,
+        )
+
+        mock_logger.info.assert_called_once()
+        call_args = mock_logger.info.call_args.args
+        assert "Generated response %s%s" in call_args[0]
+        assert call_args[1] == "test-456"
+        assert call_args[2] == " (streaming delta)"
+        assert call_args[3] == "Hello"
+        assert call_args[4] == [1]
+        assert call_args[5] is None
+
+
+def test_request_logger_log_outputs_streaming_complete():
+    """Test log_outputs with streaming complete mode."""
+    mock_logger = MagicMock()
+
+    with patch("vllm.entrypoints.logger.logger", mock_logger):
+        request_logger = RequestLogger(max_log_len=None)
+
+        # Test streaming complete logging
+        request_logger.log_outputs(
+            request_id="test-789",
+            outputs="Complete response",
+            output_token_ids=[1, 2, 3],
+            finish_reason="length",
+            is_streaming=True,
+            delta=False,
+        )
+
+        mock_logger.info.assert_called_once()
+        call_args = mock_logger.info.call_args.args
+        assert "Generated response %s%s" in call_args[0]
+        assert call_args[1] == "test-789"
+        assert call_args[2] == " (streaming complete)"
+        assert call_args[3] == "Complete response"
+        assert call_args[4] == [1, 2, 3]
+        assert call_args[5] == "length"
+
+
+def test_request_logger_log_outputs_with_truncation():
+    """Test log_outputs respects max_log_len setting."""
+    mock_logger = MagicMock()
+
+    with patch("vllm.entrypoints.logger.logger", mock_logger):
+        # Set max_log_len to 10
+        request_logger = RequestLogger(max_log_len=10)
+
+        # Test output truncation
+        long_output = "This is a very long output that should be truncated"
+        long_token_ids = list(range(20))  # 20 tokens
+
+        request_logger.log_outputs(
+            request_id="test-truncate",
+            outputs=long_output,
+            output_token_ids=long_token_ids,
+            finish_reason="stop",
+            is_streaming=False,
+            delta=False,
+        )
+
+        mock_logger.info.assert_called_once()
+        call_args = mock_logger.info.call_args
+
+        # Check that output was truncated to first 10 characters
+        logged_output = call_args[0][3]
+        assert logged_output == "This is a "
+        assert len(logged_output) == 10
+
+        # Check that token IDs were truncated to first 10 tokens
+        logged_token_ids = call_args[0][4]
+        assert logged_token_ids == list(range(10))
+        assert len(logged_token_ids) == 10
+
+
+def test_request_logger_log_outputs_none_values():
+    """Test log_outputs handles None values correctly."""
+    mock_logger = MagicMock()
+
+    with patch("vllm.entrypoints.logger.logger", mock_logger):
+        request_logger = RequestLogger(max_log_len=None)
+
+        # Test with None output_token_ids
+        request_logger.log_outputs(
+            request_id="test-none",
+            outputs="Test output",
+            output_token_ids=None,
+            finish_reason="stop",
+            is_streaming=False,
+            delta=False,
+        )
+
+        mock_logger.info.assert_called_once()
+        call_args = mock_logger.info.call_args.args
+        assert "Generated response %s%s" in call_args[0]
+        assert call_args[1] == "test-none"
+        assert call_args[3] == "Test output"
+        assert call_args[4] is None
+        assert call_args[5] == "stop"
+
+
+def test_request_logger_log_outputs_empty_output():
+    """Test log_outputs handles empty output correctly."""
+    mock_logger = MagicMock()
+
+    with patch("vllm.entrypoints.logger.logger", mock_logger):
+        request_logger = RequestLogger(max_log_len=5)
+
+        # Test with empty output
+        request_logger.log_outputs(
+            request_id="test-empty",
+            outputs="",
+            output_token_ids=[],
+            finish_reason="stop",
+            is_streaming=False,
+            delta=False,
+        )
+
+        mock_logger.info.assert_called_once()
+        call_args = mock_logger.info.call_args.args
+        assert "Generated response %s%s" in call_args[0]
+        assert call_args[1] == "test-empty"
+        assert call_args[3] == ""
+        assert call_args[4] == []
+        assert call_args[5] == "stop"
+
+
+def test_request_logger_log_outputs_integration():
+    """Test that log_outputs can be called alongside log_inputs."""
+    mock_logger = MagicMock()
+
+    with patch("vllm.entrypoints.logger.logger", mock_logger):
+        request_logger = RequestLogger(max_log_len=None)
+
+        # Test that both methods can be called without interference
+        request_logger.log_inputs(
+            request_id="test-integration",
+            prompt="Test prompt",
+            prompt_token_ids=[1, 2, 3],
+            prompt_embeds=None,
+            params=None,
+            lora_request=None,
+        )
+
+        request_logger.log_outputs(
+            request_id="test-integration",
+            outputs="Test output",
+            output_token_ids=[4, 5, 6],
+            finish_reason="stop",
+            is_streaming=False,
+            delta=False,
+        )
+
+        # Should have been called twice - once for inputs, once for outputs
+        assert mock_logger.info.call_count == 2
+
+        # Check that the calls were made with correct patterns
+        input_call = mock_logger.info.call_args_list[0][0]
+        output_call = mock_logger.info.call_args_list[1][0]
+
+        assert "Received request %s" in input_call[0]
+        assert input_call[1] == "test-integration"
+
+        assert "Generated response %s%s" in output_call[0]
+        assert output_call[1] == "test-integration"
+
+
+def test_streaming_complete_logs_full_text_content():
+    """Test that streaming complete logging includes
+    full accumulated text, not just token count."""
+    mock_logger = MagicMock()
+
+    with patch("vllm.entrypoints.logger.logger", mock_logger):
+        request_logger = RequestLogger(max_log_len=None)
+
+        # Test with actual content instead of token count format
+        full_response = "This is a complete response from streaming"
+        request_logger.log_outputs(
+            request_id="test-streaming-full-text",
+            outputs=full_response,
+            output_token_ids=None,
+            finish_reason="streaming_complete",
+            is_streaming=True,
+            delta=False,
+        )
+
+        mock_logger.info.assert_called_once()
+        call_args = mock_logger.info.call_args.args
+
+        # Verify the logged output is the full text, not a token count format
+        logged_output = call_args[3]
+        assert logged_output == full_response
+        assert "tokens>" not in logged_output
+        assert "streaming_complete" not in logged_output
+
+        # Verify other parameters
+        assert call_args[1] == "test-streaming-full-text"
+        assert call_args[2] == " (streaming complete)"
+        assert call_args[5] == "streaming_complete"
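
Note on the assertions above: they index into the positional arguments of the single logger.info call made by log_outputs. The mapping below is a reference sketch derived from the log_outputs implementation shown later in this diff.

# Positional layout of the logger.info call that the tests assert on:
#   call_args[0]  format string: "Generated response %s%s: output: %r, ..."
#   call_args[1]  request_id
#   call_args[2]  stream_info: "", " (streaming delta)", or " (streaming complete)"
#   call_args[3]  outputs (truncated to max_log_len characters when it is set)
#   call_args[4]  output_token_ids (truncated to max_log_len entries when it is set)
#   call_args[5]  finish_reason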

vllm/entrypoints/logger.py

Lines changed: 34 additions & 2 deletions
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project

+from collections.abc import Sequence
 from typing import Optional, Union

 import torch
@@ -16,8 +17,6 @@
 class RequestLogger:

     def __init__(self, *, max_log_len: Optional[int]) -> None:
-        super().__init__()
-
         self.max_log_len = max_log_len

     def log_inputs(
@@ -45,3 +44,36 @@ def log_inputs(
             "lora_request: %s.", request_id, prompt, params, prompt_token_ids,
             prompt_embeds.shape if prompt_embeds is not None else None,
             lora_request)
+
+    def log_outputs(
+        self,
+        request_id: str,
+        outputs: str,
+        output_token_ids: Optional[Sequence[int]],
+        finish_reason: Optional[str] = None,
+        is_streaming: bool = False,
+        delta: bool = False,
+    ) -> None:
+        max_log_len = self.max_log_len
+        if max_log_len is not None:
+            if outputs is not None:
+                outputs = outputs[:max_log_len]
+
+            if output_token_ids is not None:
+                # Convert to list and apply truncation
+                output_token_ids = list(output_token_ids)[:max_log_len]
+
+        stream_info = ""
+        if is_streaming:
+            stream_info = (" (streaming delta)"
+                           if delta else " (streaming complete)")
+
+        logger.info(
+            "Generated response %s%s: output: %r, "
+            "output_token_ids: %s, finish_reason: %s",
+            request_id,
+            stream_info,
+            outputs,
+            output_token_ids,
+            finish_reason,
+        )
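
The new method can also be exercised on its own. The snippet below is a minimal usage sketch, not part of this commit: the request id, text, and token ids are made-up values, and it simply mirrors the truncation behavior implemented above.

from vllm.entrypoints.logger import RequestLogger

# Illustrative values only; max_log_len truncates both the text and the token list.
request_logger = RequestLogger(max_log_len=10)
request_logger.log_outputs(
    request_id="cmpl-demo-1",
    outputs="This response is longer than ten characters",
    output_token_ids=list(range(20)),
    finish_reason="stop",
    is_streaming=False,
    delta=False,
)
# Emits an INFO line roughly like:
# Generated response cmpl-demo-1: output: 'This respo',
# output_token_ids: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], finish_reason: stop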

vllm/entrypoints/openai/cli_args.py

Lines changed: 6 additions & 3 deletions
@@ -44,10 +44,10 @@ def __call__(

         lora_list: list[LoRAModulePath] = []
         for item in values:
-            if item in [None, '']:  # Skip if item is None or empty string
+            if item in [None, ""]:  # Skip if item is None or empty string
                 continue
-            if '=' in item and ',' not in item:  # Old format: name=path
-                name, path = item.split('=')
+            if "=" in item and "," not in item:  # Old format: name=path
+                name, path = item.split("=")
                 lora_list.append(LoRAModulePath(name, path))
             else:  # Assume JSON format
                 try:
@@ -167,6 +167,9 @@ class FrontendArgs:
     enable_tokenizer_info_endpoint: bool = False
     """Enable the /get_tokenizer_info endpoint. May expose chat
     templates and other tokenizer configuration."""
+    enable_log_outputs: bool = False
+    """If set to True, enable logging of model outputs (generations)
+    in addition to the input logging that is enabled by default."""

     @staticmethod
     def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
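
The hunk above only adds the FrontendArgs field; per the commit title it surfaces on the command line as --enable-log-outputs. The serving-layer wiring lives in the two changed files not shown in this excerpt. As a hedged sketch of how such a flag could gate output logging, assuming a handler that already holds a RequestLogger and the parsed frontend args (the helper name and parameters below are hypothetical, not taken from this commit):

from typing import Optional

from vllm.entrypoints.logger import RequestLogger


def maybe_log_outputs(request_logger: Optional[RequestLogger],
                      enable_log_outputs: bool,
                      request_id: str,
                      text: str,
                      token_ids: list[int],
                      finish_reason: Optional[str],
                      is_streaming: bool) -> None:
    # Hypothetical helper: only emit the "Generated response ..." line when
    # --enable-log-outputs was passed and request logging is active.
    if request_logger is None or not enable_log_outputs:
        return
    request_logger.log_outputs(
        request_id=request_id,
        outputs=text,
        output_token_ids=token_ids,
        finish_reason=finish_reason,
        is_streaming=is_streaming,
        delta=False,
    )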
