 from json.decoder import JSONDecodeError
 from tempfile import NamedTemporaryFile
 from typing import Any
-from unittest.mock import patch
+from unittest.mock import MagicMock, patch
 from uuid import uuid4
 
 import pytest
 
+from vllm.entrypoints.logger import RequestLogger
 from vllm.logger import (_DATE_FORMAT, _FORMAT, _configure_vllm_root_logger,
                          enable_trace_function_call, init_logger)
 from vllm.logging_utils import NewLineFormatter
@@ -228,9 +229,10 @@ def test_prepare_object_to_dump():
     list_obj = [1, 2, 3]
     assert prepare_object_to_dump(list_obj) == '[1, 2, 3]'
 
-    dict_obj = {'a': 1, 'b': 'b'}
+    dict_obj = {"a": 1, "b": "b"}
     assert prepare_object_to_dump(dict_obj) in [
-        "{a: 1, b: 'b'}", "{b: 'b', a: 1}"
+        "{a: 1, b: 'b'}",
+        "{b: 'b', a: 1}",
     ]
 
     set_obj = {1, 2, 3}
@@ -252,4 +254,246 @@ class CustomClass:
         b: str
 
     assert (prepare_object_to_dump(CustomClass(
-        1, 'b')) == "CustomClass(a=1, b='b')")
+        1, "b")) == "CustomClass(a=1, b='b')")
+
+
+def test_request_logger_log_outputs():
+    """Test the new log_outputs functionality."""
+    # Create a mock logger to capture log calls
+    mock_logger = MagicMock()
+
+    with patch("vllm.entrypoints.logger.logger", mock_logger):
+        request_logger = RequestLogger(max_log_len=None)
+
+        # Test basic output logging
+        request_logger.log_outputs(
+            request_id="test-123",
+            outputs="Hello, world!",
+            output_token_ids=[1, 2, 3, 4],
+            finish_reason="stop",
+            is_streaming=False,
+            delta=False,
+        )
+
+        mock_logger.info.assert_called_once()
+        call_args = mock_logger.info.call_args.args
+        assert "Generated response %s%s" in call_args[0]
+        assert call_args[1] == "test-123"
+        assert call_args[3] == "Hello, world!"
+        assert call_args[4] == [1, 2, 3, 4]
+        assert call_args[5] == "stop"
+
+
+def test_request_logger_log_outputs_streaming_delta():
+    """Test log_outputs with streaming delta mode."""
+    mock_logger = MagicMock()
+
+    with patch("vllm.entrypoints.logger.logger", mock_logger):
+        request_logger = RequestLogger(max_log_len=None)
+
+        # Test streaming delta logging
+        request_logger.log_outputs(
+            request_id="test-456",
+            outputs="Hello",
+            output_token_ids=[1],
+            finish_reason=None,
+            is_streaming=True,
+            delta=True,
+        )
+
+        mock_logger.info.assert_called_once()
+        call_args = mock_logger.info.call_args.args
+        assert "Generated response %s%s" in call_args[0]
+        assert call_args[1] == "test-456"
+        assert call_args[2] == " (streaming delta)"
+        assert call_args[3] == "Hello"
+        assert call_args[4] == [1]
+        assert call_args[5] is None
+
+
+def test_request_logger_log_outputs_streaming_complete():
+    """Test log_outputs with streaming complete mode."""
+    mock_logger = MagicMock()
+
+    with patch("vllm.entrypoints.logger.logger", mock_logger):
+        request_logger = RequestLogger(max_log_len=None)
+
+        # Test streaming complete logging
+        request_logger.log_outputs(
+            request_id="test-789",
+            outputs="Complete response",
+            output_token_ids=[1, 2, 3],
+            finish_reason="length",
+            is_streaming=True,
+            delta=False,
+        )
+
+        mock_logger.info.assert_called_once()
+        call_args = mock_logger.info.call_args.args
+        assert "Generated response %s%s" in call_args[0]
+        assert call_args[1] == "test-789"
+        assert call_args[2] == " (streaming complete)"
+        assert call_args[3] == "Complete response"
+        assert call_args[4] == [1, 2, 3]
+        assert call_args[5] == "length"
+
+
+def test_request_logger_log_outputs_with_truncation():
+    """Test log_outputs respects max_log_len setting."""
+    mock_logger = MagicMock()
+
+    with patch("vllm.entrypoints.logger.logger", mock_logger):
+        # Set max_log_len to 10
+        request_logger = RequestLogger(max_log_len=10)
+
+        # Test output truncation
+        long_output = "This is a very long output that should be truncated"
+        long_token_ids = list(range(20))  # 20 tokens
+
+        request_logger.log_outputs(
+            request_id="test-truncate",
+            outputs=long_output,
+            output_token_ids=long_token_ids,
+            finish_reason="stop",
+            is_streaming=False,
+            delta=False,
+        )
+
+        mock_logger.info.assert_called_once()
+        call_args = mock_logger.info.call_args
+
+        # Check that output was truncated to first 10 characters
+        logged_output = call_args[0][3]
+        assert logged_output == "This is a "
+        assert len(logged_output) == 10
+
+        # Check that token IDs were truncated to first 10 tokens
+        logged_token_ids = call_args[0][4]
+        assert logged_token_ids == list(range(10))
+        assert len(logged_token_ids) == 10
+
+
+def test_request_logger_log_outputs_none_values():
+    """Test log_outputs handles None values correctly."""
+    mock_logger = MagicMock()
+
+    with patch("vllm.entrypoints.logger.logger", mock_logger):
+        request_logger = RequestLogger(max_log_len=None)
+
+        # Test with None output_token_ids
+        request_logger.log_outputs(
+            request_id="test-none",
+            outputs="Test output",
+            output_token_ids=None,
+            finish_reason="stop",
+            is_streaming=False,
+            delta=False,
+        )
+
+        mock_logger.info.assert_called_once()
+        call_args = mock_logger.info.call_args.args
+        assert "Generated response %s%s" in call_args[0]
+        assert call_args[1] == "test-none"
+        assert call_args[3] == "Test output"
+        assert call_args[4] is None
+        assert call_args[5] == "stop"
+
+
+def test_request_logger_log_outputs_empty_output():
+    """Test log_outputs handles empty output correctly."""
+    mock_logger = MagicMock()
+
+    with patch("vllm.entrypoints.logger.logger", mock_logger):
+        request_logger = RequestLogger(max_log_len=5)
+
+        # Test with empty output
+        request_logger.log_outputs(
+            request_id="test-empty",
+            outputs="",
+            output_token_ids=[],
+            finish_reason="stop",
+            is_streaming=False,
+            delta=False,
+        )
+
+        mock_logger.info.assert_called_once()
+        call_args = mock_logger.info.call_args.args
+        assert "Generated response %s%s" in call_args[0]
+        assert call_args[1] == "test-empty"
+        assert call_args[3] == ""
+        assert call_args[4] == []
+        assert call_args[5] == "stop"
+
+
+def test_request_logger_log_outputs_integration():
+    """Test that log_outputs can be called alongside log_inputs."""
+    mock_logger = MagicMock()
+
+    with patch("vllm.entrypoints.logger.logger", mock_logger):
+        request_logger = RequestLogger(max_log_len=None)
+
+        # Test that both methods can be called without interference
+        request_logger.log_inputs(
+            request_id="test-integration",
+            prompt="Test prompt",
+            prompt_token_ids=[1, 2, 3],
+            prompt_embeds=None,
+            params=None,
+            lora_request=None,
+        )
+
+        request_logger.log_outputs(
+            request_id="test-integration",
+            outputs="Test output",
+            output_token_ids=[4, 5, 6],
+            finish_reason="stop",
+            is_streaming=False,
+            delta=False,
+        )
+
+        # Should have been called twice - once for inputs, once for outputs
+        assert mock_logger.info.call_count == 2
+
+        # Check that the calls were made with correct patterns
+        input_call = mock_logger.info.call_args_list[0][0]
+        output_call = mock_logger.info.call_args_list[1][0]
+
+        assert "Received request %s" in input_call[0]
+        assert input_call[1] == "test-integration"
+
+        assert "Generated response %s%s" in output_call[0]
+        assert output_call[1] == "test-integration"
+
+
+def test_streaming_complete_logs_full_text_content():
+    """Test that streaming complete logging includes
+    full accumulated text, not just token count."""
+    mock_logger = MagicMock()
+
+    with patch("vllm.entrypoints.logger.logger", mock_logger):
+        request_logger = RequestLogger(max_log_len=None)
+
+        # Test with actual content instead of token count format
+        full_response = "This is a complete response from streaming"
+        request_logger.log_outputs(
+            request_id="test-streaming-full-text",
+            outputs=full_response,
+            output_token_ids=None,
+            finish_reason="streaming_complete",
+            is_streaming=True,
+            delta=False,
+        )
+
+        mock_logger.info.assert_called_once()
+        call_args = mock_logger.info.call_args.args
+
+        # Verify the logged output is the full text, not a token count format
+        logged_output = call_args[3]
+        assert logged_output == full_response
+        assert "tokens>" not in logged_output
+        assert "streaming_complete" not in logged_output
+
+        # Verify other parameters
+        assert call_args[1] == "test-streaming-full-text"
+        assert call_args[2] == " (streaming complete)"
+        assert call_args[5] == "streaming_complete"
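These tests only pin down the shape of the call into the module-level `logger`: a format string containing "Generated response %s%s", followed positionally by the request id, a streaming annotation, the (possibly truncated) output text, the output token ids, and the finish reason. The sketch below is a minimal `log_outputs` implementation that would satisfy the assertions; it is inferred from the tests, not taken from the vLLM source, and the tail of the format string and the logger setup are assumptions.

```python
# Hypothetical sketch inferred from the tests above -- not the actual vLLM
# implementation. Only the "Generated response %s%s" prefix, the positional
# argument order, the " (streaming delta)" / " (streaming complete)" suffixes,
# and the max_log_len truncation are pinned by the assertions; the rest of the
# message text and the logger wiring are assumed.
import logging
from typing import Optional

# Stand-in for the module-level logger that the tests replace via
# patch("vllm.entrypoints.logger.logger", mock_logger).
logger = logging.getLogger("vllm.entrypoints.logger")


class RequestLogger:

    def __init__(self, *, max_log_len: Optional[int]) -> None:
        self.max_log_len = max_log_len

    def log_outputs(
        self,
        request_id: str,
        outputs: str,
        output_token_ids: Optional[list[int]],
        finish_reason: Optional[str] = None,
        is_streaming: bool = False,
        delta: bool = False,
    ) -> None:
        if self.max_log_len is not None:
            # Truncate both the text and the token ids to max_log_len items,
            # matching test_request_logger_log_outputs_with_truncation.
            outputs = outputs[:self.max_log_len]
            if output_token_ids is not None:
                output_token_ids = output_token_ids[:self.max_log_len]

        # call_args[2] in the tests: empty for non-streaming requests,
        # " (streaming delta)" for deltas, " (streaming complete)" otherwise.
        stream_info = ""
        if is_streaming:
            stream_info = (" (streaming delta)"
                           if delta else " (streaming complete)")

        # %-style arguments keep formatting lazy; the message tail after the
        # "Generated response %s%s" prefix is an assumption.
        logger.info(
            "Generated response %s%s: output: %r, "
            "output_token_ids: %s, finish_reason: %s",
            request_id, stream_info, outputs, output_token_ids, finish_reason)
```

Against a sketch like this, `call_args[0]` through `call_args[5]` in the tests map directly onto the positional arguments passed to `logger.info`, which is why the assertions index into `mock_logger.info.call_args.args` rather than parsing a formatted string.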