36
36
from vllm .v1 .engine .parallel_sampling import ParentRequest
37
37
from vllm .v1 .engine .processor import Processor
38
38
from vllm .v1 .executor .abstract import Executor
39
- from vllm .v1 .metrics .loggers import (StatLoggerBase , StatLoggerFactory ,
40
- setup_default_loggers )
39
+ from vllm .v1 .metrics .loggers import StatLoggerFactory , StatLoggerManager
41
40
from vllm .v1 .metrics .prometheus import shutdown_prometheus
42
- from vllm .v1 .metrics .stats import IterationStats , SchedulerStats
41
+ from vllm .v1 .metrics .stats import IterationStats
43
42
44
43
logger = init_logger (__name__ )
45
44
@@ -95,14 +94,6 @@ def __init__(
95
94
self .log_requests = log_requests
96
95
self .log_stats = log_stats
97
96
98
- # Set up stat loggers; independent set for each DP rank.
99
- self .stat_loggers : list [list [StatLoggerBase ]] = setup_default_loggers (
100
- vllm_config = vllm_config ,
101
- log_stats = self .log_stats ,
102
- engine_num = vllm_config .parallel_config .data_parallel_size ,
103
- custom_stat_loggers = stat_loggers ,
104
- )
105
-
106
97
# Tokenizer (+ ensure liveness if running in another process).
107
98
self .tokenizer = init_tokenizer_from_configs (
108
99
model_config = vllm_config .model_config ,
@@ -121,17 +112,24 @@ def __init__(
121
112
log_stats = self .log_stats )
122
113
123
114
# EngineCore (starts the engine in background process).
124
-
125
115
self .engine_core = EngineCoreClient .make_async_mp_client (
126
116
vllm_config = vllm_config ,
127
117
executor_class = executor_class ,
128
118
log_stats = self .log_stats ,
129
119
client_addresses = client_addresses ,
130
120
client_index = client_index ,
131
121
)
132
- if self .stat_loggers :
133
- for stat_logger in self .stat_loggers [0 ]:
134
- stat_logger .log_engine_initialized ()
122
+
123
+ # Loggers.
124
+ self .logger_manager : Optional [StatLoggerManager ] = None
125
+ if self .log_stats :
126
+ self .logger_manager = StatLoggerManager (
127
+ vllm_config = vllm_config ,
128
+ engine_idxs = self .engine_core .engine_ranks ,
129
+ custom_stat_loggers = stat_loggers ,
130
+ )
131
+ self .logger_manager .log_engine_initialized ()
132
+
135
133
self .output_handler : Optional [asyncio .Task ] = None
136
134
try :
137
135
# Start output handler eagerly if we are in the asyncio eventloop.
@@ -370,7 +368,7 @@ def _run_output_handler(self):
370
368
engine_core = self .engine_core
371
369
output_processor = self .output_processor
372
370
log_stats = self .log_stats
373
- stat_loggers = self .stat_loggers if log_stats else None
371
+ logger_manager = self .logger_manager
374
372
375
373
async def output_handler ():
376
374
try :
@@ -410,9 +408,9 @@ async def output_handler():
410
408
# 4) Logging.
411
409
# TODO(rob): make into a coroutine and launch it in
412
410
# background thread once Prometheus overhead is non-trivial.
413
- if stat_loggers :
414
- AsyncLLM . _record_stats (
415
- stat_loggers [ outputs .engine_index ] ,
411
+ if logger_manager :
412
+ logger_manager . record (
413
+ engine_idx = outputs .engine_index ,
416
414
scheduler_stats = outputs .scheduler_stats ,
417
415
iteration_stats = iteration_stats ,
418
416
)
@@ -431,18 +429,6 @@ async def abort(self, request_id: str) -> None:
431
429
if self .log_requests :
432
430
logger .info ("Aborted request %s." , request_id )
433
431
434
- @staticmethod
435
- def _record_stats (
436
- stat_loggers : list [StatLoggerBase ],
437
- scheduler_stats : Optional [SchedulerStats ],
438
- iteration_stats : Optional [IterationStats ],
439
- ):
440
- """static so that it can be used from the output_handler task
441
- without a circular ref to AsyncLLM."""
442
- for stat_logger in stat_loggers :
443
- stat_logger .record (scheduler_stats = scheduler_stats ,
444
- iteration_stats = iteration_stats )
445
-
446
432
async def encode (
447
433
self ,
448
434
prompt : PromptType ,
@@ -547,9 +533,8 @@ async def do_log_stats(
547
533
scheduler_outputs = None ,
548
534
model_output = None ,
549
535
) -> None :
550
- for loggers in self .stat_loggers :
551
- for stat_logger in loggers :
552
- stat_logger .log ()
536
+ if self .logger_manager :
537
+ self .logger_manager .log ()
553
538
554
539
async def check_health (self ) -> None :
555
540
logger .debug ("Called check_health." )
@@ -653,18 +638,16 @@ async def scale_elastic_ep(self,
653
638
new_data_parallel_size
654
639
655
640
# recreate stat loggers
656
- if new_data_parallel_size > old_data_parallel_size :
657
- stat_loggers : list [list [StatLoggerBase ]] = setup_default_loggers (
641
+ if new_data_parallel_size > old_data_parallel_size and self .log_stats :
642
+ # TODO(rob): fix this after talking with Ray team.
643
+ # This resets all the Prometheus metrics since we
644
+ # unregister during initialization. Need to understand
645
+ # the intended behavior here better.
646
+ self .logger_manager = StatLoggerManager (
658
647
vllm_config = self .vllm_config ,
659
- log_stats = self .log_stats ,
660
- engine_num = new_data_parallel_size ,
648
+ engine_idxs = list (range (new_data_parallel_size )),
661
649
custom_stat_loggers = None ,
662
650
)
663
- num_new_engines = len (stat_loggers ) - len (self .stat_loggers )
664
- self .stat_loggers .extend (stat_loggers [- num_new_engines :])
665
- else :
666
- for _ in range (old_data_parallel_size - new_data_parallel_size ):
667
- self .stat_loggers .pop ()
668
651
669
652
@property
670
653
def is_running (self ) -> bool :
0 commit comments