3434
3535class TritonMetrics :
3636 def __init__ (self , labels ):
37- # System stats
38- # Scheduler State
39- self .gauge_scheduler_running_family = pb_utils .MetricFamily (
40- name = "vllm:num_requests_running" ,
41- description = "Number of requests currently running on GPU." ,
42- kind = pb_utils .MetricFamily .GAUGE ,
43- )
44- self .gauge_scheduler_waiting_family = pb_utils .MetricFamily (
45- name = "vllm:num_requests_waiting" ,
46- description = "Number of requests waiting to be processed." ,
47- kind = pb_utils .MetricFamily .GAUGE ,
48- )
49- self .gauge_scheduler_swapped_family = pb_utils .MetricFamily (
50- name = "vllm:num_requests_swapped" ,
51- description = "Number of requests swapped to CPU." ,
52- kind = pb_utils .MetricFamily .GAUGE ,
53- )
54- # KV Cache Usage in %
55- self .gauge_gpu_cache_usage_family = pb_utils .MetricFamily (
56- name = "vllm:gpu_cache_usage_perc" ,
57- description = "GPU KV-cache usage. 1 means 100 percent usage." ,
58- kind = pb_utils .MetricFamily .GAUGE ,
59- )
60- self .gauge_cpu_cache_usage_family = pb_utils .MetricFamily (
61- name = "vllm:cpu_cache_usage_perc" ,
62- description = "CPU KV-cache usage. 1 means 100 percent usage." ,
63- kind = pb_utils .MetricFamily .GAUGE ,
64- )
65-
37+ # Initialize metric families
6638 # Iteration stats
67- self .counter_num_preemption_family = pb_utils .MetricFamily (
68- name = "vllm:num_preemptions_total" ,
69- description = "Cumulative number of preemption from the engine." ,
70- kind = pb_utils .MetricFamily .COUNTER ,
71- )
7239 self .counter_prompt_tokens_family = pb_utils .MetricFamily (
7340 name = "vllm:prompt_tokens_total" ,
7441 description = "Number of prefill tokens processed." ,
@@ -80,29 +47,8 @@ def __init__(self, labels):
8047 kind = pb_utils .MetricFamily .COUNTER ,
8148 )
8249
83- # System stats
84- # Scheduler State
85- self .gauge_scheduler_running = self .gauge_scheduler_running_family .Metric (
86- labels = labels
87- )
88- self .gauge_scheduler_waiting = self .gauge_scheduler_waiting_family .Metric (
89- labels = labels
90- )
91- self .gauge_scheduler_swapped = self .gauge_scheduler_swapped_family .Metric (
92- labels = labels
93- )
94- # KV Cache Usage in %
95- self .gauge_gpu_cache_usage = self .gauge_gpu_cache_usage_family .Metric (
96- labels = labels
97- )
98- self .gauge_cpu_cache_usage = self .gauge_cpu_cache_usage_family .Metric (
99- labels = labels
100- )
101-
50+ # Initialize metrics
10251 # Iteration stats
103- self .counter_num_preemption = self .counter_num_preemption_family .Metric (
104- labels = labels
105- )
10652 self .counter_prompt_tokens = self .counter_prompt_tokens_family .Metric (
10753 labels = labels
10854 )
@@ -124,30 +70,38 @@ def info(self, type: str, obj: SupportsMetricsInfo) -> None:
12470 raise NotImplementedError
12571
12672 def _log_gauge (self , gauge , data : Union [int , float ]) -> None :
127- # Convenience function for logging to gauge.
73+ """Convenience function for logging to gauge.
74+
75+ Args:
76+ gauge: A gauge metric instance.
77+ data: An int or float value to set the gauge metric to.
78+
79+ Returns:
80+ None
81+ """
12882 gauge .set (data )
12983
13084 def _log_counter (self , counter , data : Union [int , float ]) -> None :
131- # Convenience function for logging to counter.
132- counter .increment (data )
85+ """Convenience function for logging to counter.
86+
87+ Args:
88+ counter: A counter metric instance.
89+ data: An int or float by which to increment the counter metric.
13390
134- def _log_histogram ( self , histogram , data : Union [ List [ int ], List [ float ]]) -> None :
135- # Convenience function for logging list to histogram.
136- for datum in data :
137- histogram . observe ( datum )
91+ Returns :
92+ None
93+ """
94+ counter . increment ( data )
13895
13996 def log (self , stats : VllmStats ) -> None :
140- # System state data
141- self ._log_gauge (self .metrics .gauge_scheduler_running , stats .num_running_sys )
142- self ._log_gauge (self .metrics .gauge_scheduler_waiting , stats .num_waiting_sys )
143- self ._log_gauge (self .metrics .gauge_scheduler_swapped , stats .num_swapped_sys )
144- self ._log_gauge (self .metrics .gauge_gpu_cache_usage , stats .gpu_cache_usage_sys )
145- self ._log_gauge (self .metrics .gauge_cpu_cache_usage , stats .cpu_cache_usage_sys )
146-
147- # Iteration level data
148- self ._log_counter (
149- self .metrics .counter_num_preemption , stats .num_preemption_iter
150- )
97+ """Logs tracked stats to triton metrics server every iteration.
98+
99+ Args:
100+ stats: Created by LLMEngine for use by VllmStatLogger.
101+
102+ Returns:
103+ None
104+ """
151105 self ._log_counter (
152106 self .metrics .counter_prompt_tokens , stats .num_prompt_tokens_iter
153107 )
0 commit comments