@@ -116,21 +116,19 @@ def log(self, stats: Stats) -> None:
         self._vllm_stat_logger.log(stats)
 
         # Then log TGIS specific ones
-        self.tgi_queue_size.set(stats.num_waiting + stats.num_swapped)
-        self.tgi_batch_current_size.set(stats.num_running)
-
-        for ttft in stats.time_to_first_tokens:
-            self.tgi_batch_inference_duration.labels({
-                "method": "prefill"
-            }).observe(ttft)
-        for tpot in stats.time_per_output_tokens:
-            self.tgi_batch_inference_duration.labels({
-                "method": "next_token"
-            }).observe(tpot)
-
-        # These metrics depend on open PR: https://github.com/vllm-project/vllm/pull/2764
-        if hasattr(stats, "num_prompt_tokens_lst"):
-            for input_len in stats.num_prompt_tokens_lst:
-                self.tgi_request_input_length.observe(input_len)
-            for output_len in stats.num_generation_tokens_lst:
-                self.tgi_request_generated_tokens.observe(output_len)
+        self.tgi_queue_size.set(stats.num_waiting_sys + stats.num_swapped_sys)
+        self.tgi_batch_current_size.set(stats.num_running_sys)
+
+        for ttft in stats.time_to_first_tokens_iter:
+            self.tgi_batch_inference_duration.labels(
+                {"method": "prefill"}
+            ).observe(ttft)
+        for tpot in stats.time_per_output_tokens_iter:
+            self.tgi_batch_inference_duration.labels(
+                {"method": "next_token"}
+            ).observe(tpot)
+
+        for input_len in stats.num_prompt_tokens_requests:
+            self.tgi_request_input_length.observe(input_len)
+        for output_len in stats.num_generation_tokens_requests:
+            self.tgi_request_generated_tokens.observe(output_len)
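For context, the TGIS metrics referenced in this hunk (`tgi_queue_size`, `tgi_batch_current_size`, `tgi_batch_inference_duration`, `tgi_request_input_length`, `tgi_request_generated_tokens`) are assumed to be declared elsewhere in this logger class. A minimal sketch of what those declarations might look like using `prometheus_client` follows; the metric names come from the diff, but the help strings are illustrative assumptions, and the actual code may use a different client wrapper, as the dict-style `.labels({...})` calls suggest:

```python
# Illustrative sketch only: the real logger defines these elsewhere.
# Names mirror the diff; help strings are assumptions.
from prometheus_client import Gauge, Histogram

tgi_queue_size = Gauge(
    "tgi_queue_size",
    "Requests waiting or swapped out, not yet in the running batch")
tgi_batch_current_size = Gauge(
    "tgi_batch_current_size",
    "Requests in the currently running batch")
tgi_batch_inference_duration = Histogram(
    "tgi_batch_inference_duration",
    "Duration of one inference step, split by method",
    labelnames=["method"])  # "prefill" (TTFT) or "next_token" (TPOT)
tgi_request_input_length = Histogram(
    "tgi_request_input_length",
    "Prompt length in tokens, per finished request")
tgi_request_generated_tokens = Histogram(
    "tgi_request_generated_tokens",
    "Generated tokens, per finished request")

# Note: with prometheus_client the label call is keyword-style, e.g.
#   tgi_batch_inference_duration.labels(method="prefill").observe(ttft)
```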