This repository was archived by the owner on Sep 4, 2025. It is now read-only.
File tree — 1 file changed: +8 −10 lines.
def log(self, stats: Stats) -> None:
    """Record one iteration's engine stats to the TGIS Prometheus metrics.

    Delegates to the wrapped vLLM stat logger first, then updates the
    TGIS-specific gauges and histograms from the same ``Stats`` snapshot.

    Args:
        stats: vLLM ``Stats`` snapshot for the current engine iteration.
            Uses the ``*_sys`` / ``*_iter`` / ``*_requests`` field names
            (post vllm-project/vllm#2764).
    """
    # Forward everything to the underlying vLLM stat logger first.
    self._vllm_stat_logger.log(stats)

    # Then log TGIS specific ones.
    # Queue depth counts both waiting and swapped-out sequences.
    self.tgi_queue_size.set(stats.num_waiting_sys + stats.num_swapped_sys)
    self.tgi_batch_current_size.set(stats.num_running_sys)

    # Per-iteration latency samples: "prefill" = time to first token,
    # "next_token" = time per output token.
    for ttft in stats.time_to_first_tokens_iter:
        self.tgi_batch_inference_duration.labels({
            "method": "prefill"
        }).observe(ttft)
    for tpot in stats.time_per_output_tokens_iter:
        self.tgi_batch_inference_duration.labels({
            "method": "next_token"
        }).observe(tpot)

    # Per-finished-request token-length histograms.
    for input_len in stats.num_prompt_tokens_requests:
        self.tgi_request_input_length.observe(input_len)
    for output_len in stats.num_generation_tokens_requests:
        self.tgi_request_generated_tokens.observe(output_len)
You can’t perform that action at this time.
0 commit comments