3 files changed: +24 −11 lines changed

@@ -86,3 +86,16 @@ def test_max_model_len():
     # It can be less if generation finishes due to other reasons (e.g., EOS)
     # before reaching the absolute model length limit.
     assert num_total_tokens <= max_model_len
+
+
+def test_log_stats():
+    llm = LLM(
+        model=MODEL_NAME,
+        disable_log_stats=False,
+        gpu_memory_utilization=0.10,
+        enforce_eager=True,  # reduce test time
+    )
+    outputs = llm.generate(PROMPTS, sampling_params=None)
+
+    # disable_log_stats is False, so every output should have metrics
+    assert all(output.metrics is not None for output in outputs)
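The new test relies on module-level MODEL_NAME and PROMPTS constants defined earlier in the test file. As a standalone illustration of the behavior under test, here is a minimal sketch with hypothetical stand-ins for those constants (the model choice is an assumption, not necessarily what the test actually uses):

```python
from vllm import LLM

# Hypothetical stand-ins for the test module's constants.
MODEL_NAME = "facebook/opt-125m"   # assumption: any small model suffices
PROMPTS = ["Hello, my name is", "The future of AI is"]

llm = LLM(
    model=MODEL_NAME,
    disable_log_stats=False,  # keep per-request stats collection enabled
    enforce_eager=True,       # skip CUDA graph capture to reduce startup time
)
outputs = llm.generate(PROMPTS)

# With stats enabled, every RequestOutput should carry a metrics object.
assert all(output.metrics is not None for output in outputs)
```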
@@ -14,6 +14,7 @@
 from vllm.lora.request import LoRARequest
 from vllm.multimodal.inputs import MultiModalPlaceholderDict
 from vllm.sequence import RequestMetrics
+from vllm.v1.metrics.stats import RequestStateStats

 logger = init_logger(__name__)

@@ -108,7 +109,7 @@ def __init__(
         prompt_logprobs: Optional[PromptLogprobs],
         outputs: list[CompletionOutput],
         finished: bool,
-        metrics: Optional[RequestMetrics] = None,
+        metrics: Optional[Union[RequestMetrics, RequestStateStats]] = None,
         lora_request: Optional[LoRARequest] = None,
         encoder_prompt: Optional[str] = None,
         encoder_prompt_token_ids: Optional[list[int]] = None,
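This hunk (presumably the module that defines RequestOutput, vllm/outputs.py in the vLLM tree) widens the metrics parameter to accept the V1 engine's RequestStateStats alongside the V0 RequestMetrics. Consumers that inspect metrics may therefore need to narrow the union before using engine-specific fields. A minimal sketch; the returned strings are illustrative only:

```python
from typing import Optional, Union

from vllm.sequence import RequestMetrics
from vllm.v1.metrics.stats import RequestStateStats


def describe_metrics(
    metrics: Optional[Union[RequestMetrics, RequestStateStats]],
) -> str:
    # None means the engine ran with stats logging disabled.
    if metrics is None:
        return "no metrics (stats logging disabled)"
    # Narrow the union before relying on engine-specific fields.
    if isinstance(metrics, RequestStateStats):
        return f"V1 engine stats: {metrics!r}"
    return f"V0 engine metrics: {metrics!r}"
```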
@@ -248,16 +248,15 @@ def _new_request_output(
         if prompt_token_ids is None and self.prompt_embeds is not None:
             prompt_token_ids = [0] * len(self.prompt_embeds)

-        return RequestOutput(
-            request_id=request_id,
-            prompt=self.prompt,
-            prompt_token_ids=prompt_token_ids,
-            prompt_logprobs=prompt_logprobs,
-            outputs=cast(list[CompletionOutput], outputs),
-            finished=finished,
-            kv_transfer_params=kv_transfer_params,
-            num_cached_tokens=self.num_cached_tokens,
-        )
+        return RequestOutput(request_id=request_id,
+                             prompt=self.prompt,
+                             prompt_token_ids=prompt_token_ids,
+                             prompt_logprobs=prompt_logprobs,
+                             outputs=cast(list[CompletionOutput], outputs),
+                             finished=finished,
+                             kv_transfer_params=kv_transfer_params,
+                             num_cached_tokens=self.num_cached_tokens,
+                             metrics=self.stats)

     def _new_completion_output(
         self,
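Passing metrics=self.stats hands the caller the same per-request stats object that the output processor updates. When the engine runs with disable_log_stats=True, self.stats is expected to be None, so output.metrics stays optional either way and downstream code should guard for that. A hedged consumer sketch under that assumption:

```python
from vllm.outputs import RequestOutput


def log_request_metrics(output: RequestOutput) -> None:
    # metrics is None when the engine was created with disable_log_stats=True,
    # so consumers should always handle the missing case.
    if output.metrics is None:
        print(f"{output.request_id}: stats logging disabled")
    else:
        print(f"{output.request_id}: {output.metrics!r}")
```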