2 files changed: +6 −10 lines

ci/L0_backend_vllm/metrics_test

@@ -70,12 +70,10 @@ def get_metrics(self):
 
         return vllm_dict
 
-    def vllm_async_stream_infer(
+    def vllm_infer(
         self,
         prompts,
         sampling_parameters,
-        stream,
-        send_parameters_as_tensor,
         model_name,
     ):
         """
@@ -89,15 +87,15 @@ def vllm_async_stream_infer(
             request_data = create_vllm_request(
                 prompts[i],
                 i,
-                stream,
+                False,
                 sampling_parameters,
                 model_name,
-                send_parameters_as_tensor,
+                True,
             )
             self.triton_client.async_stream_infer(
                 model_name=model_name,
-                request_id=request_data["request_id"],
                 inputs=request_data["inputs"],
+                request_id=request_data["request_id"],
                 outputs=request_data["outputs"],
                 parameters=sampling_parameters,
             )
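The two swaps above drop literals into the same positional slots the removed parameters occupied: `False` where `stream` was forwarded and `True` where `send_parameters_as_tensor` was. For orientation, here is the `create_vllm_request` signature as inferred from this call site; only the names that appear in the diff are confirmed, the rest of the shape is an assumption:

```python
# Signature inferred from the positional call above; hypothetical beyond
# the parameter names visible in the diff.
def create_vllm_request(
    prompt,                     # prompts[i]
    request_id,                 # i, used as the request identifier
    stream,                     # now always False in this test helper
    sampling_parameters,
    model_name,
    send_parameters_as_tensor,  # now always True
):
    ...
```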
@@ -121,11 +119,9 @@ def test_vllm_metrics(self):
         }
 
         # Test vLLM metrics
-        self.vllm_async_stream_infer(
+        self.vllm_infer(
             prompts=self.prompts,
             sampling_parameters=self.sampling_parameters,
-            stream=False,
-            send_parameters_as_tensor=True,
             model_name=self.vllm_model_name,
         )
         expected_metrics_dict["vllm:prompt_tokens_total"] = 18
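Assembling the two hunks of this file, the renamed helper presumably reads as follows after the change. This is a reconstruction from the diff context only: the docstring body is not shown in the diff, and the loop over the prompt list is inferred from the `prompts[i]` / `i` arguments rather than confirmed by the source:

```python
def vllm_infer(
    self,
    prompts,
    sampling_parameters,
    model_name,
):
    """
    ... (docstring body not shown in the diff) ...
    """
    for i in range(len(prompts)):  # loop inferred from prompts[i] and i
        request_data = create_vllm_request(
            prompts[i],
            i,
            False,  # stream: fixed, no longer caller-supplied
            sampling_parameters,
            model_name,
            True,  # send_parameters_as_tensor: fixed
        )
        self.triton_client.async_stream_infer(
            model_name=model_name,
            inputs=request_data["inputs"],
            request_id=request_data["request_id"],
            outputs=request_data["outputs"],
            parameters=sampling_parameters,
        )
```

Note that moving `request_id` after `inputs` in the `async_stream_infer` call is purely cosmetic, since all arguments are passed by keyword.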
Second changed file:

@@ -168,7 +168,7 @@ def init_engine(self):
         )
 
         # Add vLLM custom metrics
-        if not self.metrics:
+        if self.metrics:
             self.llm_engine.add_logger("triton", self.metrics)
 
     def setup_lora(self):
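This one-word change fixes an inverted guard: `if not self.metrics:` registered the stat logger exactly when `self.metrics` was unset, so the engine either received a falsy logger or, when metrics were actually enabled, never registered one at all. A minimal sketch of the intended flow, assuming `self.metrics` holds a logger object when metrics collection is enabled and is `None` otherwise (that convention is inferred from the guard, not shown in the diff):

```python
# Hypothetical surrounding context; only the guard and the add_logger
# call are confirmed by the diff.
self.metrics = build_triton_stat_logger() if metrics_enabled else None

# Add vLLM custom metrics
if self.metrics:
    # Attach the Triton-side stat logger to the vLLM engine under the
    # name "triton" (the call shown in the diff).
    self.llm_engine.add_logger("triton", self.metrics)
```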