@@ -44,7 +44,7 @@ def _get_inputs(
4444 sampling_parameters = None ,
4545 return_finish_reason = None ,
4646 return_cumulative_logprob = None ,
47- return_num_token_ids = None ,
47+ return_num_output_tokens = None ,
4848 ):
4949 inputs = []
5050
@@ -76,9 +76,13 @@ def _get_inputs(
7676 np .array ([return_cumulative_logprob ], dtype = bool )
7777 )
7878
79- if return_num_token_ids is not None :
80- inputs .append (grpcclient .InferInput ("return_num_token_ids" , [1 ], "BOOL" ))
81- inputs [- 1 ].set_data_from_numpy (np .array ([return_num_token_ids ], dtype = bool ))
79+ if return_num_output_tokens is not None :
80+ inputs .append (
81+ grpcclient .InferInput ("return_num_output_tokens" , [1 ], "BOOL" )
82+ )
83+ inputs [- 1 ].set_data_from_numpy (
84+ np .array ([return_num_output_tokens ], dtype = bool )
85+ )
8286
8387 return inputs
8488
@@ -131,15 +135,15 @@ def _assert_cumulative_logprob(self, return_cumulative_logprob):
131135 assert cumulative_logprob != prev_cumulative_logprob
132136 prev_cumulative_logprob = cumulative_logprob
133137
134- def _assert_num_token_ids (self , return_num_token_ids ):
138+ def _assert_num_output_tokens (self , return_num_output_tokens ):
135139 for response in self ._responses :
136140 result , error = response ["result" ], response ["error" ]
137141 assert error is None
138- num_token_ids_np = result .as_numpy (name = "num_token_ids " )
139- if return_num_token_ids is None or return_num_token_ids == False :
140- assert num_token_ids_np is None
142+ num_output_tokens_np = result .as_numpy (name = "num_output_tokens " )
143+ if return_num_output_tokens is None or return_num_output_tokens == False :
144+ assert num_output_tokens_np is None
141145 continue
142- num_token_ids = num_token_ids_np [0 ].astype (int )
146+ num_output_tokens = num_output_tokens_np [0 ].astype (int )
143147 # TODO: vLLM may return token ids identical to the previous one when
144148 # streaming, for example:
145149 #
@@ -156,30 +160,30 @@ def _assert_num_token_ids(self, return_num_token_ids):
156160 # curr: text=' the term “', token_ids=array('l', [5, 1385, 44, 48])
157161 #
158162 # If this is no longer the case in a future release, change the assert
159- # to assert num_token_ids > 0.
160- assert num_token_ids >= 0
163+ # to assert num_output_tokens > 0.
164+ assert num_output_tokens >= 0
161165
162166 @pytest .mark .parametrize ("stream" , [True , False ])
163167 @pytest .mark .parametrize ("return_finish_reason" , [None , True , False ])
164168 @pytest .mark .parametrize ("return_cumulative_logprob" , [None , True , False ])
165- @pytest .mark .parametrize ("return_num_token_ids " , [None , True , False ])
169+ @pytest .mark .parametrize ("return_num_output_tokens " , [None , True , False ])
166170 def test_additional_outputs (
167171 self ,
168172 stream ,
169173 return_finish_reason ,
170174 return_cumulative_logprob ,
171- return_num_token_ids ,
175+ return_num_output_tokens ,
172176 ):
173177 inputs = self ._get_inputs (
174178 self ._prompt ,
175179 stream = stream ,
176180 sampling_parameters = self ._sampling_parameters ,
177181 return_finish_reason = return_finish_reason ,
178182 return_cumulative_logprob = return_cumulative_logprob ,
179- return_num_token_ids = return_num_token_ids ,
183+ return_num_output_tokens = return_num_output_tokens ,
180184 )
181185 self ._llm_infer (inputs )
182186 self ._assert_text_output_valid ()
183187 self ._assert_finish_reason (return_finish_reason )
184188 self ._assert_cumulative_logprob (return_cumulative_logprob )
185- self ._assert_num_token_ids ( return_num_token_ids )
189+ self ._assert_num_output_tokens ( return_num_output_tokens )
0 commit comments