@@ -44,7 +44,7 @@ def _get_inputs(
4444 sampling_parameters = None ,
4545 return_finish_reason = None ,
4646 return_cumulative_logprob = None ,
47- return_token_ids = None ,
47+ return_num_token_ids = None ,
4848 ):
4949 inputs = []
5050
@@ -76,9 +76,9 @@ def _get_inputs(
7676 np .array ([return_cumulative_logprob ], dtype = bool )
7777 )
7878
79- if return_token_ids is not None :
80- inputs .append (grpcclient .InferInput ("return_token_ids " , [1 ], "BOOL" ))
81- inputs [- 1 ].set_data_from_numpy (np .array ([return_token_ids ], dtype = bool ))
79+ if return_num_token_ids is not None :
80+ inputs .append (grpcclient .InferInput ("return_num_token_ids " , [1 ], "BOOL" ))
81+ inputs [- 1 ].set_data_from_numpy (np .array ([return_num_token_ids ], dtype = bool ))
8282
8383 return inputs
8484
@@ -131,15 +131,15 @@ def _assert_cumulative_logprob(self, return_cumulative_logprob):
131131 assert cumulative_logprob != prev_cumulative_logprob
132132 prev_cumulative_logprob = cumulative_logprob
133133
134- def _assert_token_ids (self , return_token_ids ):
134+ def _assert_num_token_ids (self , return_num_token_ids ):
135135 for response in self ._responses :
136136 result , error = response ["result" ], response ["error" ]
137137 assert error is None
138- token_ids_np = result .as_numpy (name = "token_ids " )
139- if return_token_ids is None or return_token_ids == False :
140- assert token_ids_np is None
138+ num_token_ids_np = result .as_numpy (name = "num_token_ids " )
139+ if return_num_token_ids is None or return_num_token_ids == False :
140+ assert num_token_ids_np is None
141141 continue
142- token_ids = token_ids_np [0 ].astype (int )
142+ num_token_ids = num_token_ids_np [0 ].astype (int )
143143 # TODO: vLLM may return token ids identical to the previous one when
144144 # streaming, for example:
145145 #
@@ -155,31 +155,31 @@ def _assert_token_ids(self, return_token_ids):
155155 # prev: text=' the term', token_ids=array('l', [5, 1385, 44, 48])
156156 # curr: text=' the term “', token_ids=array('l', [5, 1385, 44, 48])
157157 #
158- # If this is no longer the case in a future release, change to
159- # assert len(token_ids) > 0.
160- assert len ( token_ids ) >= 0
158+ # If this is no longer the case in a future release, change the assert
159+ # to assert num_token_ids > 0.
160+ assert num_token_ids >= 0
161161
162162 @pytest .mark .parametrize ("stream" , [True , False ])
163163 @pytest .mark .parametrize ("return_finish_reason" , [None , True , False ])
164164 @pytest .mark .parametrize ("return_cumulative_logprob" , [None , True , False ])
165- @pytest .mark .parametrize ("return_token_ids " , [None , True , False ])
165+ @pytest .mark .parametrize ("return_num_token_ids " , [None , True , False ])
166166 def test_additional_outputs (
167167 self ,
168168 stream ,
169169 return_finish_reason ,
170170 return_cumulative_logprob ,
171- return_token_ids ,
171+ return_num_token_ids ,
172172 ):
173173 inputs = self ._get_inputs (
174174 self ._prompt ,
175175 stream = stream ,
176176 sampling_parameters = self ._sampling_parameters ,
177177 return_finish_reason = return_finish_reason ,
178178 return_cumulative_logprob = return_cumulative_logprob ,
179- return_token_ids = return_token_ids ,
179+ return_num_token_ids = return_num_token_ids ,
180180 )
181181 self ._llm_infer (inputs )
182182 self ._assert_text_output_valid ()
183183 self ._assert_finish_reason (return_finish_reason )
184184 self ._assert_cumulative_logprob (return_cumulative_logprob )
185- self ._assert_token_ids ( return_token_ids )
185+ self ._assert_num_token_ids ( return_num_token_ids )
0 commit comments