@@ -213,3 +213,29 @@ def find_metric(name) -> list[Metric]:
     assert len(num_accepted_tokens_per_pos) == 1
     assert isinstance(num_accepted_tokens_per_pos[0], Vector)
     assert len(num_accepted_tokens_per_pos[0].values) == 5
+
+
+@pytest.mark.parametrize("model", ["meta-llama/Llama-3.2-1B-Instruct"])
+def test_skip_tokenizer_initialization(model: str,
+                                       monkeypatch: pytest.MonkeyPatch):
+    monkeypatch.setenv("VLLM_USE_V1", "1")
+    # This test checks if the flag skip_tokenizer_init skips the initialization
+    # of tokenizer and detokenizer. The generated output is expected to contain
+    # token ids.
+    llm = LLM(
+        model=model,
+        skip_tokenizer_init=True,
+        enforce_eager=True,
+    )
+    sampling_params = SamplingParams(prompt_logprobs=True, detokenize=True)
+
+    with pytest.raises(ValueError, match="cannot pass text prompts when"):
+        llm.generate("abc", sampling_params)
+
+    outputs = llm.generate({"prompt_token_ids": [1, 2, 3]},
+                           sampling_params=sampling_params)
+    assert len(outputs) > 0
+    completions = outputs[0].outputs
+    assert len(completions) > 0
+    assert completions[0].text == ""
+    assert completions[0].token_ids
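For reference, a minimal standalone sketch of the behaviour this test exercises, assuming only the vLLM `LLM`/`SamplingParams` API used in the diff above (the `from vllm import ...` path and the `max_tokens` value are illustrative, not taken from this change): with `skip_tokenizer_init=True`, prompts must be supplied as token ids and the returned completions carry `token_ids` but no detokenized text.

```python
# Sketch under the assumptions stated above; not part of the diff.
from vllm import LLM, SamplingParams

llm = LLM(
    model="meta-llama/Llama-3.2-1B-Instruct",
    skip_tokenizer_init=True,  # no tokenizer/detokenizer is loaded
    enforce_eager=True,
)
sampling_params = SamplingParams(max_tokens=8)

# Text prompts would raise a ValueError (no tokenizer to encode them),
# so the prompt is passed directly as token ids.
outputs = llm.generate({"prompt_token_ids": [1, 2, 3]},
                       sampling_params=sampling_params)
completion = outputs[0].outputs[0]
print(completion.token_ids)   # populated with generated token ids
print(repr(completion.text))  # '' – nothing is detokenized
```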