1 file changed: +13 −1 lines changed

@@ -470,7 +470,19 @@ def _validate_model_input(
470470 else :
471471 tokenizer = self .tokenizer .get_lora_tokenizer (lora_request )
472472 max_input_id = max (prompt_ids , default = 0 )
473- if max_input_id > tokenizer .max_token_id :
473+
474+ # NOTE: tokenizer.max_token_id is the largest id in the tokenizer's
475+ # vocabulary, while self.model_config.get_vocab_size() is the model's vocab size.
476+ # For Qwen3 models, the language model has extra tokens that do
477+ # not exist in the tokenizer, and vice versa for multimodal
478+ # placeholder tokens in some multimodal models.
479+ # See https://github.com/QwenLM/Qwen3/issues/29#issuecomment-1933720399 # noqa: E501
480+ # and https://github.com/vllm-project/vllm/pull/22471#discussion_r2312251421 # noqa: E501
481+
482+ # Here we take the max of the two to determine if a token id is
483+ # truly out-of-vocabulary.
484+ if max_input_id > max (tokenizer .max_token_id ,
485+ self .model_config .get_vocab_size () - 1 ):
474486 raise ValueError (
475487 f"Token id { max_input_id } is out of vocabulary" )
476488
You can’t perform that action at this time.
0 commit comments