1 parent 62b8aeb commit d3c8180
vllm/entrypoints/openai/serving_engine.py
@@ -206,6 +206,12 @@ def _validate_prompt_and_tokenize(
     token_num = len(input_ids)

     if request.max_tokens is None:
+        if token_num >= self.max_model_len:
+            raise ValueError(
+                f"This model's maximum context length is "
+                f"{self.max_model_len} tokens. However, you requested "
+                f"{token_num} tokens in the messages. "
+                f"Please reduce the length of the messages.")
         request.max_tokens = self.max_model_len - token_num

     if token_num + request.max_tokens > self.max_model_len:
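Behaviorally, the hunk defaults max_tokens to whatever context remains after the prompt, and rejects prompts that already fill the window instead of computing a zero or negative budget. A minimal standalone sketch of that logic, assuming an illustrative MAX_MODEL_LEN of 4096 and a stand-in Request type (neither is vLLM's actual API):

    # Sketch of the defaulting logic above, outside vLLM.
    # MAX_MODEL_LEN and Request are illustrative stand-ins, not vLLM types.
    from dataclasses import dataclass
    from typing import Optional

    MAX_MODEL_LEN = 4096  # assumed context window


    @dataclass
    class Request:
        max_tokens: Optional[int] = None


    def default_max_tokens(request: Request, token_num: int) -> None:
        """Default max_tokens to the context remaining after the prompt."""
        if request.max_tokens is None:
            if token_num >= MAX_MODEL_LEN:
                # Prompt already fills the window: no room left to generate.
                raise ValueError(
                    f"This model's maximum context length is {MAX_MODEL_LEN} "
                    f"tokens. However, you requested {token_num} tokens in "
                    f"the messages. Please reduce the length of the messages.")
            request.max_tokens = MAX_MODEL_LEN - token_num

        if token_num + request.max_tokens > MAX_MODEL_LEN:
            raise ValueError("prompt plus max_tokens exceeds the context window")


    # Example: a 4000-token prompt defaults max_tokens to the remaining 96.
    req = Request()
    default_max_tokens(req, token_num=4000)
    assert req.max_tokens == 96

The early check matters because without it a prompt of exactly MAX_MODEL_LEN tokens would silently default max_tokens to 0, and a longer prompt would produce a negative value rather than a clear error.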