
Commit bbbf865

Align max_tokens behavior with openai (#852)

1 parent 9f6be86 commit bbbf865

File tree

2 files changed: +3 -1 lines changed

vllm/entrypoints/openai/api_server.py
Lines changed: 2 additions & 0 deletions

@@ -130,6 +130,8 @@ async def check_length(
     input_ids = tokenizer(prompt).input_ids
     token_num = len(input_ids)
 
+    if request.max_tokens is None:
+        request.max_tokens = max_model_len - token_num
     if token_num + request.max_tokens > max_model_len:
         return input_ids, create_error_response(
             HTTPStatus.BAD_REQUEST,
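
Taken together with the protocol change below, the effect is that a request which omits max_tokens now receives the remaining context budget (max_model_len minus the prompt's token count) rather than the previous hard-coded default. A minimal, self-contained sketch of that logic follows; the function name resolve_max_tokens and the 2048 context length are illustrative assumptions, not part of the patch.

# Sketch only: mirrors the new check_length handling of an omitted max_tokens.
from typing import Optional

def resolve_max_tokens(token_num: int,
                       max_tokens: Optional[int],
                       max_model_len: int = 2048) -> int:
    # New behavior: None means "use whatever context remains after the prompt",
    # matching the OpenAI API, instead of the old schema default of 16.
    if max_tokens is None:
        max_tokens = max_model_len - token_num
    if token_num + max_tokens > max_model_len:
        raise ValueError("maximum context length exceeded")
    return max_tokens

# A 100-token prompt with no max_tokens is granted the remaining 1948 tokens.
assert resolve_max_tokens(100, None) == 1948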

vllm/entrypoints/openai/protocol.py
Lines changed: 1 addition & 1 deletion

@@ -58,7 +58,7 @@ class ChatCompletionRequest(BaseModel):
     temperature: Optional[float] = 0.7
     top_p: Optional[float] = 1.0
     n: Optional[int] = 1
-    max_tokens: Optional[int] = 16
+    max_tokens: Optional[int] = None
     stop: Optional[Union[str, List[str]]] = Field(default_factory=list)
     stream: Optional[bool] = False
     presence_penalty: Optional[float] = 0.0
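
For reference, a stripped-down version of the request model with the new default is sketched below (assuming pydantic, which BaseModel and Field come from). Only the fields visible in the hunk are included, and the trailing assertion is just an illustrative usage, not part of the commit.

# Trimmed sketch of the schema after this change; fields limited to the hunk above.
from typing import List, Optional, Union
from pydantic import BaseModel, Field

class ChatCompletionRequest(BaseModel):
    temperature: Optional[float] = 0.7
    top_p: Optional[float] = 1.0
    n: Optional[int] = 1
    # Was `= 16`; None now signals "let the server fill in the remaining context".
    max_tokens: Optional[int] = None
    stop: Optional[Union[str, List[str]]] = Field(default_factory=list)
    stream: Optional[bool] = False
    presence_penalty: Optional[float] = 0.0

# A request that leaves max_tokens unset is no longer capped at 16 tokens.
assert ChatCompletionRequest().max_tokens is None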

0 commit comments
