Skip to content

Commit bf668b5

Browse files
authored
[Feature] Support multiple api keys in server (#18548)
Signed-off-by: Yan Pashkovsky <[email protected]>
1 parent da3e0bd commit bf668b5

File tree

3 files changed

+30
-29
lines changed

3 files changed

+30
-29
lines changed

docs/getting_started/quickstart.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ curl http://localhost:8000/v1/models
126126
```
127127

128128
You can pass in the argument `--api-key` or environment variable `VLLM_API_KEY` to enable the server to check for an API key in the header.
129+
You can pass multiple keys after `--api-key`, and the server will accept any of the keys passed; this can be useful for key rotation.
129130

130131
### OpenAI Completions API with vLLM
131132

vllm/entrypoints/openai/api_server.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1239,9 +1239,9 @@ class AuthenticationMiddleware:
12391239
2. The request path doesn't start with /v1 (e.g. /health).
12401240
"""
12411241

1242-
def __init__(self, app: ASGIApp, api_token: str) -> None:
1242+
def __init__(self, app: ASGIApp, tokens: list[str]) -> None:
12431243
self.app = app
1244-
self.api_token = api_token
1244+
self.api_tokens = {f"Bearer {token}" for token in tokens}
12451245

12461246
def __call__(self, scope: Scope, receive: Receive,
12471247
send: Send) -> Awaitable[None]:
@@ -1255,7 +1255,7 @@ def __call__(self, scope: Scope, receive: Receive,
12551255
headers = Headers(scope=scope)
12561256
# Type narrow to satisfy mypy.
12571257
if url_path.startswith("/v1") and headers.get(
1258-
"Authorization") != f"Bearer {self.api_token}":
1258+
"Authorization") not in self.api_tokens:
12591259
response = JSONResponse(content={"error": "Unauthorized"},
12601260
status_code=401)
12611261
return response(scope, receive, send)
@@ -1303,7 +1303,7 @@ class ScalingMiddleware:
13031303
"""
13041304
Middleware that checks if the model is currently scaling and
13051305
returns a 503 Service Unavailable response if it is.
1306-
1306+
13071307
This middleware applies to all HTTP requests and prevents
13081308
processing when the model is in a scaling state.
13091309
"""
@@ -1512,8 +1512,8 @@ async def validation_exception_handler(_: Request,
15121512
status_code=HTTPStatus.BAD_REQUEST)
15131513

15141514
# Ensure --api-key option from CLI takes precedence over VLLM_API_KEY
1515-
if token := args.api_key or envs.VLLM_API_KEY:
1516-
app.add_middleware(AuthenticationMiddleware, api_token=token)
1515+
if tokens := [key for key in (args.api_key or [envs.VLLM_API_KEY]) if key]:
1516+
app.add_middleware(AuthenticationMiddleware, tokens=tokens)
15171517

15181518
if args.enable_request_id_headers:
15191519
app.add_middleware(XRequestIdMiddleware)

vllm/entrypoints/openai/cli_args.py

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -85,22 +85,22 @@ class FrontendArgs:
8585
"""Allowed methods."""
8686
allowed_headers: list[str] = field(default_factory=lambda: ["*"])
8787
"""Allowed headers."""
88-
api_key: Optional[str] = None
89-
"""If provided, the server will require this key to be presented in the
90-
header."""
88+
api_key: Optional[list[str]] = None
89+
"""If provided, the server will require one of these keys to be presented in
90+
the header."""
9191
lora_modules: Optional[list[LoRAModulePath]] = None
9292
"""LoRA modules configurations in either 'name=path' format or JSON format
93-
or JSON list format. Example (old format): `'name=path'` Example (new
94-
format): `{\"name\": \"name\", \"path\": \"lora_path\",
93+
or JSON list format. Example (old format): `'name=path'` Example (new
94+
format): `{\"name\": \"name\", \"path\": \"lora_path\",
9595
\"base_model_name\": \"id\"}`"""
9696
chat_template: Optional[str] = None
97-
"""The file path to the chat template, or the template in single-line form
97+
"""The file path to the chat template, or the template in single-line form
9898
for the specified model."""
9999
chat_template_content_format: ChatTemplateContentFormatOption = "auto"
100100
"""The format to render message content within a chat template.
101101
102102
* "string" will render the content as a string. Example: `"Hello World"`
103-
* "openai" will render the content as a list of dictionaries, similar to OpenAI
103+
* "openai" will render the content as a list of dictionaries, similar to OpenAI
104104
schema. Example: `[{"type": "text", "text": "Hello world!"}]`"""
105105
response_role: str = "assistant"
106106
"""The role name to return if `request.add_generation_prompt=true`."""
@@ -117,40 +117,40 @@ class FrontendArgs:
117117
root_path: Optional[str] = None
118118
"""FastAPI root_path when app is behind a path based routing proxy."""
119119
middleware: list[str] = field(default_factory=lambda: [])
120-
"""Additional ASGI middleware to apply to the app. We accept multiple
121-
--middleware arguments. The value should be an import path. If a function
122-
is provided, vLLM will add it to the server using
123-
`@app.middleware('http')`. If a class is provided, vLLM will
120+
"""Additional ASGI middleware to apply to the app. We accept multiple
121+
--middleware arguments. The value should be an import path. If a function
122+
is provided, vLLM will add it to the server using
123+
`@app.middleware('http')`. If a class is provided, vLLM will
124124
add it to the server using `app.add_middleware()`."""
125125
return_tokens_as_token_ids: bool = False
126-
"""When `--max-logprobs` is specified, represents single tokens as
127-
strings of the form 'token_id:{token_id}' so that tokens that are not
126+
"""When `--max-logprobs` is specified, represents single tokens as
127+
strings of the form 'token_id:{token_id}' so that tokens that are not
128128
JSON-encodable can be identified."""
129129
disable_frontend_multiprocessing: bool = False
130-
"""If specified, will run the OpenAI frontend server in the same process as
130+
"""If specified, will run the OpenAI frontend server in the same process as
131131
the model serving engine."""
132132
enable_request_id_headers: bool = False
133-
"""If specified, API server will add X-Request-Id header to responses.
133+
"""If specified, API server will add X-Request-Id header to responses.
134134
Caution: this hurts performance at high QPS."""
135135
enable_auto_tool_choice: bool = False
136-
"""If specified, exclude tool definitions in prompts when
136+
"""If specified, exclude tool definitions in prompts when
137137
tool_choice='none'."""
138138
exclude_tools_when_tool_choice_none: bool = False
139-
"""Enable auto tool choice for supported models. Use `--tool-call-parser`
139+
"""Enable auto tool choice for supported models. Use `--tool-call-parser`
140140
to specify which parser to use."""
141141
tool_call_parser: Optional[str] = None
142-
"""Select the tool call parser depending on the model that you're using.
143-
This is used to parse the model-generated tool call into OpenAI API format.
144-
Required for `--enable-auto-tool-choice`. You can choose any option from
142+
"""Select the tool call parser depending on the model that you're using.
143+
This is used to parse the model-generated tool call into OpenAI API format.
144+
Required for `--enable-auto-tool-choice`. You can choose any option from
145145
the built-in parsers or register a plugin via `--tool-parser-plugin`."""
146146
tool_parser_plugin: str = ""
147-
"""Special the tool parser plugin write to parse the model-generated tool
148-
into OpenAI API format, the name register in this plugin can be used in
147+
"""Special the tool parser plugin write to parse the model-generated tool
148+
into OpenAI API format, the name register in this plugin can be used in
149149
`--tool-call-parser`."""
150150
log_config_file: Optional[str] = envs.VLLM_LOGGING_CONFIG_PATH
151151
"""Path to logging config JSON file for both vllm and uvicorn"""
152152
max_log_len: Optional[int] = None
153-
"""Max number of prompt characters or prompt ID numbers being printed in
153+
"""Max number of prompt characters or prompt ID numbers being printed in
154154
log. The default of None means unlimited."""
155155
disable_fastapi_docs: bool = False
156156
"""Disable FastAPI's OpenAPI schema, Swagger UI, and ReDoc endpoint."""

0 commit comments

Comments
 (0)