Skip to content

Commit bf668b5

Browse files
authored
[Feature] Support multiple api keys in server (#18548)
Signed-off-by: Yan Pashkovsky <[email protected]>
1 parent da3e0bd commit bf668b5

File tree

3 files changed

+30
-29
lines changed

3 files changed

+30
-29
lines changed

docs/getting_started/quickstart.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ curl http://localhost:8000/v1/models
126126
```
127127

128128
You can pass in the argument `--api-key` or environment variable `VLLM_API_KEY` to enable the server to check for an API key in the header.
129+
You can pass multiple keys after `--api-key`, and the server will accept any of the keys passed; this can be useful for key rotation.
129130

130131
### OpenAI Completions API with vLLM
131132

vllm/entrypoints/openai/api_server.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1239,9 +1239,9 @@ class AuthenticationMiddleware:
12391239
2. The request path doesn't start with /v1 (e.g. /health).
12401240
"""
12411241

1242-
def __init__(self, app: ASGIApp, api_token: str) -> None:
1242+
def __init__(self, app: ASGIApp, tokens: list[str]) -> None:
12431243
self.app = app
1244-
self.api_token = api_token
1244+
self.api_tokens = {f"Bearer {token}" for token in tokens}
12451245

12461246
def __call__(self, scope: Scope, receive: Receive,
12471247
send: Send) -> Awaitable[None]:
@@ -1255,7 +1255,7 @@ def __call__(self, scope: Scope, receive: Receive,
12551255
headers = Headers(scope=scope)
12561256
# Type narrow to satisfy mypy.
12571257
if url_path.startswith("/v1") and headers.get(
1258-
"Authorization") != f"Bearer {self.api_token}":
1258+
"Authorization") not in self.api_tokens:
12591259
response = JSONResponse(content={"error": "Unauthorized"},
12601260
status_code=401)
12611261
return response(scope, receive, send)
@@ -1303,7 +1303,7 @@ class ScalingMiddleware:
13031303
"""
13041304
Middleware that checks if the model is currently scaling and
13051305
returns a 503 Service Unavailable response if it is.
1306-
1306+
13071307
This middleware applies to all HTTP requests and prevents
13081308
processing when the model is in a scaling state.
13091309
"""
@@ -1512,8 +1512,8 @@ async def validation_exception_handler(_: Request,
15121512
status_code=HTTPStatus.BAD_REQUEST)
15131513

15141514
# Ensure --api-key option from CLI takes precedence over VLLM_API_KEY
1515-
if token := args.api_key or envs.VLLM_API_KEY:
1516-
app.add_middleware(AuthenticationMiddleware, api_token=token)
1515+
if tokens := [key for key in (args.api_key or [envs.VLLM_API_KEY]) if key]:
1516+
app.add_middleware(AuthenticationMiddleware, tokens=tokens)
15171517

15181518
if args.enable_request_id_headers:
15191519
app.add_middleware(XRequestIdMiddleware)

vllm/entrypoints/openai/cli_args.py

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -85,22 +85,22 @@ class FrontendArgs:
8585
"""Allowed methods."""
8686
allowed_headers: list[str] = field(default_factory=lambda: ["*"])
8787
"""Allowed headers."""
88-
api_key: Optional[str] = None
89-
"""If provided, the server will require this key to be presented in the
90-
header."""
88+
api_key: Optional[list[str]] = None
89+
"""If provided, the server will require one of these keys to be presented in
90+
the header."""
9191
lora_modules: Optional[list[LoRAModulePath]] = None
9292
"""LoRA modules configurations in either 'name=path' format or JSON format
93-
or JSON list format. Example (old format): `'name=path'` Example (new
94-
format): `{\"name\": \"name\", \"path\": \"lora_path\",
93+
or JSON list format. Example (old format): `'name=path'` Example (new
94+
format): `{\"name\": \"name\", \"path\": \"lora_path\",
9595
\"base_model_name\": \"id\"}`"""
9696
chat_template: Optional[str] = None
97-
"""The file path to the chat template, or the template in single-line form
97+
"""The file path to the chat template, or the template in single-line form
9898
for the specified model."""
9999
chat_template_content_format: ChatTemplateContentFormatOption = "auto"
100100
"""The format to render message content within a chat template.
101101
102102
* "string" will render the content as a string. Example: `"Hello World"`
103-
* "openai" will render the content as a list of dictionaries, similar to OpenAI
103+
* "openai" will render the content as a list of dictionaries, similar to OpenAI
104104
schema. Example: `[{"type": "text", "text": "Hello world!"}]`"""
105105
response_role: str = "assistant"
106106
"""The role name to return if `request.add_generation_prompt=true`."""
@@ -117,40 +117,40 @@ class FrontendArgs:
117117
root_path: Optional[str] = None
118118
"""FastAPI root_path when app is behind a path based routing proxy."""
119119
middleware: list[str] = field(default_factory=lambda: [])
120-
"""Additional ASGI middleware to apply to the app. We accept multiple
121-
--middleware arguments. The value should be an import path. If a function
122-
is provided, vLLM will add it to the server using
123-
`@app.middleware('http')`. If a class is provided, vLLM will
120+
"""Additional ASGI middleware to apply to the app. We accept multiple
121+
--middleware arguments. The value should be an import path. If a function
122+
is provided, vLLM will add it to the server using
123+
`@app.middleware('http')`. If a class is provided, vLLM will
124124
add it to the server using `app.add_middleware()`."""
125125
return_tokens_as_token_ids: bool = False
126-
"""When `--max-logprobs` is specified, represents single tokens as
127-
strings of the form 'token_id:{token_id}' so that tokens that are not
126+
"""When `--max-logprobs` is specified, represents single tokens as
127+
strings of the form 'token_id:{token_id}' so that tokens that are not
128128
JSON-encodable can be identified."""
129129
disable_frontend_multiprocessing: bool = False
130-
"""If specified, will run the OpenAI frontend server in the same process as
130+
"""If specified, will run the OpenAI frontend server in the same process as
131131
the model serving engine."""
132132
enable_request_id_headers: bool = False
133-
"""If specified, API server will add X-Request-Id header to responses.
133+
"""If specified, API server will add X-Request-Id header to responses.
134134
Caution: this hurts performance at high QPS."""
135135
enable_auto_tool_choice: bool = False
136-
"""If specified, exclude tool definitions in prompts when
136+
"""If specified, exclude tool definitions in prompts when
137137
tool_choice='none'."""
138138
exclude_tools_when_tool_choice_none: bool = False
139-
"""Enable auto tool choice for supported models. Use `--tool-call-parser`
139+
"""Enable auto tool choice for supported models. Use `--tool-call-parser`
140140
to specify which parser to use."""
141141
tool_call_parser: Optional[str] = None
142-
"""Select the tool call parser depending on the model that you're using.
143-
This is used to parse the model-generated tool call into OpenAI API format.
144-
Required for `--enable-auto-tool-choice`. You can choose any option from
142+
"""Select the tool call parser depending on the model that you're using.
143+
This is used to parse the model-generated tool call into OpenAI API format.
144+
Required for `--enable-auto-tool-choice`. You can choose any option from
145145
the built-in parsers or register a plugin via `--tool-parser-plugin`."""
146146
tool_parser_plugin: str = ""
147-
"""Special the tool parser plugin write to parse the model-generated tool
148-
into OpenAI API format, the name register in this plugin can be used in
147+
"""Special the tool parser plugin write to parse the model-generated tool
148+
into OpenAI API format, the name register in this plugin can be used in
149149
`--tool-call-parser`."""
150150
log_config_file: Optional[str] = envs.VLLM_LOGGING_CONFIG_PATH
151151
"""Path to logging config JSON file for both vllm and uvicorn"""
152152
max_log_len: Optional[int] = None
153-
"""Max number of prompt characters or prompt ID numbers being printed in
153+
"""Max number of prompt characters or prompt ID numbers being printed in
154154
log. The default of None means unlimited."""
155155
disable_fastapi_docs: bool = False
156156
"""Disable FastAPI's OpenAPI schema, Swagger UI, and ReDoc endpoint."""

0 commit comments

Comments
 (0)