
Commit c2f33d0

Merge remote-tracking branch 'upstream/main' into rhoai-2.24

2 parents: feb49f0 + da25d68

5 files changed (+45 −9 lines)

vllm/entrypoints/constants.py

Lines changed: 10 additions & 0 deletions
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""
+Shared constants for vLLM entrypoints.
+"""
+
+# HTTP header limits for h11 parser
+# These constants help mitigate header abuse attacks
+H11_MAX_INCOMPLETE_EVENT_SIZE_DEFAULT = 4194304  # 4 MB
+H11_MAX_HEADER_COUNT_DEFAULT = 256
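Note: the 4 MB default caps how much of a single, still-incomplete HTTP event (typically an unterminated header block) the h11 parser will buffer. A standalone illustration of that mechanism, using the h11 package directly, is sketched below; it is not vLLM code, and the request bytes are made up.

# Standalone illustration (not vLLM code): h11 refuses to buffer an
# unterminated header block beyond max_incomplete_event_size, which is the
# knob the new H11_MAX_INCOMPLETE_EVENT_SIZE_DEFAULT is meant to feed.
import h11

conn = h11.Connection(h11.SERVER, max_incomplete_event_size=4 * 1024 * 1024)

# The header block is not yet terminated by a blank line, so no Request
# event is available and h11 keeps the bytes buffered:
conn.receive_data(b"GET / HTTP/1.1\r\nHost: example.com\r\nX-Filler: abc\r\n")
assert conn.next_event() is h11.NEED_DATA

# A client that keeps streaming header bytes would eventually exceed the
# limit and be rejected instead of exhausting server memory.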

vllm/entrypoints/launcher.py

Lines changed: 21 additions & 0 deletions
@@ -14,6 +14,8 @@
 from vllm.engine.async_llm_engine import AsyncEngineDeadError
 from vllm.engine.multiprocessing import MQEngineDeadError
 from vllm.engine.protocol import EngineClient
+from vllm.entrypoints.constants import (H11_MAX_HEADER_COUNT_DEFAULT,
+                                        H11_MAX_INCOMPLETE_EVENT_SIZE_DEFAULT)
 from vllm.entrypoints.ssl import SSLCertRefresher
 from vllm.logger import init_logger
 from vllm.utils import find_process_using_port
@@ -26,6 +28,11 @@ async def serve_http(app: FastAPI,
                      sock: Optional[socket.socket],
                      enable_ssl_refresh: bool = False,
                      **uvicorn_kwargs: Any):
+    """
+    Start a FastAPI app using Uvicorn, with support for custom Uvicorn config
+    options. Supports http header limits via h11_max_incomplete_event_size and
+    h11_max_header_count.
+    """
     logger.info("Available routes are:")
     for route in app.routes:
         methods = getattr(route, "methods", None)
@@ -36,7 +43,21 @@ async def serve_http(app: FastAPI,
 
         logger.info("Route: %s, Methods: %s", path, ', '.join(methods))
 
+    # Extract header limit options if present
+    h11_max_incomplete_event_size = uvicorn_kwargs.pop(
+        "h11_max_incomplete_event_size", None)
+    h11_max_header_count = uvicorn_kwargs.pop("h11_max_header_count", None)
+
+    # Set safe defaults if not provided
+    if h11_max_incomplete_event_size is None:
+        h11_max_incomplete_event_size = H11_MAX_INCOMPLETE_EVENT_SIZE_DEFAULT
+    if h11_max_header_count is None:
+        h11_max_header_count = H11_MAX_HEADER_COUNT_DEFAULT
+
     config = uvicorn.Config(app, **uvicorn_kwargs)
+    # Set header limits
+    config.h11_max_incomplete_event_size = h11_max_incomplete_event_size
+    config.h11_max_header_count = h11_max_header_count
     config.load()
     server = uvicorn.Server(config)
     _add_shutdown_handlers(app, server)
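The sketch below distills the plumbing pattern from this hunk into a small runnable form: the two options are popped out of **uvicorn_kwargs so uvicorn.Config never receives keyword arguments it might reject, then attached to the config object before it is loaded. make_config is a hypothetical helper name; the real serve_http does this inline.

# Minimal sketch of the pattern used in serve_http above (hypothetical
# helper); the defaults mirror vllm/entrypoints/constants.py.
import uvicorn
from fastapi import FastAPI

H11_MAX_INCOMPLETE_EVENT_SIZE_DEFAULT = 4194304  # 4 MB
H11_MAX_HEADER_COUNT_DEFAULT = 256


def make_config(app: FastAPI, **uvicorn_kwargs) -> uvicorn.Config:
    # Pop first so uvicorn.Config(...) only receives kwargs it understands.
    max_event_size = uvicorn_kwargs.pop("h11_max_incomplete_event_size", None)
    max_header_count = uvicorn_kwargs.pop("h11_max_header_count", None)
    if max_event_size is None:
        max_event_size = H11_MAX_INCOMPLETE_EVENT_SIZE_DEFAULT
    if max_header_count is None:
        max_header_count = H11_MAX_HEADER_COUNT_DEFAULT

    config = uvicorn.Config(app, **uvicorn_kwargs)
    # Attach the limits as plain attributes on the config, as the diff does.
    config.h11_max_incomplete_event_size = max_event_size
    config.h11_max_header_count = max_header_count
    return config


cfg = make_config(FastAPI(), host="127.0.0.1", port=8000,
                  h11_max_header_count=128)
assert cfg.h11_max_header_count == 128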

vllm/entrypoints/openai/api_server.py

Lines changed: 2 additions & 0 deletions
@@ -1832,6 +1832,8 @@ async def run_server_worker(listen_address,
         ssl_certfile=args.ssl_certfile,
         ssl_ca_certs=args.ssl_ca_certs,
         ssl_cert_reqs=args.ssl_cert_reqs,
+        h11_max_incomplete_event_size=args.h11_max_incomplete_event_size,
+        h11_max_header_count=args.h11_max_header_count,
         **uvicorn_kwargs,
     )
 

vllm/entrypoints/openai/cli_args.py

Lines changed: 8 additions & 0 deletions
@@ -20,6 +20,8 @@
 from vllm.engine.arg_utils import AsyncEngineArgs, optional_type
 from vllm.entrypoints.chat_utils import (ChatTemplateContentFormatOption,
                                          validate_chat_template)
+from vllm.entrypoints.constants import (H11_MAX_HEADER_COUNT_DEFAULT,
+                                        H11_MAX_INCOMPLETE_EVENT_SIZE_DEFAULT)
 from vllm.entrypoints.openai.serving_models import LoRAModulePath
 from vllm.entrypoints.openai.tool_parsers import ToolParserManager
 from vllm.logger import init_logger
@@ -160,6 +162,12 @@ class FrontendArgs:
     enable_tokenizer_info_endpoint: bool = False
     """Enable the /get_tokenizer_info endpoint. May expose chat
     templates and other tokenizer configuration."""
+    h11_max_incomplete_event_size: int = H11_MAX_INCOMPLETE_EVENT_SIZE_DEFAULT
+    """Maximum size (bytes) of an incomplete HTTP event (header or body) for
+    h11 parser. Helps mitigate header abuse. Default: 4194304 (4 MB)."""
+    h11_max_header_count: int = H11_MAX_HEADER_COUNT_DEFAULT
+    """Maximum number of HTTP headers allowed in a request for h11 parser.
+    Helps mitigate header abuse. Default: 256."""
 
     @staticmethod
     def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
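Because the new fields live on FrontendArgs, they should surface as frontend CLI flags through add_cli_args. A hedged smoke-test sketch follows; the dashed flag spellings and the FlexibleArgumentParser import path are assumptions based on how the other frontend args are exposed, not taken from this diff.

# Hypothetical smoke test for the new flags; flag names and the
# FlexibleArgumentParser import path are assumptions.
from vllm.entrypoints.openai.cli_args import FrontendArgs
from vllm.utils import FlexibleArgumentParser

parser = FrontendArgs.add_cli_args(FlexibleArgumentParser())
args = parser.parse_args([
    "--h11-max-incomplete-event-size", str(8 * 1024 * 1024),  # raise to 8 MB
    "--h11-max-header-count", "128",                          # tighten count
])
assert args.h11_max_header_count == 128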

vllm/entrypoints/openai/tool_parsers/qwen3coder_tool_parser.py

Lines changed: 4 additions & 9 deletions
@@ -208,15 +208,10 @@ def convert_param_value(param_value: str, param_name: str,
             "valid JSON object in tool '%s', will try other "
             "methods to parse it.", param_value, param_name,
             func_name)
-        try:
-            converted_value = eval(param_value)
-            return converted_value
-        except Exception:
-            logger.warning(
-                "Parsed value '%s' of parameter '%s' cannot be "
-                "converted via Python `eval()` in tool '%s', "
-                "degenerating to string.", param_value, param_name,
-                func_name)
+        logger.warning(
+            "Parameter '%s' has unknown type '%s'. "
+            "The value will be treated as a string.", param_name,
+            param_type)
         return param_value
 
     # Extract function name
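The removed fallback ran eval() on model-generated tool-call arguments, which executes arbitrary Python. The patched parser now just logs a warning and keeps the value as a string. The snippet below is illustrative only, with a made-up payload; it shows the difference, and how ast.literal_eval would behave if literal parsing were ever wanted instead of a plain string fallback.

# Illustrative only; the payload string is made up and this is not vLLM code.
import ast

untrusted = "__import__('os').getcwd()"  # a model could emit anything here

# Old behavior: eval(untrusted) would execute the expression (here it merely
# calls os.getcwd(), but a hostile payload could do far worse).

# ast.literal_eval, by contrast, accepts only Python literals and raises on
# anything executable, so such a payload falls back to the raw string,
# which is also what the patched parser now returns for unknown types.
try:
    parsed = ast.literal_eval(untrusted)
except (ValueError, SyntaxError):
    parsed = untrusted

assert parsed == untrusted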
