
Commit e63e0db

💥👽 Breaking changes for vLLM 0.11.1+
Signed-off-by: Evaline Ju <[email protected]>
1 parent 77b0532 commit e63e0db

File tree (4 files changed: +38 -39 lines)

pyproject.toml
vllm_detector_adapter/api_server.py
vllm_detector_adapter/start_with_tgis_adapter.py
vllm_detector_adapter/utils.py

pyproject.toml

Lines changed: 3 additions & 4 deletions
@@ -1,6 +1,6 @@
 [project]
 name = "vllm-detector-adapter"
-version = "0.9.0"
+version = "0.10.0"
 authors = [
     { name = "Gaurav Kumbhat", email = "[email protected]" },
     { name = "Evaline Ju", email = "[email protected]" },
@@ -15,9 +15,8 @@ dependencies = ["orjson>=3.10.16,<3.11"]
 [project.optional-dependencies]
 vllm-tgis-adapter = ["vllm-tgis-adapter>=0.8.0,<0.9.0"]
 vllm = [
-    # Note: vllm < 0.10.0 has issues with transformers >= 4.54.0
-    "vllm @ git+https://github.com/vllm-project/[email protected] ; sys_platform == 'darwin'",
-    "vllm>=0.10.1,<0.11.1 ; sys_platform != 'darwin'",
+    "vllm @ git+https://github.com/vllm-project/[email protected] ; sys_platform == 'darwin'",
+    "vllm>=0.11.1,<0.12.1 ; sys_platform != 'darwin'",
 ]

 ## Dev Extra Sets ##
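
The non-Darwin dependency range now requires vLLM 0.11.1 up to (but not including) 0.12.1, matching the relocated imports in the files below. As a hedged illustration only (not part of this commit; the check and message are assumptions), a deployment script could fail fast when an older vLLM is installed:

# Hypothetical pre-flight check, not from this repository. Assumes the
# "packaging" library is available (it usually ships with vLLM installs).
from importlib.metadata import version

from packaging.version import Version

installed = Version(version("vllm"))
if installed < Version("0.11.1"):
    raise RuntimeError(
        f"vllm-detector-adapter 0.10.0 expects vllm>=0.11.1, found {installed}"
    )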

vllm_detector_adapter/api_server.py

Lines changed: 19 additions & 33 deletions
@@ -9,7 +9,6 @@
 from fastapi.exceptions import RequestValidationError
 from fastapi.responses import JSONResponse
 from starlette.datastructures import State
-from vllm.config import ModelConfig
 from vllm.engine.protocol import EngineClient
 from vllm.entrypoints.chat_utils import load_chat_template
 from vllm.entrypoints.launcher import serve_http
@@ -19,7 +18,10 @@
 from vllm.entrypoints.openai.protocol import ErrorInfo, ErrorResponse
 from vllm.entrypoints.openai.serving_models import BaseModelPath, OpenAIServingModels
 from vllm.entrypoints.openai.tool_parsers import ToolParserManager
-from vllm.utils import FlexibleArgumentParser, is_valid_ipv6_address, set_ulimit
+from vllm.entrypoints.utils import process_lora_modules
+from vllm.reasoning import ReasoningParserManager
+from vllm.utils import is_valid_ipv6_address, set_ulimit
+from vllm.utils.argparse_utils import FlexibleArgumentParser
 from vllm.version import __version__ as VLLM_VERSION
 import uvloop

@@ -36,14 +38,6 @@
 )
 from vllm_detector_adapter.utils import LocalEnvVarArgumentParser

-try:
-    # Third Party
-    from vllm.entrypoints.openai.reasoning_parsers import ReasoningParserManager
-except ImportError:
-    # Third Party
-    from vllm.reasoning import ReasoningParserManager
-
-
 TIMEOUT_KEEP_ALIVE = 5  # seconds

 # Cannot use __name__ (https://github.com/vllm-project/vllm/pull/4765)
@@ -61,42 +55,46 @@ def chat_detection(

 async def init_app_state_with_detectors(
     engine_client: EngineClient,
-    config,  # ModelConfig | VllmConfig
     state: State,
     args: Namespace,
 ) -> None:
     """Add detection capabilities to app state"""
+    vllm_config = engine_client.vllm_config
+
     if args.served_model_name is not None:
         served_model_names = args.served_model_name
     else:
         served_model_names = [args.model]

-    if args.disable_log_requests:
-        request_logger = None
-    else:
+    if args.enable_log_requests:
         request_logger = RequestLogger(max_log_len=args.max_log_len)
+    else:
+        request_logger = None

     base_model_paths = [
         BaseModelPath(name=name, model_path=args.model) for name in served_model_names
     ]

     resolved_chat_template = load_chat_template(args.chat_template)

-    model_config = config
-    if type(config) != ModelConfig:  # VllmConfig
-        model_config = config.model_config
+    # Merge default_mm_loras into the static lora_modules
+    default_mm_loras = (
+        vllm_config.lora_config.default_mm_loras
+        if vllm_config.lora_config is not None
+        else {}
+    )
+    lora_modules = process_lora_modules(args.lora_modules, default_mm_loras)

     state.openai_serving_models = OpenAIServingModels(
         engine_client=engine_client,
-        model_config=model_config,
         base_model_paths=base_model_paths,
-        lora_modules=args.lora_modules,
+        lora_modules=lora_modules,
     )

     # Use vllm app state init
     # init_app_state became async in https://github.com/vllm-project/vllm/pull/11727
     # ref. https://github.com/opendatahub-io/vllm-tgis-adapter/pull/207
-    maybe_coroutine = api_server.init_app_state(engine_client, config, state, args)
+    maybe_coroutine = api_server.init_app_state(engine_client, state, args)
     if inspect.isawaitable(maybe_coroutine):
         await maybe_coroutine

@@ -107,7 +105,6 @@ async def init_app_state_with_detectors(
         args.task_template,
         args.output_template,
         engine_client,
-        model_config,
         state.openai_serving_models,
         args.response_role,
         request_logger=request_logger,
@@ -196,18 +193,7 @@ async def validation_exception_handler(
             content=err.model_dump(), status_code=HTTPStatus.BAD_REQUEST
         )

-    # api_server.init_app_state takes vllm_config
-    # ref. https://github.com/vllm-project/vllm/pull/16572
-    if hasattr(engine_client, "get_vllm_config"):
-        vllm_config = await engine_client.get_vllm_config()
-        await init_app_state_with_detectors(
-            engine_client, vllm_config, app.state, args
-        )
-    else:
-        model_config = await engine_client.get_model_config()
-        await init_app_state_with_detectors(
-            engine_client, model_config, app.state, args
-        )
+    await init_app_state_with_detectors(engine_client, app.state, args)

     def _listen_addr(a: str) -> str:
         if is_valid_ipv6_address(a):
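
Because init_app_state_with_detectors no longer accepts a config argument (it reads engine_client.vllm_config directly), any downstream caller that previously fetched a ModelConfig or VllmConfig must drop that argument. A minimal sketch of the new call surface, assuming engine_client, app, and args come from the usual vLLM server bootstrap:

# Hedged sketch, not code from this commit: the config parameter is gone,
# so callers pass only the engine client, the app state, and the parsed args.
from vllm_detector_adapter.api_server import init_app_state_with_detectors

async def startup(engine_client, app, args):
    # The function now pulls vllm_config off the engine client itself.
    await init_app_state_with_detectors(engine_client, app.state, args)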

vllm_detector_adapter/start_with_tgis_adapter.py

Lines changed: 1 addition & 1 deletion
@@ -21,7 +21,7 @@
 from vllm.entrypoints.launcher import serve_http
 from vllm.entrypoints.openai import api_server
 from vllm.entrypoints.openai.cli_args import make_arg_parser
-from vllm.utils import FlexibleArgumentParser
+from vllm.utils.argparse_utils import FlexibleArgumentParser
 import uvloop

 if TYPE_CHECKING:

vllm_detector_adapter/utils.py

Lines changed: 15 additions & 1 deletion
@@ -4,7 +4,7 @@
 import os

 # Third Party
-from vllm.utils import FlexibleArgumentParser, StoreBoolean
+from vllm.utils.argparse_utils import FlexibleArgumentParser


 class DetectorType(Enum):
@@ -16,6 +16,20 @@ class DetectorType(Enum):
     TEXT_CONTEXT_DOC = auto()


+# This is taken from vLLM < 0.11.1 for backwards compatibility.
+# vLLM versions >=0.11.1 no longer include StoreBoolean.
+class StoreBoolean(argparse.Action):
+    def __call__(self, parser, namespace, values, option_string=None):
+        if values.lower() == "true":
+            setattr(namespace, self.dest, True)
+        elif values.lower() == "false":
+            setattr(namespace, self.dest, False)
+        else:
+            raise ValueError(
+                f"Invalid boolean value: {values}. Expected 'true' or 'false'."
+            )
+
+
 # LocalEnvVarArgumentParser and dependent functions taken from
 # https://github.com/opendatahub-io/vllm-tgis-adapter/blob/main/src/vllm_tgis_adapter/tgis_utils/args.py
 # vllm by default parses args from CLI, not from env vars, but env var overrides
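
With StoreBoolean now vendored in vllm_detector_adapter.utils instead of imported from vllm.utils, existing parser definitions keep working. A small usage sketch (the --enable-detectors flag name is made up for illustration):

# Illustrative only; the flag name below is hypothetical.
import argparse

from vllm_detector_adapter.utils import StoreBoolean

parser = argparse.ArgumentParser()
# StoreBoolean expects an explicit "true"/"false" value rather than acting
# as a presence-only flag.
parser.add_argument("--enable-detectors", action=StoreBoolean, default=False)

args = parser.parse_args(["--enable-detectors", "true"])
assert args.enable_detectors is True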

0 commit comments
