Skip to content

Commit 4593165

Browse files
authored
⬆️ Update to allow vllm 0.8.5 (#69)
* ⬆️ Bump upper bound for 0.8.5 vllm

  Signed-off-by: Evaline Ju <69598118+evaline-ju@users.noreply.github.com>

* ♻️ Backwards compatible import for nullable_str

  Signed-off-by: Evaline Ju <69598118+evaline-ju@users.noreply.github.com>

* ♻️ Account for init_app_state with vllm config

  Signed-off-by: Evaline Ju <69598118+evaline-ju@users.noreply.github.com>

* ♻️ Keep ordering

  Signed-off-by: Evaline Ju <69598118+evaline-ju@users.noreply.github.com>

* 👽 Optional type change

  Signed-off-by: Evaline Ju <69598118+evaline-ju@users.noreply.github.com>

---------

Signed-off-by: Evaline Ju <69598118+evaline-ju@users.noreply.github.com>
1 parent 6a34f9c commit 4593165

File tree

3 files changed

+37
-14
lines changed

3 files changed

+37
-14
lines changed

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ ARG BASE_UBI_IMAGE_TAG=9.5
88
ARG PYTHON_VERSION=3.12
99

1010
### Build layer
11-
FROM quay.io/vllm/vllm:0.8.4.20250423 as build
11+
FROM quay.io/vllm/vllm:0.8.5.0_cu128 as build
1212

1313
ARG PYTHON_VERSION
1414
ENV PYTHON_VERSION=${PYTHON_VERSION}

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@ vllm-tgis-adapter = [
2222
]
2323
vllm = [
2424
# Note: 0.8.4 has a triton bug on Mac
25-
"vllm @ git+https://github.com/vllm-project/vllm.git@v0.8.3 ; sys_platform == 'darwin'",
26-
"vllm>=0.7.2,<0.8.5 ; sys_platform != 'darwin'",
25+
"vllm @ git+https://github.com/vllm-project/vllm.git@v0.8.5 ; sys_platform == 'darwin'",
26+
"vllm>=0.7.2,<0.8.6 ; sys_platform != 'darwin'",
2727
]
2828

2929
## Dev Extra Sets ##

vllm_detector_adapter/api_server.py

Lines changed: 34 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
from fastapi.responses import JSONResponse
99
from starlette.datastructures import State
1010
from vllm.config import ModelConfig
11-
from vllm.engine.arg_utils import nullable_str
1211
from vllm.engine.protocol import EngineClient
1312
from vllm.entrypoints.chat_utils import load_chat_template
1413
from vllm.entrypoints.launcher import serve_http
@@ -59,7 +58,7 @@ def chat_detection(
5958

6059
async def init_app_state_with_detectors(
6160
engine_client: EngineClient,
62-
model_config: ModelConfig,
61+
config, # ModelConfig | VllmConfig
6362
state: State,
6463
args: Namespace,
6564
) -> None:
@@ -79,6 +78,11 @@ async def init_app_state_with_detectors(
7978
]
8079

8180
resolved_chat_template = load_chat_template(args.chat_template)
81+
82+
model_config = config
83+
if type(config) != ModelConfig: # VllmConfig
84+
model_config = config.model_config
85+
8286
state.openai_serving_models = OpenAIServingModels(
8387
engine_client=engine_client,
8488
model_config=model_config,
@@ -90,9 +94,7 @@ async def init_app_state_with_detectors(
9094
# Use vllm app state init
9195
# init_app_state became async in https://github.com/vllm-project/vllm/pull/11727
9296
# ref. https://github.com/opendatahub-io/vllm-tgis-adapter/pull/207
93-
maybe_coroutine = api_server.init_app_state(
94-
engine_client, model_config, state, args
95-
)
97+
maybe_coroutine = api_server.init_app_state(engine_client, config, state, args)
9698
if inspect.isawaitable(maybe_coroutine):
9799
await maybe_coroutine
98100

@@ -161,10 +163,18 @@ def signal_handler(*_) -> None:
161163
# Use vllm build_app which adds middleware
162164
app = api_server.build_app(args)
163165

164-
model_config = await engine_client.get_model_config()
165-
await init_app_state_with_detectors(
166-
engine_client, model_config, app.state, args
167-
)
166+
# api_server.init_app_state takes vllm_config
167+
# ref. https://github.com/vllm-project/vllm/pull/16572
168+
if hasattr(engine_client, "get_vllm_config"):
169+
vllm_config = await engine_client.get_vllm_config()
170+
await init_app_state_with_detectors(
171+
engine_client, vllm_config, app.state, args
172+
)
173+
else:
174+
model_config = await engine_client.get_model_config()
175+
await init_app_state_with_detectors(
176+
engine_client, model_config, app.state, args
177+
)
168178

169179
def _listen_addr(a: str) -> str:
170180
if is_valid_ipv6_address(a):
@@ -280,17 +290,30 @@ async def create_generation_detection(
280290

281291

282292
def add_chat_detection_params(parser):
293+
294+
template_type = None
295+
try:
296+
# Third Party
297+
from vllm.engine.arg_utils import nullable_str
298+
299+
template_type = nullable_str
300+
except ImportError:
301+
# Third Party
302+
from vllm.engine.arg_utils import optional_type
303+
304+
template_type = optional_type(str)
305+
283306
parser.add_argument(
284307
"--task-template",
285-
type=nullable_str,
308+
type=template_type,
286309
default=None,
287310
help="The file path to the task template, "
288311
"or the template in single-line form "
289312
"for the specified model",
290313
)
291314
parser.add_argument(
292315
"--output-template",
293-
type=nullable_str,
316+
type=template_type,
294317
default=None,
295318
help="The file path to the output template, "
296319
"or the template in single-line form "

0 commit comments

Comments (0)