Commit 23d3b0e

Support vllm 0.10.1.1 (#704)
1 parent 7f8fea9 commit 23d3b0e

4 files changed: 8 additions, 18 deletions

model-engine/model_engine_server/inference/vllm/Dockerfile.vllm

Lines changed: 1 addition & 1 deletion
@@ -1,5 +1,5 @@
 # syntax=docker/dockerfile:1
-ARG VLLM_VERSION=0.10.0
+ARG VLLM_VERSION=0.10.1.1
 ARG VLLM_BASE_REPO=vllm/vllm-openai
 ARG VLLM_BASE_IMAGE=${VLLM_BASE_REPO}:v${VLLM_VERSION}
 FROM ${VLLM_BASE_IMAGE} AS base

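The version bump flows through the existing VLLM_VERSION build arg, so a different vLLM release can still be selected at build time without editing the Dockerfile. A minimal sketch of such a build; the image tag and build context are illustrative, not taken from this repo:

# Select the vLLM base image version via the build arg declared above
docker build \
  --build-arg VLLM_VERSION=0.10.1.1 \
  -f model-engine/model_engine_server/inference/vllm/Dockerfile.vllm \
  -t my-registry/vllm-openai:v0.10.1.1 \
  model-engine/model_engine_server/inference/vllm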
model-engine/model_engine_server/inference/vllm/build_and_upload_image.sh

Lines changed: 1 addition & 1 deletion
@@ -29,7 +29,7 @@ fi
 ACCOUNT=$1
 IMAGE_TAG=$2
 BUILD_TARGET=$3
-VLLM_VERSION=${VLLM_VERSION:-"0.10.0"}
+VLLM_VERSION=${VLLM_VERSION:-"0.10.1.1"}
 VLLM_BASE_REPO=${VLLM_BASE_REPO:-"vllm/vllm-openai"}

 # if build target = vllm use vllm otherwise use vllm_batch
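Since the script only sets a default via ${VLLM_VERSION:-"0.10.1.1"}, an exported VLLM_VERSION still takes precedence. A sketch of an invocation using the positional arguments the script reads ($1 account, $2 image tag, $3 build target); the account id and tag are placeholders:

# Pin the vLLM version explicitly; the env var overrides the script default
VLLM_VERSION=0.10.1.1 \
  bash model-engine/model_engine_server/inference/vllm/build_and_upload_image.sh \
  123456789012 v0.10.1.1 vllm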
Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-vllm==0.10.0
+vllm==0.10.1.1

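To confirm the pinned upgrade in a local environment, the requirement can be installed and the reported version checked; vllm exposes the release string as vllm.__version__:

# Install the pin and verify what actually resolved
pip install vllm==0.10.1.1
python -c "import vllm; print(vllm.__version__)"  # expect 0.10.1.1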
model-engine/model_engine_server/inference/vllm/vllm_server.py

Lines changed: 5 additions & 15 deletions
@@ -11,10 +11,7 @@
 import vllm.envs as envs
 from fastapi import APIRouter, BackgroundTasks, Request
 from fastapi.responses import Response, StreamingResponse
-from vllm.engine.async_llm_engine import (
-    AsyncEngineDeadError,
-    build_guided_decoding_logits_processor_async,
-)
+from vllm.engine.async_llm_engine import AsyncEngineDeadError
 from vllm.engine.protocol import EngineClient
 from vllm.entrypoints.launcher import serve_http
 from vllm.entrypoints.openai.api_server import (
@@ -60,16 +57,7 @@ async def generate(request: Request) -> Response:
     prompt = request_dict.pop("prompt")
     stream = request_dict.pop("stream", False)
 
-    guided_decoding_backend = (
-        await engine_client.get_decoding_config()
-    ).guided_decoding_backend
-
-    sampling_params = await build_guided_decoding_logits_processor_async(
-        sampling_params=SamplingParams(**request_dict),
-        tokenizer=await engine_client.get_tokenizer(lora_request=None),
-        default_guided_backend=guided_decoding_backend,
-        model_config=await engine_client.get_model_config(),
-    )
+    sampling_params = SamplingParams(**request_dict)
 
     request_id = random_uuid()
 
@@ -226,7 +214,7 @@ async def run_server_worker(
 
     global engine_client
 
-    async with build_async_engine_client(args, client_config) as engine_client:
+    async with build_async_engine_client(args, client_config=client_config) as engine_client:
         maybe_register_tokenizer_info_endpoint(args)
         app = build_app(args)
 
@@ -250,6 +238,8 @@ async def run_server_worker(
         ssl_certfile=args.ssl_certfile,
         ssl_ca_certs=args.ssl_ca_certs,
         ssl_cert_reqs=args.ssl_cert_reqs,
+        h11_max_incomplete_event_size=args.h11_max_incomplete_event_size,
+        h11_max_header_count=args.h11_max_header_count,
         **uvicorn_kwargs,
     )
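With the import of build_guided_decoding_logits_processor_async dropped, the generate handler no longer pre-builds guided-decoding logits processors; everything left in the JSON body after popping "prompt" and "stream" feeds SamplingParams(**request_dict) directly. A sketch of a request under that contract; the route path and port are placeholders, as neither appears in this diff:

# Remaining body fields map one-to-one onto SamplingParams kwargs
curl -s http://localhost:5005/predict \
  -H 'Content-Type: application/json' \
  -d '{"prompt": "Hello", "max_tokens": 16, "temperature": 0.0}'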

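The two new h11_* keyword arguments are read from the parsed CLI args before being forwarded to serve_http, which implies the server's argument parser accepts matching flags for tuning h11's HTTP parsing limits. A sketch with illustrative values, assuming the usual argparse dash-for-underscore spelling:

# Raise h11's limits on incomplete-event size and header count
python vllm_server.py \
  --h11-max-incomplete-event-size 4194304 \
  --h11-max-header-count 256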
0 commit comments
