Skip to content

Commit d08744c

Browse files
ksuma2109 and Suma Kasa authored
Upgrade vllm to 0.12.0 and fix Multimodal integration tests (#2978)
Co-authored-by: Suma Kasa <sumakasa@amazon.com>
1 parent 16da769 commit d08744c

File tree

6 files changed

+18
-11
lines changed

6 files changed

+18
-11
lines changed

.github/workflows/integration.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -224,8 +224,8 @@ jobs:
224224
- test: TestVllmAsyncLoraWithCustomCode_g6
225225
instance: g6
226226
failure-prefix: lmi
227-
- test: TestMultiModalVllm_g6
228-
instance: g6
227+
- test: TestMultiModalVllm_p4d
228+
instance: p4d
229229
failure-prefix: lmi
230230
# - test: TestTextEmbedding_g6
231231
# instance: g6

engines/python/setup/djl_python/chat_completions/vllm_chat_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
from vllm import TokensPrompt
1717
from vllm.entrypoints.openai.serving_engine import RequestPrompt, TextTokensPrompt
1818
from vllm.entrypoints.openai.tool_parsers import ToolParser
19-
from vllm.transformers_utils.tokenizers.mistral import maybe_serialize_tool_calls
19+
from vllm.tokenizers.mistral import maybe_serialize_tool_calls
2020
from vllm.transformers_utils.tokenizer import AnyTokenizer
2121
from vllm.entrypoints.openai.protocol import ChatCompletionRequest
2222
from vllm.entrypoints.chat_utils import (

engines/python/setup/djl_python/lmi_vllm/request_response_utils.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -166,9 +166,11 @@ def vllm_non_stream_output_formatter(
166166
**_,
167167
) -> Output:
168168
if isinstance(response, ErrorResponse):
169-
return create_non_stream_output("",
170-
error=response.message,
171-
code=response.code)
169+
error_msg = getattr(response, 'message', None) or getattr(
170+
response, 'detail', str(response))
171+
error_code = getattr(response, 'code', None) or getattr(
172+
response, 'type', 500)
173+
return create_non_stream_output("", error=error_msg, code=error_code)
172174
response_data = response.model_dump_json()
173175
return create_non_stream_output(response_data)
174176

serving/docker/lmi-container-requirements.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ uvloop
3232
ninja
3333
peft
3434
llmcompressor
35-
https://djl-ai.s3.us-east-1.amazonaws.com/publish/vllm/vllm_extensions_package-1.0.0-py3-none-any.whl
35+
vllm==0.12.0
3636
xgrammar
37-
flashinfer-python==0.5.2
37+
flashinfer-python==0.5.3
3838
lmcache

tests/integration/llm/prepare.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -417,6 +417,9 @@
417417
"llava_v1.6-mistral": {
418418
"option.model_id": "s3://djl-llm/llava-v1.6-mistral-7b-hf/",
419419
"option.limit_mm_per_prompt": '{"image": 4}',
420+
"option.gpu_memory_utilization": "0.7",
421+
"option.enforce_eager": True,
422+
"option.tensor_parallel_degree": 1,
420423
},
421424
"paligemma-3b-mix-448": {
422425
"option.model_id": "s3://djl-llm/paligemma-3b-mix-448/",

tests/integration/tests.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -552,17 +552,19 @@ def test_mistral_7b_fp8(self):
552552
"correctness trtllm-mistral-7b-instruct-v0.3-fp8".split())
553553

554554

555-
class TestMultiModalVllm_g6:
555+
@pytest.mark.vllm
556+
@pytest.mark.gpu_8
557+
class TestMultiModalVllm_p4d:
556558

557559
def test_llava_next(self):
558560
with Runner("lmi", "llava_v1.6-mistral") as r:
559-
prepare.build_vllm_model("llava_v1.6-mistral")
561+
prepare.build_vllm_async_model("llava_v1.6-mistral")
560562
r.launch()
561563
client.run("multimodal llava_v1.6-mistral".split())
562564

563565
def test_phi3_v(self):
564566
with Runner("lmi", "phi-3-vision-128k-instruct") as r:
565-
prepare.build_vllm_model("phi-3-vision-128k-instruct")
567+
prepare.build_vllm_async_model("phi-3-vision-128k-instruct")
566568
r.launch()
567569
client.run("multimodal phi-3-vision-128k-instruct".split())
568570

0 commit comments

Comments (0)