Skip to content

Commit e78587a

Browse files
authored
Improve-mm-and-pooler-and-decoding-configs (vllm-project#16789)
Signed-off-by: Harry Mellor <[email protected]>
1 parent 7eb4255 commit e78587a

File tree

14 files changed

+84
-78
lines changed

14 files changed

+84
-78
lines changed

docs/source/models/supported_models.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -788,7 +788,7 @@ llm = LLM(
788788
Online serving:
789789

790790
```bash
791-
vllm serve Qwen/Qwen2-VL-7B-Instruct --limit-mm-per-prompt image=4
791+
vllm serve Qwen/Qwen2-VL-7B-Instruct --limit-mm-per-prompt '{"image":4}'
792792
```
793793

794794
**This is no longer required if you are using vLLM V1.**

docs/source/serving/multimodal_inputs.md

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -228,7 +228,7 @@ First, launch the OpenAI-compatible server:
228228

229229
```bash
230230
vllm serve microsoft/Phi-3.5-vision-instruct --task generate \
231-
--trust-remote-code --max-model-len 4096 --limit-mm-per-prompt image=2
231+
--trust-remote-code --max-model-len 4096 --limit-mm-per-prompt '{"image":2}'
232232
```
233233

234234
Then, you can use the OpenAI client as follows:

examples/offline_inference/mistral-small.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -16,11 +16,11 @@
1616
# # Mistral format
1717
# vllm serve mistralai/Mistral-Small-3.1-24B-Instruct-2503 \
1818
# --tokenizer-mode mistral --config-format mistral --load-format mistral \
19-
# --limit-mm-per-prompt 'image=4' --max-model-len 16384
19+
# --limit-mm-per-prompt '{"image":4}' --max-model-len 16384
2020
#
2121
# # HF format
2222
# vllm serve mistralai/Mistral-Small-3.1-24B-Instruct-2503 \
23-
# --limit-mm-per-prompt 'image=4' --max-model-len 16384
23+
# --limit-mm-per-prompt '{"image":4}' --max-model-len 16384
2424
# ```
2525
#
2626
# - Client:

examples/online_serving/openai_chat_completion_client_for_multimodal.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99
1010
(multi-image inference with Phi-3.5-vision-instruct)
1111
vllm serve microsoft/Phi-3.5-vision-instruct --task generate \
12-
--trust-remote-code --max-model-len 4096 --limit-mm-per-prompt image=2
12+
--trust-remote-code --max-model-len 4096 --limit-mm-per-prompt '{"image":2}'
1313
1414
(audio inference with Ultravox)
1515
vllm serve fixie-ai/ultravox-v0_5-llama-3_2-1b --max-model-len 4096

tests/engine/test_arg_utils.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,10 @@
2424
}),
2525
])
2626
def test_limit_mm_per_prompt_parser(arg, expected):
27+
"""This functionality is deprecated and will be removed in the future.
28+
This argument should be passed as JSON string instead.
29+
30+
TODO: Remove with nullable_kvs."""
2731
parser = EngineArgs.add_cli_args(FlexibleArgumentParser())
2832
if arg is None:
2933
args = parser.parse_args([])

tests/entrypoints/openai/test_audio.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ def server():
2727
"--enforce-eager",
2828
"--trust-remote-code",
2929
"--limit-mm-per-prompt",
30-
f"audio={MAXIMUM_AUDIOS}",
30+
str({"audio": MAXIMUM_AUDIOS}),
3131
]
3232

3333
with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:

tests/entrypoints/openai/test_video.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ def server():
3131
"--enforce-eager",
3232
"--trust-remote-code",
3333
"--limit-mm-per-prompt",
34-
f"video={MAXIMUM_VIDEOS}",
34+
str({"video": MAXIMUM_VIDEOS}),
3535
]
3636

3737
with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:

tests/entrypoints/openai/test_vision.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ def server():
3535
"--enforce-eager",
3636
"--trust-remote-code",
3737
"--limit-mm-per-prompt",
38-
f"image={MAXIMUM_IMAGES}",
38+
str({"image": MAXIMUM_IMAGES}),
3939
]
4040

4141
with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:

tests/entrypoints/openai/test_vision_embedding.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ def server():
3737
"--enforce-eager",
3838
"--trust-remote-code",
3939
"--limit-mm-per-prompt",
40-
f"image={MAXIMUM_IMAGES}",
40+
str({"image": MAXIMUM_IMAGES}),
4141
"--chat-template",
4242
str(vlm2vec_jinja_path),
4343
]

tests/models/decoder_only/audio_language/test_ultravox.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,9 @@ def audio(request):
4848
])
4949
def server(request, audio_assets):
5050
args = [
51-
"--dtype=bfloat16", "--max-model-len=4096", "--enforce-eager",
52-
f"--limit-mm-per-prompt=audio={len(audio_assets)}",
53-
"--trust-remote-code"
51+
"--dtype", "bfloat16", "--max-model-len", "4096", "--enforce-eager",
52+
"--limit-mm-per-prompt",
53+
str({"audio": len(audio_assets)}), "--trust-remote-code"
5454
] + [
5555
f"--{key.replace('_','-')}={value}"
5656
for key, value in request.param.items()

0 commit comments

Comments (0)