
Commit 9b8c8ba

[Core][Frontend] Support Passing Multimodal Processor Kwargs (vllm-project#8657)
Signed-off-by: Alex-Brooks <[email protected]>
1 parent: d23679e
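
This commit threads per-model multimodal processor kwargs from the CLI and engine configuration down to the input processor, input mapper, and dummy data creation. A minimal usage sketch, assuming the LLM entrypoint forwards mm_processor_kwargs through EngineArgs/ModelConfig as wired up here; the model name is illustrative, and the num_crops value mirrors the parser test below:

# A minimal sketch, assuming LLM(...) forwards mm_processor_kwargs to
# EngineArgs/ModelConfig as this commit wires up. The model name and the
# num_crops value are illustrative, not taken from this diff.
from vllm import LLM

llm = LLM(
    model="microsoft/Phi-3.5-vision-instruct",  # assumed example model
    trust_remote_code=True,
    # Extra kwargs forwarded to the model's multimodal processor
    # (input processor, mapper, dummy data creation, etc.).
    mm_processor_kwargs={"num_crops": 4},
)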

File tree

16 files changed: +589 / -116 lines


tests/engine/test_arg_utils.py

Lines changed: 21 additions & 0 deletions
@@ -40,3 +40,24 @@ def test_limit_mm_per_prompt_parser(arg, expected):
 def test_bad_nullable_kvs(arg):
     with pytest.raises(ArgumentTypeError):
         nullable_kvs(arg)
+
+
+@pytest.mark.parametrize(("arg", "expected"), [
+    (None, None),
+    ("{}", {}),
+    ('{"num_crops": 4}', {
+        "num_crops": 4
+    }),
+    ('{"foo": {"bar": "baz"}}', {
+        "foo": {
+            "bar": "baz"
+        }
+    }),
+])
+def test_mm_processor_kwargs_prompt_parser(arg, expected):
+    parser = EngineArgs.add_cli_args(FlexibleArgumentParser())
+    if arg is None:
+        args = parser.parse_args([])
+    else:
+        args = parser.parse_args(["--mm-processor-kwargs", arg])
+    assert args.mm_processor_kwargs == expected
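
Outside of pytest, the same parsing path can be exercised directly. A sketch assuming the imports this test module uses (EngineArgs from vllm.engine.arg_utils, FlexibleArgumentParser from vllm.utils):

# Sketch of the new CLI flag's parsing behavior, assuming the imports the
# test module above relies on.
from vllm.engine.arg_utils import EngineArgs
from vllm.utils import FlexibleArgumentParser

parser = EngineArgs.add_cli_args(FlexibleArgumentParser())
args = parser.parse_args(["--mm-processor-kwargs", '{"num_crops": 4}'])
# The flag's JSON payload is parsed into a plain dict.
assert args.mm_processor_kwargs == {"num_crops": 4}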

tests/models/decoder_only/vision_language/test_qwen.py

Lines changed: 1 addition & 28 deletions
@@ -5,14 +5,13 @@
 import torch
 from PIL.Image import Image
 
-from vllm.config import ModelConfig
 from vllm.inputs import InputContext, LLMInputs
 from vllm.multimodal.base import MultiModalInputs
 from vllm.multimodal.utils import cached_get_tokenizer, rescale_image_size
 
 from ....conftest import (IMAGE_ASSETS, HfRunner, ImageAsset, PromptImageInput,
                           VllmRunner, _ImageAssets)
-from ...utils import check_logprobs_close
+from ...utils import build_model_context, check_logprobs_close
 
 text_only_models = [
     "Qwen/Qwen-7B-Chat"  # Has no visual component
@@ -42,32 +41,6 @@
 IMG_SIZE = 448
 
 
-def build_model_context(model_name: str,
-                        tokenizer_name: Optional[str] = None,
-                        trust_remote_code: bool = False):
-    """Creates an InputContext for a given model.
-
-    Args:
-        model_name: Name of the model being considered.
-        tokenizer_name: Name of the tokenizer being considered.
-        trust_remote_code: Whether or not to allow loading remote code.
-
-    Returns:
-        InputContext for the model being considered.
-    """
-    if tokenizer_name is None:
-        tokenizer_name = model_name
-    model_config = ModelConfig(
-        model_name,
-        tokenizer_name,
-        tokenizer_mode="auto",
-        trust_remote_code=trust_remote_code,
-        dtype="float32",
-        seed=0,
-    )
-    return InputContext(model_config)
-
-
 @pytest.fixture()
 def input_mapper_for_qwen():
     # Lazy import to avoid initializing CUDA during test collection

tests/models/utils.py

Lines changed: 35 additions & 0 deletions
@@ -1,6 +1,8 @@
 import warnings
 from typing import Dict, List, Optional, Sequence, Tuple, Union
 
+from vllm.config import ModelConfig
+from vllm.inputs import InputContext
 from vllm.sequence import Logprob, PromptLogprobs, SampleLogprobs
 
 TokensText = Tuple[List[int], str]
@@ -240,3 +242,36 @@ def check_logprobs_close(
         warnings.simplefilter("always")
 
         warnings.warn(fail_msg, stacklevel=2)
+
+
+def build_model_context(model_name: str,
+                        tokenizer_name: Optional[str] = None,
+                        trust_remote_code: bool = False,
+                        mm_processor_kwargs: Optional[Dict] = None,
+                        limit_mm_per_prompt: Optional[Dict] = None):
+    """Creates an InputContext for a given model.
+
+    Args:
+        model_name: Name of the model being considered.
+        tokenizer_name: Name of the tokenizer being considered.
+        trust_remote_code: Whether or not to allow loading remote code.
+        mm_processor_kwargs: Optional processor kwargs to be leveraged
+            in the input processor, mapper, dummy data creation, etc.
+        limit_mm_per_prompt: Multimodal limits.
+
+    Returns:
+        InputContext for the model being considered.
+    """
+    if tokenizer_name is None:
+        tokenizer_name = model_name
+    model_config = ModelConfig(
+        model_name,
+        tokenizer_name,
+        tokenizer_mode="auto",
+        trust_remote_code=trust_remote_code,
+        dtype="float32",
+        seed=0,
+        mm_processor_kwargs=mm_processor_kwargs,
+        limit_mm_per_prompt=limit_mm_per_prompt,
+    )
+    return InputContext(model_config)
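
With the helper promoted to tests/models/utils.py, per-model tests can build an InputContext whose ModelConfig carries the processor kwargs. A sketch; the model name and kwargs are illustrative, and the absolute import assumes the repo root is on sys.path (the test modules themselves use relative imports, as in test_qwen.py above):

# Sketch of using the shared helper from another test module; the model
# name and kwargs are assumptions for illustration, not from this diff.
from tests.models.utils import build_model_context

ctx = build_model_context(
    model_name="Qwen/Qwen-VL",
    trust_remote_code=True,
    mm_processor_kwargs={"num_crops": 4},
)
# The kwargs ride along on the ModelConfig inside the InputContext.
assert ctx.model_config.mm_processor_kwargs == {"num_crops": 4}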
