18
18
from vllm .sequence import Logprob , SampleLogprobs
19
19
20
20
from ....utils import VLLM_PATH , large_gpu_test
21
- from ...utils import check_logprobs_close
21
+ from ...utils import check_logprobs_close , dummy_hf_overrides
22
22
23
23
if TYPE_CHECKING :
24
24
from _typeshed import StrPath
29
29
MODELS = [PIXTRAL_ID , MISTRAL_SMALL_3_1_ID ]
30
30
31
31
IMG_URLS = [
32
- "https://picsum.photos/id/237/400/300 " ,
33
- "https://picsum.photos/id/231/200/300 " ,
34
- "https://picsum.photos/id/27/500/500 " ,
35
- "https://picsum.photos/id/17/150/600 " ,
32
+ "https://huggingface.co/datasets/Isotr0py/mistral-test-images/resolve/main/237-400x300.jpg " ,
33
+ "https://huggingface.co/datasets/Isotr0py/mistral-test-images/resolve/main/231-200x300.jpg " ,
34
+ "https://huggingface.co/datasets/Isotr0py/mistral-test-images/resolve/main/27-500x500.jpg " ,
35
+ "https://huggingface.co/datasets/Isotr0py/mistral-test-images/resolve/main/17-150x600.jpg " ,
36
36
]
37
37
PROMPT = "Describe each image in one short sentence."
38
38
@@ -110,11 +110,6 @@ def _create_engine_inputs_hf(urls: list[str]) -> TextPrompt:
110
110
_create_msg_format (IMG_URLS [:2 ]),
111
111
_create_msg_format (IMG_URLS ),
112
112
]
113
- ENGINE_INPUTS = [
114
- _create_engine_inputs (IMG_URLS [:1 ]),
115
- _create_engine_inputs (IMG_URLS [:2 ]),
116
- _create_engine_inputs (IMG_URLS ),
117
- ]
118
113
119
114
SAMPLING_PARAMS = SamplingParams (max_tokens = 512 , temperature = 0.0 , logprobs = 5 )
120
115
LIMIT_MM_PER_PROMPT = dict (image = 4 )
@@ -195,7 +190,6 @@ def test_chat(
195
190
name_1 = "output" )
196
191
197
192
198
- @large_gpu_test (min_gb = 48 )
199
193
@pytest .mark .parametrize ("prompt,expected_ranges" ,
200
194
[(_create_engine_inputs_hf (IMG_URLS [:1 ]),
201
195
[PlaceholderRange (offset = 11 , length = 494 )]),
@@ -204,7 +198,7 @@ def test_chat(
204
198
PlaceholderRange (offset = 277 , length = 1056 ),
205
199
PlaceholderRange (offset = 1333 , length = 418 )
206
200
])])
207
- def test_multi_modal_placeholders (vllm_runner , prompt ,
201
+ def test_multi_modal_placeholders (vllm_runner , prompt : TextPrompt ,
208
202
expected_ranges : list [PlaceholderRange ],
209
203
monkeypatch ) -> None :
210
204
@@ -215,6 +209,8 @@ def test_multi_modal_placeholders(vllm_runner, prompt,
215
209
"mistral-community/pixtral-12b" ,
216
210
max_model_len = 8192 ,
217
211
limit_mm_per_prompt = LIMIT_MM_PER_PROMPT ,
212
+ load_format = "dummy" ,
213
+ hf_overrides = dummy_hf_overrides ,
218
214
) as vllm_model :
219
215
outputs = vllm_model .llm .generate (prompt )
220
216
@@ -230,5 +226,7 @@ def test_multi_modal_placeholders(vllm_runner, prompt,
230
226
expected_ranges ), f"{ image_placeholder_ranges = } "
231
227
for real_range , expected_range in zip (image_placeholder_ranges ,
232
228
expected_ranges ):
233
- assert real_range == expected_range , \
229
+ assert real_range .offset == expected_range .offset , \
230
+ f"{ real_range = } { expected_range = } "
231
+ assert real_range .length == expected_range .length , \
234
232
f"{ real_range = } { expected_range = } "
0 commit comments