Skip to content

Commit 70549c1

Browse files
[CI/Build] Serve images used by multimodal tests through local HTTP Server (vllm-project#23907)
Signed-off-by: Divyansh Singhvi <[email protected]> Signed-off-by: dsinghvi <[email protected]> Co-authored-by: Cyrus Leung <[email protected]>
1 parent f0c503f commit 70549c1

File tree

9 files changed

+250
-98
lines changed

9 files changed

+250
-98
lines changed

tests/conftest.py

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,14 @@
11
# SPDX-License-Identifier: Apache-2.0
22
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3+
import http.server
34
import json
45
import math
6+
import mimetypes
57
import os
8+
import socket
69
import tempfile
10+
import threading
11+
from collections.abc import Generator
712
from enum import Enum
813
from typing import Any, Callable, Optional, TypedDict, TypeVar, Union, cast
914

@@ -32,6 +37,7 @@
3237
from vllm.inputs import (ExplicitEncoderDecoderPrompt, TextPrompt,
3338
to_enc_dec_tuple_list, zip_enc_dec_prompts)
3439
from vllm.logger import init_logger
40+
from vllm.multimodal.utils import fetch_image
3541
from vllm.outputs import RequestOutput
3642
from vllm.sampling_params import BeamSearchParams
3743
from vllm.sequence import Logprob
@@ -1253,3 +1259,119 @@ def cli_config_file():
12531259
def cli_config_file_with_model():
12541260
"""Return the path to the CLI config file with model."""
12551261
return os.path.join(_TEST_DIR, "config", "test_config_with_model.yaml")
1262+
1263+
1264+
class AssetHandler(http.server.BaseHTTPRequestHandler):
    """Serve bundled test images over HTTP.

    A ``GET /<name>.<ext>`` request is answered with the bytes returned by
    ``ImageAsset(<name>).read_bytes(ext=<ext>)``. Only the extensions listed
    in ``_FALLBACK_CONTENT_TYPES`` are accepted.
    """

    # Supported extensions, mapped to the Content-Type that is used when
    # ``mimetypes`` cannot guess one from the filename. Keeping the allow-list
    # and the fallback table in one place prevents them from drifting apart.
    _FALLBACK_CONTENT_TYPES = {"jpg": "image/jpeg", "png": "image/png"}

    def log_message(self, *args, **kwargs):
        """Discard the per-request log lines the base class would emit."""
        pass

    def do_GET(self):
        """Respond with the image asset named by the request path.

        Sends 404 when the path is not of the form ``/<name>.<ext>`` or the
        extension is unsupported, 500 when loading the asset raises, and
        otherwise 200 with the raw image bytes.
        """
        # Accepts paths like: /1280px-Venn_diagram_rgb.jpg
        filename = self.path.lstrip("/")
        if not filename or "." not in filename:
            self.send_error(404, "Missing filename (expected /<name>.<ext>)")
            return

        # Split on the LAST dot so that a name such as "foo.svg.png" keeps
        # "foo.svg" as the asset name and "png" as the extension.
        base, ext = filename.rsplit(".", 1)
        ext = ext.lower()

        if ext not in self._FALLBACK_CONTENT_TYPES:
            self.send_error(404, f"Unsupported extension: .{ext}")
            return

        try:
            data = ImageAsset(base).read_bytes(ext=ext)
        except Exception as e:
            # Any failure while loading is reported to the client instead of
            # killing the handler thread.
            self.send_error(500, f"Failed to load asset: {ext} {base} {e} ")
            return

        ctype, _ = mimetypes.guess_type(filename)
        if ctype is None:
            ctype = self._FALLBACK_CONTENT_TYPES[ext]

        self.send_response(200)
        self.send_header("Content-Type", ctype)
        self.send_header("Content-Length", str(len(data)))
        self.end_headers()
        self.wfile.write(data)
1298+
1299+
1300+
def _find_free_port() -> int:
1301+
with socket.socket() as s:
1302+
s.bind(("127.0.0.1", 0))
1303+
return s.getsockname()[1]
1304+
1305+
1306+
class LocalAssetServer:
    """Context manager running an ``AssetHandler`` HTTP server in a thread.

    Entering the context starts a ``ThreadingHTTPServer`` on a free port of
    ``address`` and serves requests from a daemon thread. Leaving the context
    stops the server, releases its listening socket and joins the thread.
    """

    # Interface the server binds to.
    address: str
    # Port the server is listening on; -1 while the server is not running.
    port: int
    # Running server instance, or None while the server is not running.
    server: Optional[http.server.ThreadingHTTPServer]
    # Thread executing ``serve_forever``, or None while not running.
    thread: Optional[threading.Thread]

    def __init__(self, address: str = "127.0.0.1") -> None:
        self.address = address
        self.port = -1
        self.server = None
        self.thread = None

    def __enter__(self) -> "LocalAssetServer":
        """Start the server on an OS-assigned free port and return ``self``."""
        # Binding to port 0 makes the OS choose the port atomically, so there
        # is no window in which another process can grab a pre-probed port.
        self.server = http.server.ThreadingHTTPServer((self.address, 0),
                                                      AssetHandler)
        self.port = self.server.server_address[1]
        self.thread = threading.Thread(target=self.server.serve_forever,
                                       daemon=True)
        self.thread.start()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Stop the server and release its resources.

        Safe to call more than once. Never suppresses an exception raised
        inside the ``with`` body.
        """
        if self.server is not None:
            # shutdown() only stops the serve_forever() loop; server_close()
            # is what actually closes the listening socket.
            self.server.shutdown()
            self.server.server_close()
            self.server = None

        if self.thread is not None:
            self.thread.join()
            self.thread = None

        self.port = -1

        return None if exc_type is None else False

    @property
    def base_url(self) -> str:
        """Root URL of the running server, e.g. ``http://127.0.0.1:PORT``.

        Raises:
            RuntimeError: If the server is not currently running.
        """
        if self.port < 0:
            raise RuntimeError(
                "LocalAssetServer is not running; use it as a context manager")
        return f"http://{self.address}:{self.port}"

    def url_for(self, name: str) -> str:
        """e.g., name='RGBA_comp.png' -> 'http://127.0.0.1:PORT/RGBA_comp.png'"""
        return f"{self.base_url}/{name}"

    def get_image_asset(self, name: str) -> Image.Image:
        """Fetch the asset *name* from this server via ``fetch_image``."""
        return fetch_image(self.url_for(name))
1353+
1354+
1355+
@pytest.fixture(scope="session")
def local_asset_server() -> Generator[LocalAssetServer, None, None]:
    """
    Session-wide fixture providing a running ``LocalAssetServer``.

    The server is created with its default arguments and currently serves
    images at:
    http://127.0.0.1:<port>/<name>.<ext>
    It is stopped when the ``with`` block is left after the fixture is
    finalized.
    """
    with LocalAssetServer() as asset_server:
        yield asset_server
1364+
1365+
1366+
@pytest.fixture
def image_url(request, local_asset_server) -> str:
    """Indirect fixture: takes one asset filename, returns its full URL."""
    # request.param is one of the IMAGE_ASSETS filenames
    return local_asset_server.url_for(request.param)
1371+
1372+
1373+
@pytest.fixture
def image_urls(request, local_asset_server) -> list[str]:
    """Indirect fixture: maps a list of asset names to their full URLs."""
    asset_names: list[str] = request.param
    urls = []
    for asset_name in asset_names:
        urls.append(local_asset_server.url_for(asset_name))
    return urls

tests/entrypoints/llm/test_chat.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from vllm import LLM
88
from vllm.distributed import cleanup_dist_env_and_memory
99

10-
from ..openai.test_vision import TEST_IMAGE_URLS
10+
from ..openai.test_vision import TEST_IMAGE_ASSETS
1111

1212

1313
@pytest.fixture(scope="function")
@@ -95,7 +95,8 @@ def vision_llm():
9595

9696

9797
@pytest.mark.parametrize("image_urls",
98-
[[TEST_IMAGE_URLS[0], TEST_IMAGE_URLS[1]]])
98+
[[TEST_IMAGE_ASSETS[0], TEST_IMAGE_ASSETS[1]]],
99+
indirect=True)
99100
def test_chat_multi_image(vision_llm, image_urls: list[str]):
100101
messages = [{
101102
"role":

tests/entrypoints/openai/test_vision.py

Lines changed: 23 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,11 @@
1616
MAXIMUM_IMAGES = 2
1717

1818
# Test different image extensions (JPG/PNG) and formats (gray/RGB/RGBA)
19-
TEST_IMAGE_URLS = [
20-
"https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg",
21-
"https://upload.wikimedia.org/wikipedia/commons/f/fa/Grayscale_8bits_palette_sample_image.png",
22-
"https://upload.wikimedia.org/wikipedia/commons/thumb/9/91/Venn_diagram_rgb.svg/1280px-Venn_diagram_rgb.svg.png",
23-
"https://upload.wikimedia.org/wikipedia/commons/0/0b/RGBA_comp.png",
19+
TEST_IMAGE_ASSETS = [
20+
"2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg", # "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
21+
"Grayscale_8bits_palette_sample_image.png", # "https://upload.wikimedia.org/wikipedia/commons/f/fa/Grayscale_8bits_palette_sample_image.png",
22+
"1280px-Venn_diagram_rgb.svg.png", # "https://upload.wikimedia.org/wikipedia/commons/thumb/9/91/Venn_diagram_rgb.svg/1280px-Venn_diagram_rgb.svg.png",
23+
"RGBA_comp.png", # "https://upload.wikimedia.org/wikipedia/commons/0/0b/RGBA_comp.png",
2424
]
2525

2626
EXPECTED_MM_BEAM_SEARCH_RES = [
@@ -69,10 +69,11 @@ async def client(server):
6969

7070

7171
@pytest.fixture(scope="session")
72-
def base64_encoded_image() -> dict[str, str]:
72+
def base64_encoded_image(local_asset_server) -> dict[str, str]:
7373
return {
74-
image_url: encode_image_base64(fetch_image(image_url))
75-
for image_url in TEST_IMAGE_URLS
74+
image_asset:
75+
encode_image_base64(local_asset_server.get_image_asset(image_asset))
76+
for image_asset in TEST_IMAGE_ASSETS
7677
}
7778

7879

@@ -97,7 +98,7 @@ def get_hf_prompt_tokens(model_name, content, image_url):
9798

9899
@pytest.mark.asyncio
99100
@pytest.mark.parametrize("model_name", [MODEL_NAME])
100-
@pytest.mark.parametrize("image_url", TEST_IMAGE_URLS)
101+
@pytest.mark.parametrize("image_url", TEST_IMAGE_ASSETS, indirect=True)
101102
async def test_single_chat_session_image(client: openai.AsyncOpenAI,
102103
model_name: str, image_url: str):
103104
content_text = "What's in this image?"
@@ -157,7 +158,7 @@ async def test_single_chat_session_image(client: openai.AsyncOpenAI,
157158

158159
@pytest.mark.asyncio
159160
@pytest.mark.parametrize("model_name", [MODEL_NAME])
160-
@pytest.mark.parametrize("image_url", TEST_IMAGE_URLS)
161+
@pytest.mark.parametrize("image_url", TEST_IMAGE_ASSETS, indirect=True)
161162
async def test_error_on_invalid_image_url_type(client: openai.AsyncOpenAI,
162163
model_name: str,
163164
image_url: str):
@@ -187,7 +188,7 @@ async def test_error_on_invalid_image_url_type(client: openai.AsyncOpenAI,
187188

188189
@pytest.mark.asyncio
189190
@pytest.mark.parametrize("model_name", [MODEL_NAME])
190-
@pytest.mark.parametrize("image_url", TEST_IMAGE_URLS)
191+
@pytest.mark.parametrize("image_url", TEST_IMAGE_ASSETS, indirect=True)
191192
async def test_single_chat_session_image_beamsearch(client: openai.AsyncOpenAI,
192193
model_name: str,
193194
image_url: str):
@@ -223,10 +224,11 @@ async def test_single_chat_session_image_beamsearch(client: openai.AsyncOpenAI,
223224

224225
@pytest.mark.asyncio
225226
@pytest.mark.parametrize("model_name", [MODEL_NAME])
226-
@pytest.mark.parametrize("image_url", TEST_IMAGE_URLS)
227+
@pytest.mark.parametrize("raw_image_url", TEST_IMAGE_ASSETS)
228+
@pytest.mark.parametrize("image_url", TEST_IMAGE_ASSETS, indirect=True)
227229
async def test_single_chat_session_image_base64encoded(
228-
client: openai.AsyncOpenAI, model_name: str, image_url: str,
229-
base64_encoded_image: dict[str, str]):
230+
client: openai.AsyncOpenAI, model_name: str, raw_image_url: str,
231+
image_url: str, base64_encoded_image: dict[str, str]):
230232

231233
content_text = "What's in this image?"
232234
messages = [{
@@ -237,7 +239,7 @@ async def test_single_chat_session_image_base64encoded(
237239
"type": "image_url",
238240
"image_url": {
239241
"url":
240-
f"data:image/jpeg;base64,{base64_encoded_image[image_url]}"
242+
f"data:image/jpeg;base64,{base64_encoded_image[raw_image_url]}"
241243
}
242244
},
243245
{
@@ -287,12 +289,12 @@ async def test_single_chat_session_image_base64encoded(
287289

288290
@pytest.mark.asyncio
289291
@pytest.mark.parametrize("model_name", [MODEL_NAME])
290-
@pytest.mark.parametrize("image_idx", list(range(len(TEST_IMAGE_URLS))))
292+
@pytest.mark.parametrize("image_idx", list(range(len(TEST_IMAGE_ASSETS))))
291293
async def test_single_chat_session_image_base64encoded_beamsearch(
292294
client: openai.AsyncOpenAI, model_name: str, image_idx: int,
293295
base64_encoded_image: dict[str, str]):
294296
# NOTE: This test also validates that we pass MM data through beam search
295-
image_url = TEST_IMAGE_URLS[image_idx]
297+
raw_image_url = TEST_IMAGE_ASSETS[image_idx]
296298
expected_res = EXPECTED_MM_BEAM_SEARCH_RES[image_idx]
297299

298300
messages = [{
@@ -303,7 +305,7 @@ async def test_single_chat_session_image_base64encoded_beamsearch(
303305
"type": "image_url",
304306
"image_url": {
305307
"url":
306-
f"data:image/jpeg;base64,{base64_encoded_image[image_url]}"
308+
f"data:image/jpeg;base64,{base64_encoded_image[raw_image_url]}"
307309
}
308310
},
309311
{
@@ -326,7 +328,7 @@ async def test_single_chat_session_image_base64encoded_beamsearch(
326328

327329
@pytest.mark.asyncio
328330
@pytest.mark.parametrize("model_name", [MODEL_NAME])
329-
@pytest.mark.parametrize("image_url", TEST_IMAGE_URLS)
331+
@pytest.mark.parametrize("image_url", TEST_IMAGE_ASSETS, indirect=True)
330332
async def test_chat_streaming_image(client: openai.AsyncOpenAI,
331333
model_name: str, image_url: str):
332334
messages = [{
@@ -385,7 +387,8 @@ async def test_chat_streaming_image(client: openai.AsyncOpenAI,
385387
@pytest.mark.parametrize("model_name", [MODEL_NAME])
386388
@pytest.mark.parametrize(
387389
"image_urls",
388-
[TEST_IMAGE_URLS[:i] for i in range(2, len(TEST_IMAGE_URLS))])
390+
[TEST_IMAGE_ASSETS[:i] for i in range(2, len(TEST_IMAGE_ASSETS))],
391+
indirect=True)
389392
async def test_multi_image_input(client: openai.AsyncOpenAI, model_name: str,
390393
image_urls: list[str]):
391394

tests/entrypoints/openai/test_vision_embedding.py

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,11 @@
1919
assert vlm2vec_jinja_path.exists()
2020

2121
# Test different image extensions (JPG/PNG) and formats (gray/RGB/RGBA)
22-
TEST_IMAGE_URLS = [
23-
"https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg",
24-
"https://upload.wikimedia.org/wikipedia/commons/f/fa/Grayscale_8bits_palette_sample_image.png",
25-
"https://upload.wikimedia.org/wikipedia/commons/thumb/9/91/Venn_diagram_rgb.svg/1280px-Venn_diagram_rgb.svg.png",
26-
"https://upload.wikimedia.org/wikipedia/commons/0/0b/RGBA_comp.png",
22+
TEST_IMAGE_ASSETS = [
23+
"2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg", # "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
24+
"Grayscale_8bits_palette_sample_image.png", # "https://upload.wikimedia.org/wikipedia/commons/f/fa/Grayscale_8bits_palette_sample_image.png",
25+
"1280px-Venn_diagram_rgb.svg.png", # "https://upload.wikimedia.org/wikipedia/commons/thumb/9/91/Venn_diagram_rgb.svg/1280px-Venn_diagram_rgb.svg.png",
26+
"RGBA_comp.png", # "https://upload.wikimedia.org/wikipedia/commons/0/0b/RGBA_comp.png",
2727
]
2828

2929

@@ -49,10 +49,11 @@ def server():
4949

5050

5151
@pytest.fixture(scope="session")
52-
def base64_encoded_image() -> dict[str, str]:
52+
def base64_encoded_image(local_asset_server) -> dict[str, str]:
5353
return {
54-
image_url: encode_image_base64(fetch_image(image_url))
55-
for image_url in TEST_IMAGE_URLS
54+
image_url:
55+
encode_image_base64(local_asset_server.get_image_asset(image_url))
56+
for image_url in TEST_IMAGE_ASSETS
5657
}
5758

5859

@@ -70,7 +71,7 @@ def get_hf_prompt_tokens(model_name, content, image_url):
7071

7172
@pytest.mark.asyncio
7273
@pytest.mark.parametrize("model_name", [MODEL_NAME])
73-
@pytest.mark.parametrize("image_url", TEST_IMAGE_URLS)
74+
@pytest.mark.parametrize("image_url", TEST_IMAGE_ASSETS, indirect=True)
7475
async def test_image_embedding(server: RemoteOpenAIServer, model_name: str,
7576
image_url: str):
7677
content_text = "Represent the given image."

0 commit comments

Comments
 (0)