|
1 | 1 | # SPDX-License-Identifier: Apache-2.0
|
2 | 2 | # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
| 3 | + |
| 4 | +import tempfile |
| 5 | +from pathlib import Path |
| 6 | + |
3 | 7 | import numpy as np
|
4 | 8 | import numpy.typing as npt
|
5 | 9 | import pytest
|
| 10 | +from PIL import Image |
6 | 11 |
|
7 |
| -from vllm import envs |
| 12 | +from vllm.assets.base import get_vllm_public_assets |
| 13 | +from vllm.assets.video import video_to_ndarrays, video_to_pil_images_list |
8 | 14 | from vllm.multimodal.image import ImageMediaIO
|
9 | 15 | from vllm.multimodal.video import (VIDEO_LOADER_REGISTRY, VideoLoader,
|
10 | 16 | VideoMediaIO)
|
11 | 17 |
|
| 18 | +from .utils import cosine_similarity, create_video_from_image, normalize_image |
| 19 | + |
12 | 20 | NUM_FRAMES = 10
|
13 | 21 | FAKE_OUTPUT_1 = np.random.rand(NUM_FRAMES, 1280, 720, 3)
|
14 | 22 | FAKE_OUTPUT_2 = np.random.rand(NUM_FRAMES, 1280, 720, 3)
|
@@ -59,30 +67,79 @@ def load_bytes(cls,
|
59 | 67 | return FAKE_OUTPUT_2
|
60 | 68 |
|
61 | 69 |
|
def test_video_media_io_kwargs(monkeypatch: pytest.MonkeyPatch):
    """Check which keyword arguments ``VideoMediaIO`` forwards to the loader.

    The video loader backend is switched, via the
    ``VLLM_VIDEO_LOADER_BACKEND`` environment variable, to
    ``assert_10_frames_1_fps`` for the duration of the test.
    NOTE(review): that backend is presumably the test loader registered
    earlier in this file that asserts on ``num_frames`` / ``fps`` -- confirm.
    """
    # Keyword sets whose load is expected to succeed.
    accepted_kwargs = (
        {"num_frames": 10, "fps": 1.0},
        {"num_frames": 10, "fps": 1.0, "not_used": "not_used"},
    )
    # Keyword sets whose load is expected to raise AssertionError, paired
    # with the message fragment the error must match.
    rejected_kwargs = (
        ({}, "bad num_frames"),
        ({"num_frames": 9, "fps": 1.0}, "bad num_frames"),
        ({"num_frames": 10, "fps": 2.0}, "bad fps"),
    )

    with monkeypatch.context() as patched_env:
        patched_env.setenv("VLLM_VIDEO_LOADER_BACKEND",
                           "assert_10_frames_1_fps")
        image_io = ImageMediaIO()

        # Verify that different args pass/fail assertions as expected.
        for loader_kwargs in accepted_kwargs:
            video_io = VideoMediaIO(image_io, **loader_kwargs)
            video_io.load_bytes(b"test")

        for loader_kwargs, expected_message in rejected_kwargs:
            with pytest.raises(AssertionError, match=expected_message):
                video_io = VideoMediaIO(image_io, **loader_kwargs)
                video_io.load_bytes(b"test")
| 98 | + |
| 99 | + |
@pytest.mark.parametrize("is_color", [True, False])
@pytest.mark.parametrize("fourcc, ext", [("mp4v", "mp4"), ("XVID", "avi")])
def test_opencv_video_io_colorspace(is_color: bool, fourcc: str, ext: str):
    """
    Test all functions that use OpenCV for video I/O return RGB format.
    Both RGB and grayscale videos are tested.

    A two-frame video is generated from the ``stop_sign.jpg`` asset and read
    back through ``video_to_ndarrays``, ``video_to_pil_images_list`` and
    ``VideoMediaIO.load_file``; every decoded frame is compared with the
    source image.
    """

    def assert_frames_match(decoded_frames, reference_image):
        # Each frame must be almost NaN-free and, on average, nearly
        # identical to the reference image.
        for decoded in decoded_frames:
            similarity = cosine_similarity(
                normalize_image(np.array(decoded)),
                normalize_image(np.array(reference_image)))
            assert np.sum(np.isnan(similarity)) / similarity.size < 0.001
            assert np.nanmean(similarity) > 0.99

    source_path = get_vllm_public_assets(filename="stop_sign.jpg",
                                         s3_prefix="vision_model_images")
    reference = Image.open(source_path)

    with tempfile.TemporaryDirectory() as tmpdir:
        if not is_color:
            # Feed the video writer a grayscale copy saved in the temp dir.
            source_path = f"{tmpdir}/test_grayscale_image.png"
            reference = reference.convert("L")
            reference.save(source_path)
            # Convert to gray RGB for comparison
            reference = reference.convert("RGB")

        video_path = f"{tmpdir}/test_RGB_video.{ext}"
        create_video_from_image(
            source_path,
            video_path,
            num_frames=2,
            is_color=is_color,
            fourcc=fourcc,
        )

        ndarray_frames = video_to_ndarrays(video_path)
        assert_frames_match(ndarray_frames, reference)

        pil_frames = video_to_pil_images_list(video_path)
        assert_frames_match(pil_frames, reference)

        # load_file returns a pair; only the frames are checked here.
        io_frames, _ = VideoMediaIO(ImageMediaIO()).load_file(
            Path(video_path))
        assert_frames_match(io_frames, reference)
0 commit comments