Skip to content

Commit a2d83d6

Browse files
ywang96 authored and Patryk999 committed
[Bugfix] Follow-up fix on MediaWithBytes (vllm-project#29951)
Signed-off-by: Roger Wang <[email protected]>
1 parent e076853 commit a2d83d6

File tree

3 files changed

+5
-2
lines changed

3 files changed

+5
-2
lines changed

vllm/multimodal/base.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ class MediaWithBytes(Generic[_T]):
2121
2222
The wrapper delegates attribute access to the underlying media object,
2323
making it behave transparently like the wrapped type (e.g., PIL.Image).
24+
25+
NOTE: Currently, this wrapper is used only for the image modality.
2426
"""
2527

2628
media: _T

vllm/multimodal/inputs.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
from PIL.Image import Image
3333
from transformers.feature_extraction_utils import BatchFeature
3434

35+
from .base import MediaWithBytes
3536
from .processing import MultiModalHashes
3637

3738
else:
@@ -59,7 +60,7 @@
5960
item, which can be passed to a HuggingFace `AudioProcessor`.
6061
"""
6162

62-
ImageItem: TypeAlias = Union[HfImageItem, "torch.Tensor"]
63+
ImageItem: TypeAlias = Union[HfImageItem, "torch.Tensor", "MediaWithBytes[HfImageItem]"]
6364
"""
6465
A `transformers.image_utils.ImageInput` representing a single image
6566
item, which can be passed to a HuggingFace `ImageProcessor`.

vllm/multimodal/parse.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -484,7 +484,7 @@ def _parse_image_data(
484484
return ImageEmbeddingItems(data)
485485

486486
if (
487-
isinstance(data, PILImage.Image)
487+
isinstance(data, (PILImage.Image, MediaWithBytes))
488488
or isinstance(data, (np.ndarray, torch.Tensor))
489489
and data.ndim == 3
490490
):

0 commit comments

Comments
 (0)