Skip to content

Commit de19916

Browse files
[Bugfix] Convert image to RGB by default (#6430)
1 parent: 69672f1 · commit: de19916

File tree

1 file changed

+30
-10
lines changed

1 file changed

+30
-10
lines changed

vllm/multimodal/utils.py

Lines changed: 30 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -35,8 +35,12 @@ def _load_image_from_data_url(image_url: str):
3535
return load_image_from_base64(image_base64)
3636

3737

38-
def fetch_image(image_url: str) -> Image.Image:
39-
"""Load PIL image from a url or base64 encoded openai GPT4V format"""
38+
def fetch_image(image_url: str, *, image_mode: str = "RGB") -> Image.Image:
39+
"""
40+
Load a PIL image from an HTTP or base64 data URL.
41+
42+
By default, the image is converted into RGB format.
43+
"""
4044
if image_url.startswith('http'):
4145
_validate_remote_url(image_url, name="image_url")
4246

@@ -53,7 +57,7 @@ def fetch_image(image_url: str) -> Image.Image:
5357
raise ValueError("Invalid 'image_url': A valid 'image_url' must start "
5458
"with either 'data:image' or 'http'.")
5559

56-
return image
60+
return image.convert(image_mode)
5761

5862

5963
class ImageFetchAiohttp:
@@ -70,8 +74,17 @@ def get_aiohttp_client(cls) -> aiohttp.ClientSession:
7074
return cls.aiohttp_client
7175

7276
@classmethod
73-
async def fetch_image(cls, image_url: str) -> Image.Image:
74-
"""Load PIL image from a url or base64 encoded openai GPT4V format"""
77+
async def fetch_image(
78+
cls,
79+
image_url: str,
80+
*,
81+
image_mode: str = "RGB",
82+
) -> Image.Image:
83+
"""
84+
Asynchronously load a PIL image from an HTTP or base64 data URL.
85+
86+
By default, the image is converted into RGB format.
87+
"""
7588

7689
if image_url.startswith('http'):
7790
_validate_remote_url(image_url, name="image_url")
@@ -91,20 +104,27 @@ async def fetch_image(cls, image_url: str) -> Image.Image:
91104
"Invalid 'image_url': A valid 'image_url' must start "
92105
"with either 'data:image' or 'http'.")
93106

94-
return image
107+
return image.convert(image_mode)
95108

96109

97110
async def async_get_and_parse_image(image_url: str) -> MultiModalDataDict:
98111
image = await ImageFetchAiohttp.fetch_image(image_url)
99112
return {"image": image}
100113

101114

102-
def encode_image_base64(image: Image.Image, format: str = 'JPEG') -> str:
103-
"""Encode a pillow image to base64 format."""
115+
def encode_image_base64(
116+
image: Image.Image,
117+
*,
118+
image_mode: str = "RGB",
119+
format: str = "JPEG",
120+
) -> str:
121+
"""
122+
Encode a pillow image to base64 format.
104123
124+
By default, the image is converted into RGB format before being encoded.
125+
"""
105126
buffered = BytesIO()
106-
if format == 'JPEG':
107-
image = image.convert('RGB')
127+
image = image.convert(image_mode)
108128
image.save(buffered, format)
109129
return base64.b64encode(buffered.getvalue()).decode('utf-8')
110130

0 commit comments

Comments
 (0)