Skip to content

Commit 68aa3c0

Browse files
zch42Luodiancoderabbitai[bot]
authored
[Bugfix] Add min image resolution requirement for vLLM Qwen-VL models (#737)
* Add min image resolution requirement for vLLM Qwen-VL models * more robust Qwen model detection * Update lmms_eval/models/vllm.py Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --------- Co-authored-by: Li Bo <[email protected]> Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
1 parent c15ebb3 commit 68aa3c0

File tree

1 file changed

+24
-0
lines changed

1 file changed

+24
-0
lines changed

lmms_eval/models/vllm.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ def __init__(
4040
threads: int = 16, # Threads to use for decoding visuals
4141
trust_remote_code: Optional[bool] = True,
4242
chat_template: Optional[str] = None,
43+
min_image_pixels: int = 28, # minimum image dimension, required for Qwen 2/2.5-VL models
4344
**kwargs,
4445
) -> None:
4546
super().__init__()
@@ -50,6 +51,9 @@ def __init__(
5051
self.max_frame_num = max_frame_num
5152
self.threads = threads
5253
self.chat_template = chat_template
54+
self.min_image_pixels = min_image_pixels
55+
# Qwen 2/2.5-VL models enforce minimum image dimensions
56+
self._enforce_image_resize = self._is_qwen_vl_model(model_version)
5357

5458
# Convert any string arguments that start with { and end with } to dictionaries
5559
for key, value in kwargs.items():
@@ -85,13 +89,32 @@ def __init__(
8589
self.device = self.accelerator.device
8690
self.batch_size_per_gpu = int(batch_size)
8791

92+
def _is_qwen_vl_model(self, model_version: str) -> bool:
93+
qwen_vl_patterns = ["qwen2-vl", "qwen2.5-vl"]
94+
return any(pattern in model_version.lower() for pattern in qwen_vl_patterns)
95+
96+
def _maybe_resize_image(self, img: Image.Image) -> Image.Image:
    """Upscale ``img`` so that its shorter side reaches ``self.min_image_pixels``.

    The image is returned unchanged when ``self.min_image_pixels`` is not
    positive, when ``self._enforce_image_resize`` is false, or when the
    shorter side already meets the minimum. Otherwise both sides are scaled
    by the same factor (preserving the aspect ratio) with bicubic resampling.

    Args:
        img: Image to check and, if needed, enlarge.

    Returns:
        The original image, or a resized copy whose sides are all at least
        ``self.min_image_pixels``.

    Raises:
        ValueError: If either image dimension is zero or negative.
    """
    min_side = self.min_image_pixels
    # A non-positive minimum disables the check entirely.
    if min_side <= 0:
        return img

    short_side = min(img.size)
    if short_side <= 0:
        raise ValueError(f"Invalid image dimensions: {img.size}")

    if not self._enforce_image_resize or short_side >= min_side:
        return img

    scale = min_side / short_side  # same factor on both axes keeps the aspect ratio
    # int() truncates, so float rounding in ``dim * scale`` (e.g. 27.999... -> 27)
    # could leave a side one pixel short of the minimum; clamp to guarantee it.
    new_size = tuple(max(min_side, int(dim * scale)) for dim in img.size)
    return img.resize(new_size, Image.BICUBIC)
109+
88110
# Function to encode the image
89111
def encode_image(self, image: Union[Image.Image, str]):
90112
if isinstance(image, str):
91113
img = Image.open(image).convert("RGB")
92114
else:
93115
img = image.copy()
94116

117+
img = self._maybe_resize_image(img)
95118
output_buffer = BytesIO()
96119
img.save(output_buffer, format="PNG")
97120
byte_data = output_buffer.getvalue()
@@ -115,6 +138,7 @@ def encode_video(self, video_path):
115138
base64_frames = []
116139
for frame in frames:
117140
img = Image.fromarray(frame)
141+
img = self._maybe_resize_image(img)
118142
output_buffer = BytesIO()
119143
img.save(output_buffer, format="PNG")
120144
byte_data = output_buffer.getvalue()

0 commit comments

Comments
 (0)