Skip to content

Commit 3336c8c

Browse files
authored
1 parent b124e10 commit 3336c8c

File tree

1 file changed

+26
-10
lines changed

1 file changed

+26
-10
lines changed

examples/offline_inference/vision_language_multi_image.py

Lines changed: 26 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -593,21 +593,21 @@ def load_qwen_vl_chat(question: str, image_urls: list[str]) -> ModelRequestData:
593593

594594
def load_qwen2_vl(question: str, image_urls: list[str]) -> ModelRequestData:
595595
try:
596-
from qwen_vl_utils import process_vision_info
596+
from qwen_vl_utils import smart_resize
597597
except ModuleNotFoundError:
598598
print(
599599
"WARNING: `qwen-vl-utils` not installed, input images will not "
600600
"be automatically resized. You can enable this functionality by "
601601
"`pip install qwen-vl-utils`."
602602
)
603-
process_vision_info = None
603+
smart_resize = None
604604

605605
model_name = "Qwen/Qwen2-VL-7B-Instruct"
606606

607607
# Tested on L40
608608
engine_args = EngineArgs(
609609
model=model_name,
610-
max_model_len=32768 if process_vision_info is None else 4096,
610+
max_model_len=32768 if smart_resize is None else 4096,
611611
max_num_seqs=5,
612612
limit_mm_per_prompt={"image": len(image_urls)},
613613
)
@@ -630,10 +630,18 @@ def load_qwen2_vl(question: str, image_urls: list[str]) -> ModelRequestData:
630630
messages, tokenize=False, add_generation_prompt=True
631631
)
632632

633-
if process_vision_info is None:
633+
if smart_resize is None:
634634
image_data = [fetch_image(url) for url in image_urls]
635635
else:
636-
image_data, _ = process_vision_info(messages)
636+
637+
def post_process_image(image: Image) -> Image:
638+
width, height = image.size
639+
resized_height, resized_width = smart_resize(
640+
height, width, max_pixels=1024 * 28 * 28
641+
)
642+
return image.resize((resized_width, resized_height))
643+
644+
image_data = [post_process_image(fetch_image(url)) for url in image_urls]
637645

638646
return ModelRequestData(
639647
engine_args=engine_args,
@@ -644,20 +652,20 @@ def load_qwen2_vl(question: str, image_urls: list[str]) -> ModelRequestData:
644652

645653
def load_qwen2_5_vl(question: str, image_urls: list[str]) -> ModelRequestData:
646654
try:
647-
from qwen_vl_utils import process_vision_info
655+
from qwen_vl_utils import smart_resize
648656
except ModuleNotFoundError:
649657
print(
650658
"WARNING: `qwen-vl-utils` not installed, input images will not "
651659
"be automatically resized. You can enable this functionality by "
652660
"`pip install qwen-vl-utils`."
653661
)
654-
process_vision_info = None
662+
smart_resize = None
655663

656664
model_name = "Qwen/Qwen2.5-VL-3B-Instruct"
657665

658666
engine_args = EngineArgs(
659667
model=model_name,
660-
max_model_len=32768 if process_vision_info is None else 4096,
668+
max_model_len=32768 if smart_resize is None else 4096,
661669
max_num_seqs=5,
662670
limit_mm_per_prompt={"image": len(image_urls)},
663671
)
@@ -680,10 +688,18 @@ def load_qwen2_5_vl(question: str, image_urls: list[str]) -> ModelRequestData:
680688
messages, tokenize=False, add_generation_prompt=True
681689
)
682690

683-
if process_vision_info is None:
691+
if smart_resize is None:
684692
image_data = [fetch_image(url) for url in image_urls]
685693
else:
686-
image_data, _ = process_vision_info(messages, return_video_kwargs=False)
694+
695+
def post_process_image(image: Image) -> Image:
696+
width, height = image.size
697+
resized_height, resized_width = smart_resize(
698+
height, width, max_pixels=1024 * 28 * 28
699+
)
700+
return image.resize((resized_width, resized_height))
701+
702+
image_data = [post_process_image(fetch_image(url)) for url in image_urls]
687703

688704
return ModelRequestData(
689705
engine_args=engine_args,

0 commit comments

Comments
 (0)