 except Exception:
     logger.warning(
         'Can not import Qwen2VLForConditionalGeneration. '
-        'Please upgrade transformers.'
+        'If you need it, please upgrade transformers.'
+    )
+
+try:
+    from qwen_vl_utils import process_vision_info
+except Exception:
+    logger.warning(
+        'Can not import qwen_vl_utils. '
+        'If you need it, please pip install qwen-vl-utils'
     )
 
 from llmc.utils.registry_factory import MODEL_REGISTRY
@@ -40,24 +48,33 @@ def build_model(self):
         self.model = self.vlm_model
         self.model_config = self.vlm_model_config
 
+        self.min_pixels = 256 * 28 * 28
+        self.max_pixels = 1280 * 28 * 28
+        logger.warning(f'min_pixels is set to: {self.min_pixels}')
+        logger.warning(f'max_pixels is set to: {self.max_pixels}')
+        logger.warning('You can refer to the link https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct '
+                       'to get more info on image resolution for performance boost.')
+        self.processor = AutoProcessor.from_pretrained(
+            self.model_path,
+            min_pixels=self.min_pixels,
+            max_pixels=self.max_pixels
+        )
+
     def batch_process(self, img_qas):
-        from qwen_vl_utils import process_vision_info
-        processor = AutoProcessor.from_pretrained(self.model_path)
         messages = []
         for idx in range(len(img_qas)):
             img_path = img_qas[idx]['img']
             if img_path is not None:
+                content = []
+                if not isinstance(img_path, list):
+                    img_path = [img_path]
+                for img_idx in range(len(img_path)):
+                    content.append({'type': 'image', 'image': img_path[img_idx]})
+                content.append({'type': 'text', 'text': img_qas[idx]['question']})
                 message = [
                     {
                         'role': 'user',
-                        'content': [
-                            {
-                                'type': 'image', 'image': img_path,
-                                'resized_height': 280, 'resized_width': 420
-                                # default: original resolution
-                            },
-                            {'type': 'text', 'text': img_qas[idx]['question']}
-                        ]
+                        'content': content
                     }
                 ]
             else:
@@ -71,11 +88,11 @@ def batch_process(self, img_qas):
                 ]
             messages.append(message)
         texts = [
-            processor.apply_chat_template(msg, tokenize=False, add_generation_prompt=True)
+            self.processor.apply_chat_template(msg, tokenize=False, add_generation_prompt=True)
             for msg in messages
         ]
         image_inputs, video_inputs = process_vision_info(messages)
-        inputs = processor(
+        inputs = self.processor(
             text=texts,
             images=image_inputs,
             videos=video_inputs,
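For reference, here is a minimal, hypothetical sketch of how the updated `batch_process` could be called once the multi-image `content` handling above lands; the `model` instance and the image paths are placeholders, not taken from this diff:

```python
# Hypothetical usage sketch; `model` is an already-built Qwen2-VL wrapper
# from this repo and the image paths are placeholders.
img_qas = [
    # Single image: a plain string path, wrapped into a list by batch_process.
    {'img': 'images/cat.jpg', 'question': 'What animal is shown?'},
    # Multiple images for one question: the new list form of 'img'.
    {'img': ['images/a.jpg', 'images/b.jpg'], 'question': 'What differs between these two images?'},
    # Text-only sample: 'img' is None, so only the question is packed.
    {'img': None, 'question': 'Describe a cat in one sentence.'},
]
# batch_process builds chat-formatted texts, runs process_vision_info,
# and packs everything through self.processor (see the diff above).
inputs = model.batch_process(img_qas)
```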