@@ -184,18 +184,31 @@ def batch_process(self, img_qas):
184184 }
185185 return inputs
186186
187- def single_process (self , img_qas ):
187+ def single_process (self , img_qa ):
188188 tokenizer = AutoTokenizer .from_pretrained (self .model_path , trust_remote_code = True )
189- if img_qas ['img' ] is not None :
190- pixel_values = load_image (img_qas ['img' ], max_num = 12 ).to (
191- next (self .vlm_model .parameters ()).dtype
192- )
189+ num_patches_list = None
190+ pixel_values_list = []
191+ if img_qa ['img' ] is not None :
192+ if isinstance (img_qa ['img' ], list ):
193+ num_patches_list = []
194+ for img_idx in range (len (img_qa ['img' ])):
195+ pixel_values = load_image (img_qa ['img' ][img_idx ], max_num = 12 ).to (
196+ next (self .vlm_model .parameters ()).dtype
197+ )
198+ pixel_values_list .append (pixel_values )
199+ num_patches_list .append (pixel_values .size (0 ))
200+ pixel_values = torch .cat (pixel_values_list , dim = 0 )
201+ else :
202+ pixel_values = load_image (img_qa ['img' ], max_num = 12 ).to (
203+ next (self .vlm_model .parameters ()).dtype
204+ )
193205 else :
194206 pixel_values = None
195- question = img_qas ['question' ]
207+ question = img_qa ['question' ]
196208 if pixel_values is not None and '<image>' not in question :
197209 question = '<image>\n ' + question
198- num_patches_list = [pixel_values .shape [0 ]] if pixel_values is not None else []
210+ if num_patches_list is None :
211+ num_patches_list = [pixel_values .shape [0 ]] if pixel_values is not None else []
199212 generation_config = dict ()
200213
201214 IMG_CONTEXT_TOKEN = '<IMG_CONTEXT>'
0 commit comments