File tree Expand file tree Collapse file tree 2 files changed +235
-46
lines changed Expand file tree Collapse file tree 2 files changed +235
-46
lines changed Original file line number Diff line number Diff line change @@ -564,8 +564,7 @@ def _call_hf_processor(
564
564
# Since there may be extra tokens in the feature placeholders,
565
565
# we need to pass the image token ID to the model to select the
566
566
# tokens to merge from the vision encoder outputs
567
- processed_outputs ["image_token_id" ] = [image_token_id
568
- ] * len (image_data )
567
+ processed_outputs ["image_token_id" ] = torch .tensor (image_token_id )
569
568
570
569
return processed_outputs
571
570
@@ -575,13 +574,14 @@ def _get_mm_fields_config(
575
574
hf_processor_mm_kwargs : Mapping [str , object ],
576
575
) -> Mapping [str , MultiModalFieldConfig ]:
577
576
image_num_patches = hf_inputs .get ("image_num_patches" , torch .empty (0 ))
577
+ num_images = len (image_num_patches )
578
578
579
579
return dict (
580
580
pixel_values_flat = MultiModalFieldConfig .flat_from_sizes (
581
581
"image" , image_num_patches ),
582
582
image_num_patches = MultiModalFieldConfig .batched ("image" ),
583
583
image_embeds = MultiModalFieldConfig .batched ("image" ),
584
- image_token_id = MultiModalFieldConfig .batched ("image" ),
584
+ image_token_id = MultiModalFieldConfig .shared ("image" , num_images ),
585
585
)
586
586
587
587
def _get_prompt_replacements (
You can’t perform that action at this time.
0 commit comments