Skip to content

Commit c0a6eec

Browse files
[OV] Resize large images during VLM calibration data collection (#1322)
* Resize large images during VLM calibration data collection
* Add argument for max_image_size
1 parent 69311c0 commit c0a6eec

File tree

1 file changed

+10
-1
lines changed

1 file changed

+10
-1
lines changed

optimum/intel/openvino/quantization.py

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -674,7 +674,10 @@ def _prepare_causal_lm_calibration_data(
674674
return OVCalibrationDataset(calibration_dataset)
675675

676676
def _prepare_visual_causal_lm_calibration_data(
677-
self, config: OVQuantizationConfigBase, dataset: "Dataset"
677+
self,
678+
config: OVQuantizationConfigBase,
679+
dataset: "Dataset",
680+
max_image_size: Optional[int] = 600,
678681
) -> OVCalibrationDataset:
679682
"""
680683
Prepares calibration data for VLM pipelines.
@@ -695,6 +698,12 @@ def _prepare_visual_causal_lm_calibration_data(
695698
instruction = item[dataset_metadata["inputs"]["instruction"]]
696699
image_url = item[dataset_metadata["inputs"]["image_url"]]
697700
image = Image.open(requests.get(image_url, stream=True).raw).convert("RGB")
701+
if max_image_size is not None:
702+
# To avoid large images, resize them keeping the aspect ratio
703+
scale_factor = max(image.size[0] / max_image_size, image.size[1] / max_image_size)
704+
if scale_factor > 1:
705+
new_size = (int(image.size[0] / scale_factor), int(image.size[1] / scale_factor))
706+
image = image.resize(new_size)
698707

699708
try:
700709
inputs = self.model.preprocess_inputs(

0 commit comments

Comments
 (0)