Skip to content

Commit 69311c0

Browse files
Fix vlm calibration dataset collection (#1321)
1 parent 7fec748 commit 69311c0

File tree

3 files changed

+32
-7
lines changed

3 files changed

+32
-7
lines changed

optimum/intel/openvino/quantization.py

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -705,13 +705,7 @@ def _prepare_visual_causal_lm_calibration_data(
705 705
raise tokenizer_error
706 706
raise value_error
707 707

708-
input_ids = inputs.get("input_ids")
709-
position_ids = torch.arange(input_ids.size(1)).unsqueeze(0).to(input_ids.device)
710-
711-
inputs_embeds, attention_mask, position_ids = self.model.get_multimodal_embeddings(
712-
**inputs,
713-
position_ids=position_ids,
714-
)
708+
inputs_embeds, attention_mask, position_ids = self.model.get_multimodal_embeddings(**inputs)
715 709

716 710
language_model_inputs = self.model.language_model.prepare_inputs(
717 711
input_ids=None,

tests/openvino/test_exporters_cli.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -638,6 +638,18 @@ class OVCLIExportTestCase(unittest.TestCase):
638 638
"audio_speech_projection_model": {"int8": 2},
639 639
},
640 640
),
641+
(
642+
"image-text-to-text",
643+
"qwen2_5_vl",
644+
'int4 --group-size 16 --ratio 0.8 --sensitivity-metric "mean_activation_magnitude" '
645+
"--dataset contextual --num-samples 1 --trust-remote-code",
646+
{
647+
"lm_model": {"int8": 14, "int4": 16},
648+
"text_embeddings_model": {"int8": 1},
649+
"vision_embeddings_model": {"int8": 1},
650+
"vision_embeddings_merger_model": {"int8": 12},
651+
},
652+
),
641 653
]
642 654
)
643 655

tests/openvino/test_quantization.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -998,6 +998,25 @@ class OVWeightCompressionTest(unittest.TestCase):
998 998
"audio_speech_projection_model": {"int8": 2},
999 999
},
1000 1000
),
1001+
(
1002+
OVModelForVisualCausalLM,
1003+
"qwen2_5_vl",
1004+
False,
1005+
dict(
1006+
bits=4,
1007+
group_size=16,
1008+
dataset="contextual",
1009+
ratio=0.8,
1010+
sensitivity_metric="mean_activation_magnitude",
1011+
num_samples=1,
1012+
),
1013+
{
1014+
"lm_model": {"int8": 14, "int4": 16},
1015+
"text_embeddings_model": {"int8": 1},
1016+
"vision_embeddings_model": {"int8": 1},
1017+
"vision_embeddings_merger_model": {"int8": 12},
1018+
},
1019+
),
1001 1020
]
1002 1021
)
1003 1022

0 commit comments

Comments (0)