Skip to content

Commit c7d6227

Browse files
Compress VLM model components to int8_sym instead of int8_asym (#1002)
* Compress VLM model components to int8_sym instead of int8_asym
* Tweak references
* Update reference values
1 parent 7636733 commit c7d6227

File tree

2 files changed

+5
-5
lines changed

2 files changed

+5
-5
lines changed

optimum/intel/openvino/quantization.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -437,7 +437,7 @@ def _quantize_ovbasemodel(
437437
sub_model_names = ["vision_embeddings", "text_embeddings"] + self.model.additional_parts
438438
sub_models = [getattr(self.model, f"{name}_model") for name in sub_model_names]
439439
for sub_model in sub_models:
440-
_weight_only_quantization(sub_model, OVWeightQuantizationConfig(bits=8, sym=False))
440+
_weight_only_quantization(sub_model, OVWeightQuantizationConfig(bits=8, sym=True))
441441
self.model.clear_requests()
442442
else:
443443
_weight_only_quantization(self.model.model, quantization_config, calibration_dataset)

tests/openvino/utils_tests.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -182,10 +182,10 @@
182182
"open-clip": (20, 28),
183183
"stable-diffusion-3": (66, 42, 58, 30),
184184
"flux": (56, 24, 28, 64),
185-
"llava": (30, 18, 2),
186-
"llava_next": (30, 18, 2),
187-
"minicpmv": (30, 52, 2, 12),
188-
"nanollava": (30, 30, 2),
185+
"llava": (30, 9, 1),
186+
"llava_next": (30, 9, 1),
187+
"minicpmv": (30, 26, 1, 6),
188+
"nanollava": (30, 15, 1),
189189
}
190190

191191
TEST_IMAGE_URL = "http://images.cocodataset.org/val2017/000000039769.jpg"

0 commit comments

Comments
 (0)