@@ -695,6 +695,8 @@ def _prepare_visual_causal_lm_calibration_data(
         Prepares calibration data for VLM pipelines.
         Currently, collects data only for a language model component.
         """
+        from optimum.intel.openvino.modeling_visual_language import OVVisionEmbedding
+
         processor = AutoProcessor.from_pretrained(config.processor, trust_remote_code=config.trust_remote_code)
         try:
             tokenizer = AutoTokenizer.from_pretrained(config.tokenizer, trust_remote_code=config.trust_remote_code)
@@ -704,43 +706,65 @@ def _prepare_visual_causal_lm_calibration_data(
 
         dataset_metadata = PREDEFINED_VISUAL_LM_DATASETS[config.dataset]
 
-        calibration_data = []
-        num_samples = config.num_samples or 32
-        for item in tqdm(dataset, desc="Collecting calibration dataset", total=num_samples):
-            if len(calibration_data) > num_samples:
-                break
-
-            instruction = item[dataset_metadata["inputs"]["instruction"]]
-            image_url = item[dataset_metadata["inputs"]["image_url"]]
-            image = Image.open(requests.get(image_url, stream=True).raw).convert("RGB")
-            if max_image_size is not None:
-                # To avoid large images, resize them keeping the aspect ratio
-                scale_factor = max(image.size[0] / max_image_size, image.size[1] / max_image_size)
-                if scale_factor > 1:
-                    new_size = (int(image.size[0] / scale_factor), int(image.size[1] / scale_factor))
-                    image = image.resize(new_size)
-
-            try:
-                inputs = self.model.preprocess_inputs(
-                    text=instruction, image=image, processor=processor, tokenizer=tokenizer, config=self.model.config
+        collected_inputs: Dict[str, List[Dict[str, Any]]] = {"lm_model": []}
+        # Collect vision embeddings calibration data by using InferRequestWrapper
+        vision_embedding_components = []
+        for ov_component_name, ov_component in self.model.components.items():
+            if not isinstance(ov_component, OVVisionEmbedding):
+                continue
+            vision_embedding_components.append(ov_component)
+            submodel_name = f"{ov_component_name}_model"
+            collected_inputs[submodel_name] = []
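+            # Compiling creates the underlying infer request, which is then wrapped so
+            # that the inputs of every inference call are recorded into collected_inputs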
+            ov_component._compile()
+            ov_component.request = InferRequestWrapper(ov_component.request, collected_inputs[submodel_name])
+
+        try:
+            num_samples = config.num_samples or 32
+            for item in tqdm(dataset, desc="Collecting calibration dataset", total=num_samples):
+                if len(collected_inputs["lm_model"]) >= num_samples:
+                    break
+
+                instruction = item[dataset_metadata["inputs"]["instruction"]]
+                image_url = item[dataset_metadata["inputs"]["image_url"]]
+                image = Image.open(requests.get(image_url, stream=True).raw).convert("RGB")
+                if max_image_size is not None:
+                    # To avoid large images, resize them keeping the aspect ratio
+                    scale_factor = max(image.size[0] / max_image_size, image.size[1] / max_image_size)
+                    if scale_factor > 1:
+                        new_size = (int(image.size[0] / scale_factor), int(image.size[1] / scale_factor))
+                        image = image.resize(new_size)
+
+                try:
+                    inputs = self.model.preprocess_inputs(
+                        text=instruction,
+                        image=image,
+                        processor=processor,
+                        tokenizer=tokenizer,
+                        config=self.model.config,
+                    )
+                except ValueError as value_error:
+                    if "Tokenizer is required." in str(value_error) and tokenizer_error is not None:
+                        raise tokenizer_error
+                    raise value_error
+
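+                # Computing the multimodal embeddings runs the vision embedding submodels,
+                # so the wrapped infer requests capture their calibration inputs here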
+                inputs_embeds, attention_mask, position_ids = self.model.get_multimodal_embeddings(**inputs)
+
+                language_model_inputs = self.model.language_model.prepare_inputs(
+                    input_ids=None,
+                    attention_mask=attention_mask,
+                    position_ids=position_ids,
+                    inputs_embeds=inputs_embeds,
                 )
-            except ValueError as value_error:
-                if "Tokenizer is required." in str(value_error) and tokenizer_error is not None:
-                    raise tokenizer_error
-                raise value_error
-
-            inputs_embeds, attention_mask, position_ids = self.model.get_multimodal_embeddings(**inputs)
-
-            language_model_inputs = self.model.language_model.prepare_inputs(
-                input_ids=None,
-                attention_mask=attention_mask,
-                position_ids=position_ids,
-                inputs_embeds=inputs_embeds,
-            )
 
-            calibration_data.append(language_model_inputs)
+                collected_inputs["lm_model"].append(language_model_inputs)
+        finally:
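+            # Restore the original infer requests even if data collection failed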
+            for ov_component in vision_embedding_components:
+                ov_component.request = ov_component.request.request
+
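+        # Wrap each collected list of inputs into an nncf.Dataset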
+        for k in collected_inputs:
+            collected_inputs[k] = nncf.Dataset(collected_inputs[k])
 
-        return OVCalibrationDataset({"lm_model": nncf.Dataset(calibration_data)})
+        return OVCalibrationDataset(collected_inputs)
 
     def _prepare_speech_to_text_calibration_data(
         self, config: OVQuantizationConfigBase, dataset: "Dataset"
@@ -1285,7 +1309,7 @@ def _quantize_ovbasemodel(
         **kwargs,
     ):
         from optimum.intel.openvino.modeling_seq2seq import _OVModelForWhisper
-        from optimum.intel.openvino.modeling_visual_language import OVModelForVisualCausalLM
+        from optimum.intel.openvino.modeling_visual_language import OVModelForVisualCausalLM, OVVisionEmbedding
 
         if is_diffusers_available():
             from optimum.intel.openvino.modeling_diffusion import OVDiffusionPipeline
@@ -1295,25 +1319,19 @@ def _quantize_ovbasemodel(
             calibration_dataset = self.dataset_builder.build_from_quantization_config(quantization_config)
 
         quantization_configs = {}
-        if isinstance(quantization_config, OVPipelineQuantizationConfig):
-            quantization_configs = quantization_config.quantization_configs
-        elif (
+        default_config = None
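+        # Fallback config for submodels without an explicit entry in quantization_configs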
+        if (
             isinstance(quantization_config, OVWeightQuantizationConfig)
             and quantization_config.quant_method != OVQuantizationMethod.HYBRID
         ):
             #
             # Regular (non-hybrid) weight-only quantization
             #
             if isinstance(self.model, OVModelForVisualCausalLM):
-                for submodel_name in self.model.ov_submodels:
-                    quantization_configs[submodel_name] = (
-                        quantization_config
-                        if submodel_name == "lm_model"
-                        else OVWeightQuantizationConfig(bits=8, sym=True)
-                    )
+                quantization_configs["lm_model"] = quantization_config
+                default_config = OVWeightQuantizationConfig(bits=8, sym=True)
             else:
-                for submodel_name in self.model.ov_submodels:
-                    quantization_configs[submodel_name] = quantization_config
+                default_config = quantization_config
         else:
             #
             # Hybrid/Full/Mixed quantization
@@ -1344,9 +1362,7 @@ def _quantize_ovbasemodel(
                     quantization_config_copy = quantization_config.clone()
                     quantization_config_copy.dataset = None
                     quantization_config_copy.quant_method = OVQuantizationMethod.DEFAULT
-                    for submodel_name in self.model.ov_submodels:
-                        if submodel_name != diffusion_model_name:
-                            quantization_configs[submodel_name] = quantization_config_copy
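+                    # Non-diffusion submodels receive the weight-only copy via default_config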
+                    default_config = quantization_config_copy
                 else:
                     # The model may be for example OVModelForImageClassification, OVModelForAudioClassification, etc.
                     quantization_configs["model"] = quantization_config
@@ -1363,44 +1379,52 @@ def _quantize_ovbasemodel(
                 elif is_diffusers_available() and isinstance(self.model, OVDiffusionPipeline):
                     diffusion_model_name = next(iter(calibration_dataset))
                     quantization_configs[diffusion_model_name] = quantization_config
-                    for submodel_name in self.model.ov_submodels:
-                        if submodel_name != diffusion_model_name:
-                            quantization_configs[submodel_name] = OVWeightQuantizationConfig(bits=8)
+                    default_config = OVWeightQuantizationConfig(bits=8)
                 elif isinstance(self.model, OVModelForVisualCausalLM):
-                    for submodel_name in self.model.ov_submodels:
-                        quantization_configs[submodel_name] = (
-                            quantization_config
-                            if submodel_name == "lm_model"
-                            else OVWeightQuantizationConfig(bits=8, sym=True)
-                        )
-                else:
-                    for submodel_name in self.model.ov_submodels:
+                    quantization_configs["lm_model"] = quantization_config
+                    vision_embedding_submodel_names = [
+                        f"{name}_model"
+                        for name, component in self.model.components.items()
+                        if isinstance(component, OVVisionEmbedding)
+                    ]
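+                    # Fully quantize the vision embedding submodels as well; all other
+                    # submodels fall back to 8-bit weight-only via default_config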
+                    for submodel_name in vision_embedding_submodel_names:
                         quantization_configs[submodel_name] = quantization_config
+                    default_config = OVWeightQuantizationConfig(bits=8, sym=True)
+                else:
+                    default_config = quantization_config
             elif isinstance(quantization_config, OVMixedQuantizationConfig):
                 #
                 # Mixed quantization
                 #
                 if is_diffusers_available() and isinstance(self.model, OVDiffusionPipeline):
                     raise NotImplementedError("Mixed precision quantization isn't supported for diffusers.")
 
-                for submodel_name in self.model.ov_submodels:
-                    quantization_configs[submodel_name] = quantization_config
-            else:
+                default_config = quantization_config
+            elif not isinstance(quantization_config, OVPipelineQuantizationConfig):
                 raise ValueError(f"Unsupported type of quantization config: {type(quantization_config)}")
 
-        for submodel_name, config in quantization_configs.items():
-            if submodel_name not in self.model.ov_submodels:
-                raise RuntimeError(
-                    f"Unexpected submodel name encountered during applying quantization: {submodel_name}. "
-                    f"Available submodels: {list(self.model.ov_submodels.keys())}."
-                )
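+        # Normalize everything to a pipeline-level config holding explicit
+        # per-submodel entries plus a default for the remaining submodels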
+        pipeline_quantization_config = (
+            quantization_config
+            if isinstance(quantization_config, OVPipelineQuantizationConfig)
+            else OVPipelineQuantizationConfig(quantization_configs, default_config=default_config)
+        )
+
+        for submodel_name in self.model.ov_submodels:
+            config = pipeline_quantization_config.quantization_configs.get(
+                submodel_name, pipeline_quantization_config.default_config
+            )
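+            # Submodels with neither an explicit nor a default config stay unquantized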
+            if config is None:
+                continue
             submodel = self.model.ov_submodels[submodel_name]
             nncf_dataset = calibration_dataset.get(submodel_name, None) if calibration_dataset else None
 
             if isinstance(config, OVWeightQuantizationConfig) and config.quant_method == OVQuantizationMethod.HYBRID:
                 config = _get_hybrid_mixed_quantization_config(submodel, config, **kwargs)
 
             if isinstance(config, OVWeightQuantizationConfig):
+                if config.bits == 8:
+                    # 8-bit weight-only data-aware quantization is not supported
+                    nncf_dataset = None
                 # Weight only quantization is performed in-place
                 _weight_only_quantization(submodel, config, nncf_dataset, **kwargs)
             elif isinstance(config, (OVQuantizationConfig, OVMixedQuantizationConfig)):