1616import dataclasses
1717import inspect
1818import logging
19- import os
2019from collections import UserDict , deque
2120from contextlib import contextmanager
2221from io import BytesIO
3029import openvino
3130import requests
3231import torch
33- import transformers
3432from huggingface_hub .constants import HUGGINGFACE_HUB_CACHE
35- from nncf .quantization .advanced_parameters import OverflowFix
3633from nncf .torch import register_module
3734from nncf .torch .initialization import PTInitializingDataLoader
3835from openvino import Core , Tensor
3936from openvino ._offline_transformations import compress_quantize_weights_transformation
4037from PIL import Image
41- from torch .utils ._pytree import tree_map
4238from torch .utils .data import DataLoader , RandomSampler
4339from tqdm import tqdm
44- from transformers import AutoProcessor , AutoTokenizer , DataCollator , PreTrainedModel , default_data_collator
40+ from transformers import AutoProcessor , AutoTokenizer , DataCollator , default_data_collator
4541from transformers .pytorch_utils import Conv1D
4642from transformers .utils import is_accelerate_available
4743
4844from optimum .exporters .tasks import TasksManager
49- from optimum .exporters .utils import check_dummy_inputs_are_allowed
5045from optimum .intel .openvino .modeling_sam import OVSamPromptEncoder , OVSamVisionEncoder
5146from optimum .quantization_base import OptimumQuantizer
5247from optimum .utils .logging import warn_once
5348
54- from ...exporters .openvino import export , export_pytorch_via_onnx
55- from ...exporters .openvino .model_patcher import patch_model_with_bettertransformer
56- from ...exporters .openvino .stateful import ensure_export_task_support_stateful , ensure_stateful_is_available
5749from ..utils .constant import _TASK_ALIASES
5850from ..utils .import_utils import (
5951 DATASETS_IMPORT_ERROR ,
6355 is_nncf_version ,
6456 is_sentence_transformers_available ,
6557)
66- from ..utils .modeling_utils import get_model_device
6758from .configuration import (
6859 OVConfig ,
6960 OVMixedQuantizationConfig ,
7364 OVQuantizationMethod ,
7465 OVWeightQuantizationConfig ,
7566)
76- from .modeling import OVModelForFeatureExtraction , OVModelForMaskedLM , OVModelForZeroShotImageClassification
67+ from .modeling import OVModel , OVModelForFeatureExtraction , OVModelForMaskedLM , OVModelForZeroShotImageClassification
7768from .modeling_base import OVBaseModel
7869from .modeling_decoder import OVBaseDecoderModel , OVModelForCausalLM
7970from .modeling_sam import OVSamModel
8071from .modeling_seq2seq import OVDecoder , OVEncoder , OVModelForSeq2SeqLM , _OVModelForWhisper
8172from .modeling_visual_language import OVModelForVisualCausalLM , OVVisionEmbedding
8273from .utils import (
83- MAX_ONNX_OPSET ,
84- MIN_ONNX_QDQ_OPSET ,
85- ONNX_WEIGHTS_NAME ,
86- OV_XML_FILE_NAME ,
8774 PREDEFINED_LANGUAGE_DATASETS ,
8875 PREDEFINED_SAM_DATASETS ,
8976 PREDEFINED_SD_DATASETS ,
@@ -258,11 +245,11 @@ class OVCalibrationDatasetBuilder:
258245 will contain two keys: `encoder_model` and `decoder_model`.
259246 """
260247
261- def __init__ (self , model : transformers . PreTrainedModel , seed : int = 42 ):
248+ def __init__ (self , model : OVModel , seed : int = 42 ):
262249 """
263250
264251 Args:
265- model (`transformers.PreTrainedModel `):
252+ model (`OVModel `):
266253 The model to build calibration dataset for.
267254 seed (`int`, defaults to 42):
268255 Random seed to use for reproducibility.
@@ -1153,11 +1140,11 @@ class OVQuantizer(OptimumQuantizer):
11531140 Handle the NNCF quantization process.
11541141 """
11551142
1156- def __init__ (self , model : transformers . PreTrainedModel , task : Optional [str ] = None , seed : int = 42 , ** kwargs ):
1143+ def __init__ (self , model : OVModel , task : Optional [str ] = None , seed : int = 42 , ** kwargs ):
11571144 """
11581145 Args:
1159- model (`transformers.PreTrainedModel `):
1160- The [PreTrainedModel ](https://huggingface.co/docs/transformers/main_classes/model#transformers.PreTrainedModel ) to quantize.
1146+ model (`OVModel `):
1147+ The [OVModel ](https://huggingface.co/docs/optimum-intel/en/openvino/reference ) to quantize.
11611148 task (`str`, defaults to None):
11621149 The task defining the model topology used for the ONNX export.
11631150 seed (`int`, defaults to 42):
@@ -1169,7 +1156,7 @@ def __init__(self, model: transformers.PreTrainedModel, task: Optional[str] = No
11691156 self .dataset_builder = OVCalibrationDatasetBuilder (model , seed )
11701157
11711158 @classmethod
1172- def from_pretrained (cls , model : PreTrainedModel , ** kwargs ):
1159+ def from_pretrained (cls , model : OVModel , ** kwargs ):
11731160 # TODO : Create model
11741161 return cls (model , ** kwargs )
11751162
@@ -1314,18 +1301,9 @@ def quantize(
13141301 calibration_dataset ,
13151302 ** kwargs ,
13161303 )
1317-
13181304 elif isinstance (self .model , torch .nn .Module ):
1319- logger .warning (
1320- "The support of `torch.nn.Module` will be deprecated in a future release of optimum-intel, please use the corresponding `OVModelForXxx` class to load you model."
1321- "To convert a PyTorch model to OpenVINO, you can set `export=True` when loading your model as `OVModelForXxx.from_pretrained(..., export=True)`"
1322- )
1323- self ._quantize_torchmodel (
1324- ov_config ,
1325- save_directory ,
1326- calibration_dataset ,
1327- file_name ,
1328- ** kwargs ,
1305+ raise TypeError (
1306+ "The support of `torch.nn.Module` is deprecated, please use the corresponding `OVModelForXxx` class to load and export your model to the OpenVINO IR format."
13291307 )
13301308 else :
13311309 raise TypeError (f"Unsupported model type: { type (self .model )} " )
@@ -1479,121 +1457,6 @@ def _quantize_ovbasemodel(
14791457 self .model .save_pretrained (save_directory )
14801458 ov_config .save_pretrained (save_directory )
14811459
1482- def _quantize_torchmodel (
1483- self ,
1484- ov_config : OVConfig ,
1485- save_directory : Union [str , Path ],
1486- calibration_datasets : Optional [OVCalibrationDataset ] = None ,
1487- file_name : Optional [str ] = None ,
1488- ** kwargs ,
1489- ):
1490- if save_directory is None :
1491- # TODO : can be set to self.model.config.name_or_path for OVModels when not provided
1492- raise ValueError ("`save_directory` needs to be specified" )
1493-
1494- self ._set_task ()
1495- save_directory = Path (save_directory )
1496- save_directory .mkdir (parents = True , exist_ok = True )
1497- ov_file_name = file_name if file_name is not None else OV_XML_FILE_NAME
1498- output_path = save_directory .joinpath (ov_file_name )
1499- output_path = output_path .with_suffix (".xml" ).as_posix ()
1500-
1501- model_type = self .model .config .model_type
1502- onnx_config_class = TasksManager .get_exporter_config_constructor (
1503- exporter = "openvino" ,
1504- model = self .model ,
1505- task = self .task ,
1506- model_type = model_type ,
1507- )
1508-
1509- save_onnx_model = ov_config .save_onnx_model
1510- onnx_file_name = (
1511- ONNX_WEIGHTS_NAME if file_name is None and save_onnx_model else Path (ov_file_name ).with_suffix (".onnx" )
1512- )
1513-
1514- task = self .task
1515- model = self .model
1516- self .model .config .save_pretrained (save_directory )
1517- if task .startswith ("text-generation" ):
1518- onnx_config = onnx_config_class (
1519- model .config , use_past = model .config .use_cache , use_past_in_inputs = model .config .use_cache
1520- )
1521- if model .config .use_cache :
1522- task = "text-generation-with-past"
1523- else :
1524- onnx_config = onnx_config_class (model .config )
1525-
1526- stateful = ensure_stateful_is_available () and ensure_export_task_support_stateful (task )
1527-
1528- quantization_config = ov_config .quantization_config
1529- if isinstance (quantization_config , OVWeightQuantizationConfig ):
1530- if stateful :
1531- # patch model before weight compression
1532- model = patch_model_with_bettertransformer (model )
1533-
1534- dummy_inputs = onnx_config .generate_dummy_inputs (framework = "pt" )
1535- device = get_model_device (model )
1536- dummy_inputs = tree_map (
1537- lambda value : value .to (device ) if isinstance (value , torch .Tensor ) else value , dummy_inputs
1538- )
1539- check_dummy_inputs_are_allowed (model , dummy_inputs )
1540-
1541- nncf .compress_weights (model , dataset = nncf .Dataset ([dummy_inputs ]))
1542- else :
1543- if not isinstance (quantization_config , OVQuantizationConfig ):
1544- raise ValueError (f"Unsupported type of quantization config: { type (quantization_config )} " )
1545- if stateful :
1546- logger .warning (
1547- "Quantization algorithm does not support optimized stateful models. "
1548- "The original model without optimization will be quantized and exported."
1549- )
1550- stateful = False
1551-
1552- if calibration_datasets is None :
1553- raise ValueError ("Calibration dataset is required to run quantization." )
1554- if "model" not in calibration_datasets :
1555- raise RuntimeError ("Calibration dataset should contain a key 'model' with a dataset." )
1556- model = nncf .quantize (
1557- model ,
1558- calibration_datasets ["model" ],
1559- subset_size = quantization_config .num_samples or 128 ,
1560- ignored_scope = quantization_config .get_ignored_scope_instance (),
1561- model_type = nncf .ModelType (quantization_config .model_type ),
1562- preset = (
1563- nncf .QuantizationPreset .PERFORMANCE if quantization_config .sym else nncf .QuantizationPreset .MIXED
1564- ),
1565- fast_bias_correction = quantization_config .fast_bias_correction ,
1566- advanced_parameters = nncf .AdvancedQuantizationParameters (
1567- overflow_fix = OverflowFix (quantization_config .overflow_fix )
1568- ),
1569- ** kwargs ,
1570- )
1571-
1572- model_path = save_directory / (onnx_file_name if save_onnx_model else ov_file_name )
1573- onnx_path = save_directory / onnx_file_name
1574- export_fn = export if not save_onnx_model else export_pytorch_via_onnx
1575- opset = min (onnx_config .DEFAULT_ONNX_OPSET , MAX_ONNX_OPSET )
1576- opset = max (opset , MIN_ONNX_QDQ_OPSET )
1577- export_kwargs = {}
1578- if not save_onnx_model :
1579- export_kwargs = {"stateful" : stateful }
1580-
1581- _ , _ , is_onnx = export_fn (model = model , config = onnx_config , output = model_path , opset = opset , ** export_kwargs )
1582- if is_onnx :
1583- # Load and save the compressed model
1584- model = core .read_model (onnx_path )
1585- # Model required second saving for appling weights compression transformations
1586- self ._save_pretrained (model , output_path )
1587- # if onnx conversion happens as fallback for pytorch conversion, remove onnx model
1588- if not save_onnx_model :
1589- os .remove (onnx_path )
1590- try :
1591- os .remove (f"{ onnx_path } _data" )
1592- except FileNotFoundError :
1593- pass
1594-
1595- ov_config .save_pretrained (save_directory )
1596-
15971460 @staticmethod
15981461 def _save_pretrained (model : openvino .Model , output_path : str ):
15991462 compress_quantize_weights_transformation (model )
0 commit comments