
Commit 7f76b5d

Deprecate Quantizer support of nn.Module (#1421)
* Deprecate Quantizer support of nn.Module
* fix style
* fix
* update test
* fix
1 parent 7d2f197 commit 7f76b5d

2 files changed: +10 additions, -273 deletions


optimum/intel/openvino/quantization.py

Lines changed: 10 additions & 147 deletions
```diff
@@ -16,7 +16,6 @@
 import dataclasses
 import inspect
 import logging
-import os
 from collections import UserDict, deque
 from contextlib import contextmanager
 from io import BytesIO
@@ -30,30 +29,23 @@
 import openvino
 import requests
 import torch
-import transformers
 from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE
-from nncf.quantization.advanced_parameters import OverflowFix
 from nncf.torch import register_module
 from nncf.torch.initialization import PTInitializingDataLoader
 from openvino import Core, Tensor
 from openvino._offline_transformations import compress_quantize_weights_transformation
 from PIL import Image
-from torch.utils._pytree import tree_map
 from torch.utils.data import DataLoader, RandomSampler
 from tqdm import tqdm
-from transformers import AutoProcessor, AutoTokenizer, DataCollator, PreTrainedModel, default_data_collator
+from transformers import AutoProcessor, AutoTokenizer, DataCollator, default_data_collator
 from transformers.pytorch_utils import Conv1D
 from transformers.utils import is_accelerate_available
 
 from optimum.exporters.tasks import TasksManager
-from optimum.exporters.utils import check_dummy_inputs_are_allowed
 from optimum.intel.openvino.modeling_sam import OVSamPromptEncoder, OVSamVisionEncoder
 from optimum.quantization_base import OptimumQuantizer
 from optimum.utils.logging import warn_once
 
-from ...exporters.openvino import export, export_pytorch_via_onnx
-from ...exporters.openvino.model_patcher import patch_model_with_bettertransformer
-from ...exporters.openvino.stateful import ensure_export_task_support_stateful, ensure_stateful_is_available
 from ..utils.constant import _TASK_ALIASES
 from ..utils.import_utils import (
     DATASETS_IMPORT_ERROR,
@@ -63,7 +55,6 @@
     is_nncf_version,
     is_sentence_transformers_available,
 )
-from ..utils.modeling_utils import get_model_device
 from .configuration import (
     OVConfig,
     OVMixedQuantizationConfig,
@@ -73,17 +64,13 @@
     OVQuantizationMethod,
     OVWeightQuantizationConfig,
 )
-from .modeling import OVModelForFeatureExtraction, OVModelForMaskedLM, OVModelForZeroShotImageClassification
+from .modeling import OVModel, OVModelForFeatureExtraction, OVModelForMaskedLM, OVModelForZeroShotImageClassification
 from .modeling_base import OVBaseModel
 from .modeling_decoder import OVBaseDecoderModel, OVModelForCausalLM
 from .modeling_sam import OVSamModel
 from .modeling_seq2seq import OVDecoder, OVEncoder, OVModelForSeq2SeqLM, _OVModelForWhisper
 from .modeling_visual_language import OVModelForVisualCausalLM, OVVisionEmbedding
 from .utils import (
-    MAX_ONNX_OPSET,
-    MIN_ONNX_QDQ_OPSET,
-    ONNX_WEIGHTS_NAME,
-    OV_XML_FILE_NAME,
     PREDEFINED_LANGUAGE_DATASETS,
     PREDEFINED_SAM_DATASETS,
     PREDEFINED_SD_DATASETS,
```
```diff
@@ -258,11 +245,11 @@ class OVCalibrationDatasetBuilder:
         will contain two keys: `encoder_model` and `decoder_model`.
     """
 
-    def __init__(self, model: transformers.PreTrainedModel, seed: int = 42):
+    def __init__(self, model: OVModel, seed: int = 42):
         """
 
         Args:
-            model (`transformers.PreTrainedModel`):
+            model (`OVModel`):
                 The model to build calibration dataset for.
             seed (`int`, defaults to 42):
                 Random seed to use for reproducibility.
```
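With this hunk, `OVCalibrationDatasetBuilder` is constructed from an already exported `OVModel` rather than a `transformers.PreTrainedModel`. A minimal sketch of the new constructor contract, assuming an optimum-intel build that contains this commit; the model id is a placeholder and no `build_*` call is shown since none appears in this hunk:

```python
from optimum.intel import OVModelForCausalLM
from optimum.intel.openvino.quantization import OVCalibrationDatasetBuilder

# Export the PyTorch checkpoint to OpenVINO IR first (placeholder model id).
ov_model = OVModelForCausalLM.from_pretrained("gpt2", export=True)

# The builder now takes the exported OVModel; `seed` keeps dataset sampling reproducible.
builder = OVCalibrationDatasetBuilder(ov_model, seed=42)
```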
```diff
@@ -1153,11 +1140,11 @@ class OVQuantizer(OptimumQuantizer):
     Handle the NNCF quantization process.
     """
 
-    def __init__(self, model: transformers.PreTrainedModel, task: Optional[str] = None, seed: int = 42, **kwargs):
+    def __init__(self, model: OVModel, task: Optional[str] = None, seed: int = 42, **kwargs):
         """
         Args:
-            model (`transformers.PreTrainedModel`):
-                The [PreTrainedModel](https://huggingface.co/docs/transformers/main_classes/model#transformers.PreTrainedModel) to quantize.
+            model (`OVModel`):
+                The [OVModel](https://huggingface.co/docs/optimum-intel/en/openvino/reference) to quantize.
             task (`str`, defaults to None):
                 The task defining the model topology used for the ONNX export.
             seed (`int`, defaults to 42):
@@ -1169,7 +1156,7 @@ def __init__(self, model: transformers.PreTrainedModel, task: Optional[str] = No
         self.dataset_builder = OVCalibrationDatasetBuilder(model, seed)
 
     @classmethod
-    def from_pretrained(cls, model: PreTrainedModel, **kwargs):
+    def from_pretrained(cls, model: OVModel, **kwargs):
         # TODO : Create model
         return cls(model, **kwargs)
 
```
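Correspondingly, both `OVQuantizer.__init__` and `OVQuantizer.from_pretrained` are now typed against `OVModel`. A minimal sketch under the same assumption of an optimum-intel build with this change; the model id is a placeholder:

```python
from optimum.intel import OVModelForCausalLM, OVQuantizer

# Load and export the model to OpenVINO IR (placeholder model id).
ov_model = OVModelForCausalLM.from_pretrained("gpt2", export=True)

# Either entry point accepts the exported OVModel.
quantizer = OVQuantizer.from_pretrained(ov_model)  # equivalent to OVQuantizer(ov_model)
```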

```diff
@@ -1314,18 +1301,9 @@ def quantize(
                 calibration_dataset,
                 **kwargs,
             )
-
         elif isinstance(self.model, torch.nn.Module):
-            logger.warning(
-                "The support of `torch.nn.Module` will be deprecated in a future release of optimum-intel, please use the corresponding `OVModelForXxx` class to load you model."
-                "To convert a PyTorch model to OpenVINO, you can set `export=True` when loading your model as `OVModelForXxx.from_pretrained(..., export=True)`"
-            )
-            self._quantize_torchmodel(
-                ov_config,
-                save_directory,
-                calibration_dataset,
-                file_name,
-                **kwargs,
+            raise TypeError(
+                "The support of `torch.nn.Module` is deprecated, please use the corresponding `OVModelForXxx` class to load and export your model to the OpenVINO IR format."
             )
         else:
             raise TypeError(f"Unsupported model type: {type(self.model)}")
```
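After this hunk, passing a raw `torch.nn.Module` to `quantize()` raises a `TypeError` instead of taking the removed PyTorch fallback path. A hedged migration sketch, assuming the public `OVConfig` and `OVWeightQuantizationConfig` APIs of current optimum-intel releases; the model id and output directory are placeholders:

```python
from optimum.intel import OVConfig, OVModelForCausalLM, OVQuantizer, OVWeightQuantizationConfig

# Before: OVQuantizer(AutoModelForCausalLM.from_pretrained(...)) is now rejected.
# After: export to OpenVINO IR at load time, then quantize the OVModel.
ov_model = OVModelForCausalLM.from_pretrained("gpt2", export=True)  # placeholder model id

quantizer = OVQuantizer(ov_model)
quantizer.quantize(
    ov_config=OVConfig(quantization_config=OVWeightQuantizationConfig(bits=8)),
    save_directory="gpt2-ov-int8",  # placeholder output directory
)
```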
```diff
@@ -1479,121 +1457,6 @@ def _quantize_ovbasemodel(
         self.model.save_pretrained(save_directory)
         ov_config.save_pretrained(save_directory)
 
-    def _quantize_torchmodel(
-        self,
-        ov_config: OVConfig,
-        save_directory: Union[str, Path],
-        calibration_datasets: Optional[OVCalibrationDataset] = None,
-        file_name: Optional[str] = None,
-        **kwargs,
-    ):
-        if save_directory is None:
-            # TODO : can be set to self.model.config.name_or_path for OVModels when not provided
-            raise ValueError("`save_directory` needs to be specified")
-
-        self._set_task()
-        save_directory = Path(save_directory)
-        save_directory.mkdir(parents=True, exist_ok=True)
-        ov_file_name = file_name if file_name is not None else OV_XML_FILE_NAME
-        output_path = save_directory.joinpath(ov_file_name)
-        output_path = output_path.with_suffix(".xml").as_posix()
-
-        model_type = self.model.config.model_type
-        onnx_config_class = TasksManager.get_exporter_config_constructor(
-            exporter="openvino",
-            model=self.model,
-            task=self.task,
-            model_type=model_type,
-        )
-
-        save_onnx_model = ov_config.save_onnx_model
-        onnx_file_name = (
-            ONNX_WEIGHTS_NAME if file_name is None and save_onnx_model else Path(ov_file_name).with_suffix(".onnx")
-        )
-
-        task = self.task
-        model = self.model
-        self.model.config.save_pretrained(save_directory)
-        if task.startswith("text-generation"):
-            onnx_config = onnx_config_class(
-                model.config, use_past=model.config.use_cache, use_past_in_inputs=model.config.use_cache
-            )
-            if model.config.use_cache:
-                task = "text-generation-with-past"
-        else:
-            onnx_config = onnx_config_class(model.config)
-
-        stateful = ensure_stateful_is_available() and ensure_export_task_support_stateful(task)
-
-        quantization_config = ov_config.quantization_config
-        if isinstance(quantization_config, OVWeightQuantizationConfig):
-            if stateful:
-                # patch model before weight compression
-                model = patch_model_with_bettertransformer(model)
-
-            dummy_inputs = onnx_config.generate_dummy_inputs(framework="pt")
-            device = get_model_device(model)
-            dummy_inputs = tree_map(
-                lambda value: value.to(device) if isinstance(value, torch.Tensor) else value, dummy_inputs
-            )
-            check_dummy_inputs_are_allowed(model, dummy_inputs)
-
-            nncf.compress_weights(model, dataset=nncf.Dataset([dummy_inputs]))
-        else:
-            if not isinstance(quantization_config, OVQuantizationConfig):
-                raise ValueError(f"Unsupported type of quantization config: {type(quantization_config)}")
-            if stateful:
-                logger.warning(
-                    "Quantization algorithm does not support optimized stateful models. "
-                    "The original model without optimization will be quantized and exported."
-                )
-                stateful = False
-
-            if calibration_datasets is None:
-                raise ValueError("Calibration dataset is required to run quantization.")
-            if "model" not in calibration_datasets:
-                raise RuntimeError("Calibration dataset should contain a key 'model' with a dataset.")
-            model = nncf.quantize(
-                model,
-                calibration_datasets["model"],
-                subset_size=quantization_config.num_samples or 128,
-                ignored_scope=quantization_config.get_ignored_scope_instance(),
-                model_type=nncf.ModelType(quantization_config.model_type),
-                preset=(
-                    nncf.QuantizationPreset.PERFORMANCE if quantization_config.sym else nncf.QuantizationPreset.MIXED
-                ),
-                fast_bias_correction=quantization_config.fast_bias_correction,
-                advanced_parameters=nncf.AdvancedQuantizationParameters(
-                    overflow_fix=OverflowFix(quantization_config.overflow_fix)
-                ),
-                **kwargs,
-            )
-
-        model_path = save_directory / (onnx_file_name if save_onnx_model else ov_file_name)
-        onnx_path = save_directory / onnx_file_name
-        export_fn = export if not save_onnx_model else export_pytorch_via_onnx
-        opset = min(onnx_config.DEFAULT_ONNX_OPSET, MAX_ONNX_OPSET)
-        opset = max(opset, MIN_ONNX_QDQ_OPSET)
-        export_kwargs = {}
-        if not save_onnx_model:
-            export_kwargs = {"stateful": stateful}
-
-        _, _, is_onnx = export_fn(model=model, config=onnx_config, output=model_path, opset=opset, **export_kwargs)
-        if is_onnx:
-            # Load and save the compressed model
-            model = core.read_model(onnx_path)
-            # Model required second saving for appling weights compression transformations
-            self._save_pretrained(model, output_path)
-            # if onnx conversion happens as fallback for pytorch conversion, remove onnx model
-            if not save_onnx_model:
-                os.remove(onnx_path)
-                try:
-                    os.remove(f"{onnx_path}_data")
-                except FileNotFoundError:
-                    pass
-
-        ov_config.save_pretrained(save_directory)
-
     @staticmethod
     def _save_pretrained(model: openvino.Model, output_path: str):
         compress_quantize_weights_transformation(model)
```
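The deleted `_quantize_torchmodel` covered both NNCF weight compression and full quantization of the PyTorch graph. For the weight-compression case, the replacement flow is to request compression while exporting through an `OVModelForXxx` class; a sketch under the assumption that `quantization_config` is accepted at load time, as in recent optimum-intel releases (model id and output path are placeholders):

```python
from optimum.intel import OVModelForCausalLM, OVWeightQuantizationConfig

# Export to OpenVINO IR and compress weights to 8-bit in one step (placeholder model id).
compressed_model = OVModelForCausalLM.from_pretrained(
    "gpt2",
    export=True,
    quantization_config=OVWeightQuantizationConfig(bits=8),
)
compressed_model.save_pretrained("gpt2-ov-int8")  # placeholder output directory
```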
