
Commit 384dda5

Deprecate onnx/ort model export and quantization (#795)
* deprecate onnx/ort model export and quantization
* fix
1 parent 92fe39f commit 384dda5

3 files changed: +37 −15 lines changed


optimum/intel/neural_compressor/configuration.py

Lines changed: 6 additions & 0 deletions
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import logging
 from typing import Dict, Optional, Union
 
 from neural_compressor.config import DistillationConfig, WeightPruningConfig, _BaseQuantizationConfig
@@ -28,6 +29,8 @@
     "post_training_weight_only": "weight_only",
 }
 
+logger = logging.getLogger(__name__)
+
 
 class INCConfig(BaseConfig):
     CONFIG_NAME = "inc_config.json"
@@ -49,6 +52,9 @@ def __init__(
         self.distillation = self._create_distillation_config(distillation) or {}
         self.save_onnx_model = save_onnx_model
 
+        if self.save_onnx_model:
+            logger.warning("ONNX model saving is deprecated and will be removed soon.")
+
     @staticmethod
     def _create_quantization_config(config: Union[Dict, _BaseQuantizationConfig]):
         # TODO : add activations_dtype and weights_dtype
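The practical effect on the configuration side is a warning at construction time. A minimal sketch, not part of the commit, assuming an environment with this change installed and the other INCConfig arguments left at their defaults:

import logging

from optimum.intel import INCConfig

logging.basicConfig(level=logging.WARNING)

# Logs "ONNX model saving is deprecated and will be removed soon."
inc_config = INCConfig(save_onnx_model=True)

# Leaving the flag at its default keeps construction silent.
inc_config = INCConfig()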

optimum/intel/neural_compressor/quantization.py

Lines changed: 23 additions & 10 deletions
@@ -200,9 +200,15 @@ def quantize(
         use_xpu = device == torch.device("xpu") or device == "xpu"
         calibration_dataloader = None
 
+        if save_onnx_model:
+            logger.warning("ONNX model export is deprecated and will be removed soon.")
+
+        if isinstance(self._original_model, ORTModel):
+            logger.warning("ONNX model quantization is deprecated and will be removed soon.")
+
         if save_onnx_model and isinstance(self._original_model, ORTModel):
+            logger.warning("The model provided is already an ONNX model. Setting `save_onnx_model` to False.")
             save_onnx_model = False
-            logger.warning("Model provided is an ONNX model, `save_onnx_model` is set to False")
 
         default_name = WEIGHTS_NAME if not isinstance(self._original_model, ORTModel) else ONNX_WEIGHTS_NAME
         self._set_task()
@@ -223,13 +229,16 @@ def quantize(
                 f"but only version {IPEX_MINIMUM_VERSION} or higher is supported."
             )
 
-        if save_onnx_model:
-            if (
-                not isinstance(quantization_config, PostTrainingQuantConfig)
-                or INCQuantizationMode(quantization_config.approach) == INCQuantizationMode.DYNAMIC
-            ):
-                logger.warning("ONNX export for dynamic and weight only quantized model is not supported.")
-                save_onnx_model = False
+        if save_onnx_model and (
+            not isinstance(quantization_config, PostTrainingQuantConfig)
+            or INCQuantizationMode(quantization_config.approach) == INCQuantizationMode.DYNAMIC
+        ):
+            logger.warning(
+                "ONNX export for dynamic and weight only quantized model is not supported. "
+                "Only static quantization model can be exported to ONNX format. "
+                "Setting `save_onnx_model` to False."
+            )
+            save_onnx_model = False
 
         # ITREX Weight Only Quantization
         if not isinstance(quantization_config, PostTrainingQuantConfig):
@@ -296,9 +305,13 @@ def quantize(
                 remove_unused_columns=remove_unused_columns,
                 data_collator=data_collator,
             )
+
             op_type_dict = getattr(quantization_config, "op_type_dict", None)
-            if op_type_dict is None or "Embedding" not in op_type_dict:
-                logger.warning("ONNX export is no supported for model with quantized embeddings")
+            if save_onnx_model and (op_type_dict is None or "Embedding" not in op_type_dict):
+                logger.warning(
+                    "ONNX export is no supported for model with quantized embeddings. "
+                    "Setting `save_onnx_model` to False."
+                )
                 save_onnx_model = False
 
         if not isinstance(quantization_config, PostTrainingQuantConfig):
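A usage sketch of the quantizer-side behavior (the model checkpoint and arguments are illustrative, not taken from the commit): with a dynamic post-training configuration, `save_onnx_model=True` now triggers the deprecation warning and is reset to False, so only the quantized PyTorch model is written to the save directory.

from neural_compressor.config import PostTrainingQuantConfig
from transformers import AutoModelForSequenceClassification

from optimum.intel import INCQuantizer

model = AutoModelForSequenceClassification.from_pretrained(
    "distilbert-base-uncased-finetuned-sst-2-english"
)
quantizer = INCQuantizer.from_pretrained(model)

quantizer.quantize(
    quantization_config=PostTrainingQuantConfig(approach="dynamic"),
    save_directory="quantized_model",
    save_onnx_model=True,  # deprecated: a warning is logged and the flag is forced back to False
)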

optimum/intel/neural_compressor/trainer.py

Lines changed: 8 additions & 5 deletions
@@ -175,6 +175,9 @@ def __init__(
         # TODO : To deprecate once support transformers > 4.30.0
         self.deepspeed = None
 
+        if save_onnx_model:
+            logger.warning("ONNX model saving is deprecated and will be removed soon.")
+
         # Attach dtype and architecture to the config
         if quantization_config is not None:
             self.dtype = "int8"
@@ -678,15 +681,12 @@ def _inner_training_loop(
     def save_model(
         self,
         output_dir: Optional[str] = None,
-        _internal_call: bool = False,
-        save_onnx_model: Optional[bool] = None,
+        save_onnx_model: bool = False,
     ):
         """
         Will save the model, so you can reload it using `from_pretrained()`.
         Will only save from the main process.
         """
-        save_onnx_model = save_onnx_model if save_onnx_model is not None else self.save_onnx_model
-
         if output_dir is None:
             output_dir = self.args.output_dir
 
@@ -734,7 +734,10 @@ def _save(
 
         # Disable ONNX export for quantized model as deprecated in neural-compressor>=2.2.0
         if save_onnx_model and self.dtype == "int8":
-            logger.warning("ONNX export for quantized model is no longer supported by neural-compressor>=2.2.0. ")
+            logger.warning(
+                "ONNX export for quantized model is no longer supported by neural-compressor>=2.2.0. "
+                "Setting `save_onnx_model` to False."
+            )
             save_onnx_model = False
 
         # Export the compressed model to the ONNX format
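On the trainer side, `save_model` no longer accepts `_internal_call` and no longer falls back to the `save_onnx_model` value captured at construction time, so ONNX export has to be requested explicitly per call. A hedged sketch, assuming `trainer` is an already constructed INCTrainer with a quantization config:

# Saves the PyTorch model only; ONNX export is now opt-in per call.
trainer.save_model("int8_model")

# Requests ONNX export explicitly; for an int8-quantized model this logs
# "ONNX export for quantized model is no longer supported by neural-compressor>=2.2.0. ..."
# and the export is skipped.
trainer.save_model("int8_model", save_onnx_model=True)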
