@@ -193,11 +193,6 @@ def __init__(self, model: transformers.PreTrainedModel, task: Optional[str] = No
193193 signature = inspect .signature (self .model .forward )
194194 self ._signature_columns = list (signature .parameters .keys ())
195195
196- @property
197- def input_names (self ):
198- logger .warning ("The`input_names` attribute is deprecated and will be removed in v1.18.0" )
199- return None
200-
201196 @classmethod
202197 def from_pretrained (cls , model : PreTrainedModel , ** kwargs ):
203198 # TODO : Create model
@@ -212,7 +207,6 @@ def quantize(
212207 batch_size : int = 1 ,
213208 data_collator : Optional [DataCollator ] = None ,
214209 remove_unused_columns : bool = True ,
215- weights_only : bool = None ,
216210 ** kwargs ,
217211 ):
218212 """
@@ -235,10 +229,6 @@ def quantize(
235229 The function to use to form a batch from a list of elements of the calibration dataset.
236230 remove_unused_columns (`bool`, defaults to `True`):
237231 Whether to remove the columns unused by the model forward method.
238- weights_only (`bool`, *optional*):
239- Being deprecated.
240- Compress weights to integer precision (8-bit by default) while keeping activations
241- floating-point. Fits best for LLM footprint reduction and performance acceleration.
242232
243233 Examples:
244234 ```python
@@ -263,32 +253,20 @@ def quantize(
263253 >>> optimized_model = OVModelForSequenceClassification.from_pretrained("./quantized_model")
264254 ```
265255 """
266- # TODO: deprecate weights_only argument
267- if weights_only is not None :
268- logger .warning (
269- "`weights_only` argument is deprecated and will be removed in v1.18.0. In the future please provide `ov_config.quantization_config` "
270- "as an instance of `OVWeightQuantizationConfig` for weight-only compression or as an instance of `OVQuantizationConfig` for full model quantization."
271- )
272-
273256 if ov_config is None :
274257 ov_config = OVConfig ()
275258 if not isinstance (ov_config , OVConfig ):
276259 raise TypeError (f"`ov_config` should be an `OVConfig`, but got: { type (ov_config )} instead." )
277260 quantization_config = ov_config .quantization_config
278261 if quantization_config is None :
279- if (weights_only is None or weights_only is True ) and calibration_dataset is None :
280- if weights_only is None :
281- logger .info (
282- "`quantization_config` was not provided, 8-bit asymmetric weight quantization will be applied."
283- )
284- ov_config .quantization_config = OVWeightQuantizationConfig (bits = 8 )
285- else :
286- logger .warning (
287- "`quantization_config` was not provided, but calibration dataset was provided, assuming full "
288- "model quantization is intended. In the future, please provide `quantization_config` as an "
289- "instance of OVQuantizationConfig."
290- )
291- ov_config .quantization_config = OVQuantizationConfig ()
262+ logger .warning (
263+ "`quantization_config` was not provided. In the future, please provide `quantization_config`"
264+ )
265+ ov_config .quantization_config = (
266+ OVWeightQuantizationConfig (bits = 8 )
267+ if calibration_dataset is None
268+ else OVQuantizationConfig ()
269+ )
292270
293271 if isinstance (self .model , OVBaseModel ):
294272 self ._quantize_ovbasemodel (
0 commit comments