Added a fix for FP16 overflow issue on GPU/NPU (#994)

AlexKoff88 · web-flow · commit b3cbc951a87c · 2024-11-11T19:52:56.000+04:00
* Added a fix for FP16 overflow issue on GPU/NPU

* Style

* Updated export test

* Style
diff --git a/optimum/exporters/openvino/convert.py b/optimum/exporters/openvino/convert.py
@@ -91,9 +91,31 @@
     from optimum.intel.openvino.configuration import OVConfig
 
 
-def _save_model(model, path: str, ov_config: Optional["OVConfig"] = None, library_name: Optional[str] = None):
+def _set_runtime_options(
+    models_and_export_configs: Dict[
+        str,
+        Tuple[Union["PreTrainedModel", "TFPreTrainedModel", "ModelMixin", "DiffusionPipeline"], "OnnxConfig"],
+    ],
+    task: str,
+):
+    for model_name in models_and_export_configs.keys():
+        _, sub_export_config = models_and_export_configs[model_name]
+        if "vae_" in model_name or "text-generation" in task:
+            sub_export_config.runtime_options = {"ACTIVATIONS_SCALE_FACTOR": "8.0"}
+
+
+def _save_model(
+    model,
+    path: str,
+    ov_config: Optional["OVConfig"] = None,
+    library_name: Optional[str] = None,
+    config: OnnxConfig = None,
+):
     compress_to_fp16 = ov_config is not None and ov_config.dtype == "fp16"
     model = _add_version_info_to_model(model, library_name)
+
+    if hasattr(config, "runtime_options"):
+        model = _add_runtime_options_to_rt_info(model, config.runtime_options)
     save_model(model, path, compress_to_fp16)
 
 
@@ -213,6 +235,7 @@ def export_tensorflow(
         output.parent / output,
         ov_config=ov_config,
         library_name=library_name,
+        config=config,
     )
     del ov_model
     return input_names, output_names, True
@@ -276,6 +299,7 @@ def export_pytorch_via_onnx(
         output.parent / OV_XML_FILE_NAME if output.suffix != ".xml" else output,
         ov_config=ov_config,
         library_name=library_name,
+        config=config,
     )
     del ov_model
     return input_names, output_names, True
@@ -450,6 +474,7 @@ def ts_patched_forward(*args, **kwargs):
             output,
             ov_config=ov_config,
             library_name=library_name,
+            config=config,
         )
         clear_class_registry()
         del ov_model
@@ -718,6 +743,8 @@ def export_from_model(
 
         model.save_config(output)
 
+    _set_runtime_options(models_and_export_configs, task)
+
     export_models(
         models_and_export_configs=models_and_export_configs,
         output_dir=output,
@@ -792,6 +819,19 @@ def export_tokenizer(
         save_model(model, output / file_name.format(suffix))
 
 
+def _add_runtime_options_to_rt_info(model: Model, options: Dict):
+    """
+    Add runtime options to the model's rt_info
+    """
+    try:
+        for name, value in options.items():
+            model.set_rt_info(value, ["runtime_options", name])
+    except Exception:
+        pass
+
+    return model
+
+
 def _add_version_info_to_model(model: Model, library_name: Optional[str] = None):
     """
     Add dependency versions to OpenVINO model
diff --git a/tests/openvino/test_export.py b/tests/openvino/test_export.py
@@ -124,6 +124,15 @@ def _openvino_export(
                     self.assertEqual(
                         ov_model.model.get_rt_info()["optimum"]["transformers_version"], _transformers_version
                     )
+                    self.assertTrue(ov_model.model.has_rt_info(["runtime_options", "ACTIVATIONS_SCALE_FACTOR"]))
+
+                if library_name == "diffusers":
+                    self.assertTrue(
+                        ov_model.vae_encoder.model.has_rt_info(["runtime_options", "ACTIVATIONS_SCALE_FACTOR"])
+                    )
+                    self.assertTrue(
+                        ov_model.vae_decoder.model.has_rt_info(["runtime_options", "ACTIVATIONS_SCALE_FACTOR"])
+                    )
 
     @parameterized.expand(SUPPORTED_ARCHITECTURES)
     def test_export(self, model_type: str):