
Commit 23c58c3

Removal of ONNX fallback for OpenVINO export (#1272)

* test removal onnx fallback
* fixes

1 parent 28e49b5 commit 23c58c3

4 files changed (+49, -79 lines)

optimum/exporters/openvino/convert.py

Lines changed: 45 additions & 76 deletions
@@ -417,83 +417,52 @@ def export_pytorch(
 
         dummy_inputs = config.rename_ambiguous_inputs(dummy_inputs)
         dummy_inputs, dict_inputs = remove_none_from_dummy_inputs(dummy_inputs)
-
-        try:
-            # TorchScript used behind OpenVINO conversion. Optimum supports only return_dict=True models for patching,
-            # while TorchScript do not support dictionary with values of mixed types (e.g. Tensor and None) in model input/output
-            # To handle it, additional wrapper on patcher forward applied.
-            # model.config.torchscript = True can not be used for patching, because it overrides return_dict to False
-            patcher = config.patch_model_for_export(model, model_kwargs=model_kwargs)
-            patched_forward = patcher.patched_forward
-            dummy_input_keys = list(dummy_inputs.keys())
-
-            @functools.wraps(patched_forward)
-            def ts_patched_forward(*args, **kwargs):
-                ordered_example_inputs = [
-                    param for param in inspect.signature(patcher.orig_forward).parameters if param in dummy_input_keys
-                ]
-                kwargs.update(zip(ordered_example_inputs, args))
-                for i in range(len(dict_inputs)):
-                    input_name, keys = dict_inputs[i]
-                    tuple_input = kwargs[input_name]
-                    input_dict = dict(zip(keys, tuple_input))
-                    kwargs[input_name] = input_dict
-                outputs = patched_forward(**kwargs)
-                return tuple([value if not isinstance(value, list) else tuple(value) for value in outputs.values()])
-
-            patcher.patched_forward = ts_patched_forward
-
-            ts_decoder_kwargs = {}
-            model_config = getattr(model, "config", {})
-            model_type = getattr(model_config, "model_type", "").replace("_", "-")
-            if allow_skip_tracing_check(library_name, model_type):
-                ts_decoder_kwargs["trace_kwargs"] = {"check_trace": False}
-
-            with patcher:
-                if patch_16bit_model:
-                    from openvino.frontend.pytorch.patch_model import __make_16bit_traceable
-
-                    __make_16bit_traceable(model)
-                check_dummy_inputs_are_allowed(model, dummy_inputs)
-                input_info = _get_input_info(model, config, dummy_inputs)
-                ts_decoder = TorchScriptPythonDecoder(model, example_input=dummy_inputs, **ts_decoder_kwargs)
-                ov_model = convert_model(
-                    ts_decoder,
-                    example_input=dummy_inputs,
-                    input=[(item.shape, item.type) for item in input_info],
-                )
-        except Exception as ex:
-            logger.warning(f"Export model to OpenVINO directly failed with: \n{ex}.\nModel will be exported to ONNX")
-
-            if stateful:
-                # cannot raise because stateful is enabled by default and it would break backward compatibility for models that couldn't convert to OV directly
-                # TODO: Implement stateful for ONNX path as well, not doing it right now because of lack of validation
-                logger.warning(
-                    "[ WARNING ] Making stateful models is not supported when exporting to ONNX as an intermediate step. "
-                    "A stateless model will be exported instead. It may result in sub-optimal inference performance."
-                    "Provide a model that can be converted to OpenVINO without fallback to ONNX conversion path."
-                )
-
+        # TorchScript used behind OpenVINO conversion. Optimum supports only return_dict=True models for patching,
+        # while TorchScript do not support dictionary with values of mixed types (e.g. Tensor and None) in model input/output
+        # To handle it, additional wrapper on patcher forward applied.
+        # model.config.torchscript = True can not be used for patching, because it overrides return_dict to False
+        patcher = config.patch_model_for_export(model, model_kwargs=model_kwargs)
+        patched_forward = patcher.patched_forward
+        dummy_input_keys = list(dummy_inputs.keys())
+
+        @functools.wraps(patched_forward)
+        def ts_patched_forward(*args, **kwargs):
+            ordered_example_inputs = [
+                param
+                for param in inspect.signature(
+                    patcher.orig_forward if library_name != "sentence_transformers" else patcher.patched_forward
+                ).parameters
+                if param in dummy_input_keys
+            ]
+            kwargs.update(zip(ordered_example_inputs, args))
+            for i in range(len(dict_inputs)):
+                input_name, keys = dict_inputs[i]
+                tuple_input = kwargs[input_name]
+                input_dict = dict(zip(keys, tuple_input))
+                kwargs[input_name] = input_dict
+            outputs = patched_forward(**kwargs)
+            return tuple([value if not isinstance(value, list) else tuple(value) for value in outputs.values()])
+
+        patcher.patched_forward = ts_patched_forward
+
+        ts_decoder_kwargs = {}
+        model_config = getattr(model, "config", {})
+        model_type = getattr(model_config, "model_type", "").replace("_", "-")
+        if allow_skip_tracing_check(library_name, model_type):
+            ts_decoder_kwargs["trace_kwargs"] = {"check_trace": False}
+
+        with patcher:
             if patch_16bit_model:
-                from openvino.frontend.pytorch.patch_model import unpatch_model
-
-                unpatch_model(model, "_openvino_module_extension_patch_orig_forward")
-                for m in model.modules():
-                    if any(p.dtype in [torch.float16, torch.bfloat16] for p in m.parameters(False)) or any(
-                        b.dtype in [torch.float16, torch.bfloat16] for b in m.buffers(False)
-                    ):
-                        m.float()
-
-            return export_pytorch_via_onnx(
-                model,
-                config,
-                opset,
-                output,
-                device,
-                input_shapes,
-                model_kwargs,
-                ov_config=ov_config,
-                library_name=library_name,
+                from openvino.frontend.pytorch.patch_model import __make_16bit_traceable
+
+                __make_16bit_traceable(model)
+            check_dummy_inputs_are_allowed(model, dummy_inputs)
+            input_info = _get_input_info(model, config, dummy_inputs)
+            ts_decoder = TorchScriptPythonDecoder(model, example_input=dummy_inputs, **ts_decoder_kwargs)
+            ov_model = convert_model(
+                ts_decoder,
+                example_input=dummy_inputs,
+                input=[(item.shape, item.type) for item in input_info],
             )

         ov_model.validate_nodes_and_infer_types()  # TODO: remove as unnecessary validation?
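
Note: the wrapper retained above exists because TorchScript tracing passes example inputs positionally, while the patched forward expects keyword arguments. The snippet below is a minimal, self-contained sketch of that re-keying pattern; toy_forward and its inputs are illustrative stand-ins, not code from this repository.

import functools
import inspect


def toy_forward(input_ids=None, attention_mask=None, past_key_values=None):
    # Stand-in for patcher.patched_forward; real models return a dict-like output.
    return {"logits": [input_ids, attention_mask], "past_key_values": past_key_values}


dummy_inputs = {"input_ids": [1, 2], "attention_mask": [1, 1]}
dummy_input_keys = list(dummy_inputs.keys())


@functools.wraps(toy_forward)
def ts_patched_forward(*args, **kwargs):
    # Re-key positional args in the order declared by the wrapped callable's signature,
    # keeping only names that actually appear in the example inputs.
    ordered_example_inputs = [
        param for param in inspect.signature(toy_forward).parameters if param in dummy_input_keys
    ]
    kwargs.update(zip(ordered_example_inputs, args))
    outputs = toy_forward(**kwargs)
    # Lists become tuples so tracing sees a fixed, flat output structure.
    return tuple(value if not isinstance(value, list) else tuple(value) for value in outputs.values())


print(ts_patched_forward([1, 2], [1, 1]))  # (([1, 2], [1, 1]), None)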

optimum/exporters/openvino/utils.py

Lines changed: 1 addition & 1 deletion
@@ -359,7 +359,7 @@ def set_simplified_chat_template(ov_tokenizer_model, processor_chat_template=Non
     return ov_tokenizer_model


-SKIP_CHECK_TRACE_MODELS = ("deepseek", "deepseek-v2", "deepseek-v3")
+SKIP_CHECK_TRACE_MODELS = ("deepseek", "deepseek-v2", "deepseek-v3", "levit")


 def allow_skip_tracing_check(library_name, model_type):
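
This hunk only shows the tuple and the function's signature; the body of allow_skip_tracing_check is not part of the change. As a rough mental model only (a hypothetical sketch, not the actual implementation), the check and the way convert.py consumes it in the first hunk could look like this:

# Hypothetical sketch -- the real allow_skip_tracing_check body is not shown in this diff.
SKIP_CHECK_TRACE_MODELS = ("deepseek", "deepseek-v2", "deepseek-v3", "levit")


def allow_skip_tracing_check(library_name, model_type):
    # Assumption: tracing validation may be skipped for the listed model types.
    # library_name is accepted for parity with the real signature; this sketch ignores it.
    return model_type in SKIP_CHECK_TRACE_MODELS


# How convert.py (first hunk) consumes the result:
ts_decoder_kwargs = {}
if allow_skip_tracing_check("transformers", "levit"):
    ts_decoder_kwargs["trace_kwargs"] = {"check_trace": False}
print(ts_decoder_kwargs)  # {'trace_kwargs': {'check_trace': False}}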

tests/openvino/test_export.py

Lines changed: 1 addition & 0 deletions
@@ -321,6 +321,7 @@ def test_export_custom_model(self):
             out_features=256,
         )
         model = SentenceTransformer(modules=[word_embedding_model, pooling_model, dense_model])
+        model.to(torch.device("cpu"))

         with TemporaryDirectory() as tmpdirname:
             export_from_model(model, output=tmpdirname, task="feature-extraction")
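
As a usage sketch of what the test now exercises: build a custom SentenceTransformer, pin it to CPU, then export. The checkpoint name and the import path for export_from_model below are assumptions for illustration; the test's actual fixtures may differ, and running this requires sentence-transformers plus a model download.

from tempfile import TemporaryDirectory

import torch
from sentence_transformers import SentenceTransformer, models

# Assumed import path; export_from_model lives in the OpenVINO exporter shown in the first hunk.
from optimum.exporters.openvino.convert import export_from_model

# "sentence-transformers/all-MiniLM-L6-v2" is an illustrative checkpoint, not the one the test uses.
word_embedding_model = models.Transformer("sentence-transformers/all-MiniLM-L6-v2")
pooling_model = models.Pooling(word_embedding_model.get_word_embedding_dimension())
dense_model = models.Dense(
    in_features=pooling_model.get_sentence_embedding_dimension(),
    out_features=256,
)
model = SentenceTransformer(modules=[word_embedding_model, pooling_model, dense_model])
model.to(torch.device("cpu"))  # the new line: keep all weights on CPU before tracing/export

with TemporaryDirectory() as tmpdirname:
    export_from_model(model, output=tmpdirname, task="feature-extraction")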

tests/openvino/utils_tests.py

Lines changed: 2 additions & 2 deletions
@@ -244,7 +244,7 @@ def get_num_quantized_nodes(model):
         "f8e4m3": "f8e4m3",
         "f8e5m2": "f8e5m2",
     }
-    num_weight_nodes = {n: 0 for n in types_map.values()}
+    num_weight_nodes = dict.fromkeys(types_map.values(), 0)
     ov_model = model if isinstance(model, ov.Model) else model.model
     for elem in ov_model.get_ops():
         if "FakeQuantize" in elem.name:
@@ -325,7 +325,7 @@ def check_compression_state_per_model(
     for i, (submodel, expected_num_weight_nodes) in enumerate(zip(models, expected_num_weight_nodes_per_model)):
         ov_model = submodel if isinstance(submodel, ov.Model) else submodel.model
         num_fake_nodes, num_weight_nodes = get_num_quantized_nodes(ov_model)
-        expected_num_weight_nodes.update({k: 0 for k in set(num_weight_nodes) - set(expected_num_weight_nodes)})
+        expected_num_weight_nodes.update(dict.fromkeys(set(num_weight_nodes) - set(expected_num_weight_nodes), 0))

         actual_num_weights_per_model[i] = num_weight_nodes
         actual_num_fake_nodes_per_model[i] = num_fake_nodes
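
Both test-utility hunks are behavior-preserving refactors from a dict comprehension to dict.fromkeys. A quick check of the equivalence for an immutable fill value such as 0 follows (the usual dict.fromkeys caveat about a shared value object only matters for mutable values; the type names are an illustrative subset of types_map):

type_names = ["int4", "int8", "f4e2m1", "f8e4m3", "f8e5m2"]  # illustrative subset of types_map values

by_comprehension = {n: 0 for n in type_names}
by_fromkeys = dict.fromkeys(type_names, 0)
assert by_comprehension == by_fromkeys

# The same rewrite applies to the set-difference update in check_compression_state_per_model:
num_weight_nodes = {"int8": 3, "int4": 1}
expected_num_weight_nodes = {"int8": 3}
expected_num_weight_nodes.update(dict.fromkeys(set(num_weight_nodes) - set(expected_num_weight_nodes), 0))
assert expected_num_weight_nodes == {"int8": 3, "int4": 0}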
