
Commit 23c58c3

Removal of ONNX fallback for OpenVINO export (#1272)

* test removal onnx fallback
* fixes

1 parent 28e49b5 commit 23c58c3

4 files changed (+49, -79 lines)

optimum/exporters/openvino/convert.py

Lines changed: 45 additions & 76 deletions
@@ -417,83 +417,52 @@ def export_pytorch(
 
         dummy_inputs = config.rename_ambiguous_inputs(dummy_inputs)
         dummy_inputs, dict_inputs = remove_none_from_dummy_inputs(dummy_inputs)
-
-        try:
-            # TorchScript used behind OpenVINO conversion. Optimum supports only return_dict=True models for patching,
-            # while TorchScript do not support dictionary with values of mixed types (e.g. Tensor and None) in model input/output
-            # To handle it, additional wrapper on patcher forward applied.
-            # model.config.torchscript = True can not be used for patching, because it overrides return_dict to False
-            patcher = config.patch_model_for_export(model, model_kwargs=model_kwargs)
-            patched_forward = patcher.patched_forward
-            dummy_input_keys = list(dummy_inputs.keys())
-
-            @functools.wraps(patched_forward)
-            def ts_patched_forward(*args, **kwargs):
-                ordered_example_inputs = [
-                    param for param in inspect.signature(patcher.orig_forward).parameters if param in dummy_input_keys
-                ]
-                kwargs.update(zip(ordered_example_inputs, args))
-                for i in range(len(dict_inputs)):
-                    input_name, keys = dict_inputs[i]
-                    tuple_input = kwargs[input_name]
-                    input_dict = dict(zip(keys, tuple_input))
-                    kwargs[input_name] = input_dict
-                outputs = patched_forward(**kwargs)
-                return tuple([value if not isinstance(value, list) else tuple(value) for value in outputs.values()])
-
-            patcher.patched_forward = ts_patched_forward
-
-            ts_decoder_kwargs = {}
-            model_config = getattr(model, "config", {})
-            model_type = getattr(model_config, "model_type", "").replace("_", "-")
-            if allow_skip_tracing_check(library_name, model_type):
-                ts_decoder_kwargs["trace_kwargs"] = {"check_trace": False}
-
-            with patcher:
-                if patch_16bit_model:
-                    from openvino.frontend.pytorch.patch_model import __make_16bit_traceable
-
-                    __make_16bit_traceable(model)
-                check_dummy_inputs_are_allowed(model, dummy_inputs)
-                input_info = _get_input_info(model, config, dummy_inputs)
-                ts_decoder = TorchScriptPythonDecoder(model, example_input=dummy_inputs, **ts_decoder_kwargs)
-                ov_model = convert_model(
-                    ts_decoder,
-                    example_input=dummy_inputs,
-                    input=[(item.shape, item.type) for item in input_info],
-                )
-        except Exception as ex:
-            logger.warning(f"Export model to OpenVINO directly failed with: \n{ex}.\nModel will be exported to ONNX")
-
-            if stateful:
-                # cannot raise because stateful is enabled by default and it would break backward compatibility for models that couldn't convert to OV directly
-                # TODO: Implement stateful for ONNX path as well, not doing it right now because of lack of validation
-                logger.warning(
-                    "[ WARNING ] Making stateful models is not supported when exporting to ONNX as an intermediate step. "
-                    "A stateless model will be exported instead. It may result in sub-optimal inference performance."
-                    "Provide a model that can be converted to OpenVINO without fallback to ONNX conversion path."
-                )
-
+        # TorchScript used behind OpenVINO conversion. Optimum supports only return_dict=True models for patching,
+        # while TorchScript do not support dictionary with values of mixed types (e.g. Tensor and None) in model input/output
+        # To handle it, additional wrapper on patcher forward applied.
+        # model.config.torchscript = True can not be used for patching, because it overrides return_dict to False
+        patcher = config.patch_model_for_export(model, model_kwargs=model_kwargs)
+        patched_forward = patcher.patched_forward
+        dummy_input_keys = list(dummy_inputs.keys())
+
+        @functools.wraps(patched_forward)
+        def ts_patched_forward(*args, **kwargs):
+            ordered_example_inputs = [
+                param
+                for param in inspect.signature(
+                    patcher.orig_forward if library_name != "sentence_transformers" else patcher.patched_forward
+                ).parameters
+                if param in dummy_input_keys
+            ]
+            kwargs.update(zip(ordered_example_inputs, args))
+            for i in range(len(dict_inputs)):
+                input_name, keys = dict_inputs[i]
+                tuple_input = kwargs[input_name]
+                input_dict = dict(zip(keys, tuple_input))
+                kwargs[input_name] = input_dict
+            outputs = patched_forward(**kwargs)
+            return tuple([value if not isinstance(value, list) else tuple(value) for value in outputs.values()])
+
+        patcher.patched_forward = ts_patched_forward
+
+        ts_decoder_kwargs = {}
+        model_config = getattr(model, "config", {})
+        model_type = getattr(model_config, "model_type", "").replace("_", "-")
+        if allow_skip_tracing_check(library_name, model_type):
+            ts_decoder_kwargs["trace_kwargs"] = {"check_trace": False}
+
+        with patcher:
             if patch_16bit_model:
-                from openvino.frontend.pytorch.patch_model import unpatch_model
-
-                unpatch_model(model, "_openvino_module_extension_patch_orig_forward")
-                for m in model.modules():
-                    if any(p.dtype in [torch.float16, torch.bfloat16] for p in m.parameters(False)) or any(
-                        b.dtype in [torch.float16, torch.bfloat16] for b in m.buffers(False)
-                    ):
-                        m.float()
-
-            return export_pytorch_via_onnx(
-                model,
-                config,
-                opset,
-                output,
-                device,
-                input_shapes,
-                model_kwargs,
-                ov_config=ov_config,
-                library_name=library_name,
+                from openvino.frontend.pytorch.patch_model import __make_16bit_traceable
+
+                __make_16bit_traceable(model)
+            check_dummy_inputs_are_allowed(model, dummy_inputs)
+            input_info = _get_input_info(model, config, dummy_inputs)
+            ts_decoder = TorchScriptPythonDecoder(model, example_input=dummy_inputs, **ts_decoder_kwargs)
+            ov_model = convert_model(
+                ts_decoder,
+                example_input=dummy_inputs,
+                input=[(item.shape, item.type) for item in input_info],
             )

         ov_model.validate_nodes_and_infer_types()  # TODO: remove as unnecessary validation?
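
Note: the wrapper retained above exists because TorchScript tracing passes example inputs positionally, while the patched forward expects keyword arguments. The snippet below is a minimal, self-contained sketch of that re-keying pattern; toy_forward and its inputs are illustrative stand-ins, not code from this repository.

import functools
import inspect


def toy_forward(input_ids=None, attention_mask=None, past_key_values=None):
    # Stand-in for patcher.patched_forward; real models return a dict-like output.
    return {"logits": [input_ids, attention_mask], "past_key_values": past_key_values}


dummy_inputs = {"input_ids": [1, 2], "attention_mask": [1, 1]}
dummy_input_keys = list(dummy_inputs.keys())


@functools.wraps(toy_forward)
def ts_patched_forward(*args, **kwargs):
    # Re-key positional args in the order declared by the wrapped callable's signature,
    # keeping only names that actually appear in the example inputs.
    ordered_example_inputs = [
        param for param in inspect.signature(toy_forward).parameters if param in dummy_input_keys
    ]
    kwargs.update(zip(ordered_example_inputs, args))
    outputs = toy_forward(**kwargs)
    # Lists become tuples so tracing sees a fixed, flat output structure.
    return tuple(value if not isinstance(value, list) else tuple(value) for value in outputs.values())


print(ts_patched_forward([1, 2], [1, 1]))  # (([1, 2], [1, 1]), None)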

optimum/exporters/openvino/utils.py

Lines changed: 1 addition & 1 deletion
@@ -359,7 +359,7 @@ def set_simplified_chat_template(ov_tokenizer_model, processor_chat_template=Non
     return ov_tokenizer_model


-SKIP_CHECK_TRACE_MODELS = ("deepseek", "deepseek-v2", "deepseek-v3")
+SKIP_CHECK_TRACE_MODELS = ("deepseek", "deepseek-v2", "deepseek-v3", "levit")


 def allow_skip_tracing_check(library_name, model_type):
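
This hunk only shows the tuple and the function's signature; the body of allow_skip_tracing_check is not part of the change. As a rough mental model only (a hypothetical sketch, not the actual implementation), the check and the way convert.py consumes it in the first hunk could look like this:

# Hypothetical sketch -- the real allow_skip_tracing_check body is not shown in this diff.
SKIP_CHECK_TRACE_MODELS = ("deepseek", "deepseek-v2", "deepseek-v3", "levit")


def allow_skip_tracing_check(library_name, model_type):
    # Assumption: tracing validation may be skipped for the listed model types.
    # library_name is accepted for parity with the real signature; this sketch ignores it.
    return model_type in SKIP_CHECK_TRACE_MODELS


# How convert.py (first hunk) consumes the result:
ts_decoder_kwargs = {}
if allow_skip_tracing_check("transformers", "levit"):
    ts_decoder_kwargs["trace_kwargs"] = {"check_trace": False}
print(ts_decoder_kwargs)  # {'trace_kwargs': {'check_trace': False}}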

tests/openvino/test_export.py

Lines changed: 1 addition & 0 deletions
@@ -321,6 +321,7 @@ def test_export_custom_model(self):
             out_features=256,
         )
         model = SentenceTransformer(modules=[word_embedding_model, pooling_model, dense_model])
+        model.to(torch.device("cpu"))

         with TemporaryDirectory() as tmpdirname:
             export_from_model(model, output=tmpdirname, task="feature-extraction")
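
As a usage sketch of what the test now exercises: build a custom SentenceTransformer, pin it to CPU, then export. The checkpoint name and the import path for export_from_model below are assumptions for illustration; the test's actual fixtures may differ, and running this requires sentence-transformers plus a model download.

from tempfile import TemporaryDirectory

import torch
from sentence_transformers import SentenceTransformer, models

# Assumed import path; export_from_model lives in the OpenVINO exporter shown in the first hunk.
from optimum.exporters.openvino.convert import export_from_model

# "sentence-transformers/all-MiniLM-L6-v2" is an illustrative checkpoint, not the one the test uses.
word_embedding_model = models.Transformer("sentence-transformers/all-MiniLM-L6-v2")
pooling_model = models.Pooling(word_embedding_model.get_word_embedding_dimension())
dense_model = models.Dense(
    in_features=pooling_model.get_sentence_embedding_dimension(),
    out_features=256,
)
model = SentenceTransformer(modules=[word_embedding_model, pooling_model, dense_model])
model.to(torch.device("cpu"))  # the new line: keep all weights on CPU before tracing/export

with TemporaryDirectory() as tmpdirname:
    export_from_model(model, output=tmpdirname, task="feature-extraction")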

tests/openvino/utils_tests.py

Lines changed: 2 additions & 2 deletions
@@ -244,7 +244,7 @@ def get_num_quantized_nodes(model):
         "f8e4m3": "f8e4m3",
         "f8e5m2": "f8e5m2",
     }
-    num_weight_nodes = {n: 0 for n in types_map.values()}
+    num_weight_nodes = dict.fromkeys(types_map.values(), 0)
     ov_model = model if isinstance(model, ov.Model) else model.model
     for elem in ov_model.get_ops():
         if "FakeQuantize" in elem.name:
@@ -325,7 +325,7 @@ def check_compression_state_per_model(
     for i, (submodel, expected_num_weight_nodes) in enumerate(zip(models, expected_num_weight_nodes_per_model)):
         ov_model = submodel if isinstance(submodel, ov.Model) else submodel.model
         num_fake_nodes, num_weight_nodes = get_num_quantized_nodes(ov_model)
-        expected_num_weight_nodes.update({k: 0 for k in set(num_weight_nodes) - set(expected_num_weight_nodes)})
+        expected_num_weight_nodes.update(dict.fromkeys(set(num_weight_nodes) - set(expected_num_weight_nodes), 0))

         actual_num_weights_per_model[i] = num_weight_nodes
         actual_num_fake_nodes_per_model[i] = num_fake_nodes
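
Both test-utility hunks are behavior-preserving refactors from a dict comprehension to dict.fromkeys. A quick check of the equivalence for an immutable fill value such as 0 follows (the usual dict.fromkeys caveat about a shared value object only matters for mutable values; the type names are an illustrative subset of types_map):

type_names = ["int4", "int8", "f4e2m1", "f8e4m3", "f8e5m2"]  # illustrative subset of types_map values

by_comprehension = {n: 0 for n in type_names}
by_fromkeys = dict.fromkeys(type_names, 0)
assert by_comprehension == by_fromkeys

# The same rewrite applies to the set-difference update in check_compression_state_per_model:
num_weight_nodes = {"int8": 3, "int4": 1}
expected_num_weight_nodes = {"int8": 3}
expected_num_weight_nodes.update(dict.fromkeys(set(num_weight_nodes) - set(expected_num_weight_nodes), 0))
assert expected_num_weight_nodes == {"int8": 3, "int4": 0}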
