1 change: 0 additions & 1 deletion _unittests/ut_export/test_serialization.py
@@ -88,7 +88,6 @@ def forward(self, cache):
Model(), [(self._get_cache(),), (self._get_cache(bsize=3, nheads=5),)]
)
guessed = md.guess_dynamic_shapes()
print("****", guessed)
DYN = torch.export.Dim.DYNAMIC
self.assertEqual(
(([{0: DYN, 1: DYN}, {0: DYN, 1: DYN}, {0: DYN, 1: DYN}, {0: DYN, 1: DYN}],), {}),
48 changes: 39 additions & 9 deletions _unittests/ut_tasks/try_export.py
@@ -14,7 +14,9 @@ class TestTryExportHuggingFaceHubModel(ExtTestCase):
@ignore_warnings(UserWarning)
def test_imagetext2text_qwen_2_5_vl_instruct_visual(self):
"""
clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k qwen_2_5
clear&&NEVERTEST=1 python _unittests/ut_tasks/try_export.py -k qwen_2_5

possible prefix: ``TESTDEVICE=cuda TESTDTYPE=float16 EXPORTER=onnx-dynamo``

::

@@ -33,6 +35,15 @@ def test_imagetext2text_qwen_2_5_vl_instruct_visual(self):
return_dict:bool
)
"""
device = os.environ.get("TESTDEVICE", "cpu")
dtype = os.environ.get("TESTDTYPE", "float32")
torch_dtype = {
"float16": torch.float16,
"bfloat16": torch.bfloat16,
"float32": torch.float32,
}[dtype]
exporter = os.environ.get("EXPORTER", "custom")

from transformers import AutoModel, AutoProcessor

# model_id = "Qwen/Qwen2.5-VL-7B-Instruct"
@@ -57,28 +68,28 @@ def _config_reduction(config, task):
)
model = data["model"]

model = model.to("cpu").to(torch.float32)
model = model.to(device).to(getattr(torch, dtype))

print(f"-- model.dtype={model.dtype}")
print(f"-- model.device={model.device}")
processor = AutoProcessor.from_pretrained(model_id, use_fast=True)
print(f"-- processor={type(processor)}")

inputs = dict(
hidden_states=torch.rand((1292, 1176), dtype=torch.float32),
grid_thw=torch.tensor([[1, 34, 38]], dtype=torch.int64),
hidden_states=torch.rand((1292, 1176), dtype=torch_dtype).to(device),
grid_thw=torch.tensor([[1, 34, 38]], dtype=torch.int64).to(device),
)

print(f"-- inputs: {self.string_type(inputs, with_shape=True)}")
# this is too long
# expected = model.visual(**inputs)
# print(f"-- expected: {self.string_type(expected, with_shape=True)}")
expected = model.visual(**inputs)
print(f"-- expected: {self.string_type(expected, with_shape=True)}")

exporter = "custom" # "onnx-dynamo"
filename = self.get_dump_file(
f"test_imagetext2text_qwen_2_5_vl_instruct_visual.{exporter}.onnx"
f"test_imagetext2text_qwen_2_5_vl_instruct_visual.{device}.{dtype}.{exporter}.onnx"
)
fileep = self.get_dump_file(
f"test_imagetext2text_qwen_2_5_vl_instruct_visual.{exporter}.graph"
f"test_imagetext2text_qwen_2_5_vl_instruct_visual.{device}.{dtype}.{exporter}.graph"
)
dynamic_shapes = dict(
hidden_states={0: "hidden_width", 1: "hidden_height"},
@@ -103,8 +114,27 @@ def _config_reduction(config, task):
exporter=exporter,
verbose=1,
save_ep=fileep,
target_opset=22,
optimize=True,
)

self.assert_onnx_disc(
f"test_imagetext2text_qwen_2_5_vl_instruct_visual.{device}.{dtype}.{exporter}",
filename,
model.visual,
export_inputs,
verbose=1,
providers=(
["CUDAExecutionProvider", "CPUExecutionProvider"]
if device == "cuda"
else ["CPUExecutionProvider"]
),
use_ort=True,
atol=0.02,
rtol=10,
ort_optimized_graph=False,
)


if __name__ == "__main__":
unittest.main(verbosity=2)
4 changes: 2 additions & 2 deletions _unittests/ut_torch_models/test_tiny_llms_bypassed.py
@@ -28,8 +28,8 @@ def test_export_tiny_llm_2_bypassed(self):
inputs = modificator(copy.deepcopy(inputs))

def debug():
print("***", string_type(inputs, with_shape=True))
print("***", data["dynamic_shapes"])
print("--", string_type(inputs, with_shape=True))
print("--", data["dynamic_shapes"])
import torch.export._draft_export

_ep, report = torch.export._draft_export.draft_export(
4 changes: 2 additions & 2 deletions _unittests/ut_torch_models/test_tiny_llms_onnx.py
@@ -110,8 +110,8 @@ def test_bypass_onnx_export_tiny_llm_official_full(self):
self.assertEqual(
{"attention_mask", "past_key_values", "input_ids", "position_ids"}, set(inputs)
)
print("***", self.string_type(inputs, with_shape=True))
print("---", type(model))
print("--", self.string_type(inputs, with_shape=True))
print("--", type(model))
with torch_export_patches(
patch_transformers=True, verbose=1, stop_if_static=1
) as modificator:
5 changes: 4 additions & 1 deletion onnx_diagnostic/export/api.py
@@ -16,6 +16,7 @@ def to_onnx(
exporter: str = "onnx-dynamo",
exporter_kwargs: Optional[Dict[str, Any]] = None,
save_ep: Optional[str] = None,
optimize: bool = True,
use_control_flow_dispatcher: bool = False,
) -> Any:
"""
@@ -37,6 +38,7 @@
:param exporter: exporter to use (``onnx-dynamo``, ``modelbuilder``, ``custom``)
:param exporter_kwargs: additional parameters sent to the exporter
:param save_ep: saves the exported program
:param optimize: optimizes the exported model (e.g. runs onnxruntime fusions for the onnx-dynamo exporter)
:param use_control_flow_dispatcher: use the dispatcher created to support
custom loops (see :func:`onnx_diagnostic.export.control_flow.loop_for`)
:return: the output of the selected exporter, usually a structure including
@@ -106,7 +108,8 @@ def to_onnx(
dynamo=True,
**(exporter_kwargs or {}),
)
ort_fusions.optimize_for_ort(epo.model)
if optimize:
ort_fusions.optimize_for_ort(epo.model)
if filename:
epo.save(filename, external_data=True)
return epo
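
A minimal sketch (not part of this PR) of how the new ``optimize`` flag could be called; the positional arguments and the ``filename`` keyword below are assumptions for illustration, only ``exporter``, ``save_ep``, ``optimize`` and ``use_control_flow_dispatcher`` are visible in this diff::

    # hypothetical usage sketch, not taken from the repository tests
    import torch
    from onnx_diagnostic.export.api import to_onnx

    class Tiny(torch.nn.Module):
        def forward(self, x):
            return x * 2

    # optimize=False skips ort_fusions.optimize_for_ort on the exported model
    to_onnx(
        Tiny(),
        (torch.rand(2, 3),),      # assumed positional inputs
        filename="tiny.onnx",      # assumed keyword
        exporter="onnx-dynamo",
        optimize=False,
    )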
33 changes: 28 additions & 5 deletions onnx_diagnostic/ext_test_case.py
@@ -1188,6 +1188,7 @@ def assert_onnx_disc(
copy_inputs: bool = True,
expected: Optional[Any] = None,
use_ort: bool = False,
ort_optimized_graph: bool = False,
**kwargs,
):
"""
@@ -1206,6 +1207,7 @@
:param expected: expected values
:param copy_inputs: to copy the inputs
:param use_ort: use :class:`onnxruntime.InferenceSession`
:param ort_optimized_graph: dumps the optimized onnxruntime graph
:param kwargs: arguments sent to
:class:`onnx_diagnostic.helpers.ort_session.InferenceSessionForTorch`
"""
@@ -1216,29 +1218,50 @@
kws = dict(with_shape=True, with_min_max=verbose > 1)
vname = test_name or "assert_onnx_disc"
if test_name:
import onnx

name = f"{test_name}.onnx"
if verbose:
print(f"[{vname}] save the onnx model into {name!r}")
name = self.dump_onnx(name, proto)
if isinstance(proto, str):
name = proto
proto = onnx.load(name)
else:
assert isinstance(
proto, onnx.ModelProto
), f"Unexpected type {type(proto)} for proto"
name = self.dump_onnx(name, proto)
if verbose:
print(f"[{vname}] file size {os.stat(name).st_size // 2**10:1.3f} kb")
if verbose:
print(f"[{vname}] make feeds {string_type(inputs, **kws)}")
if use_ort:
assert isinstance(
proto, onnx.ModelProto
), f"Unexpected type {type(proto)} for proto"
feeds = make_feeds(proto, inputs, use_numpy=True, copy=True)
if verbose:
print(f"[{vname}] feeds {string_type(feeds, **kws)}")
import onnxruntime

if verbose:
print(f"[{vname}] create onnxruntime.InferenceSession")
options = onnxruntime.SessionOptions()
if ort_optimized_graph:
options.optimized_model_filepath = f"{name}.optort.onnx"
sess = onnxruntime.InferenceSession(
proto.SerializeToString(), providers=["CPUExecutionProvider"]
proto.SerializeToString(),
options,
providers=kwargs.get("providers", ["CPUExecutionProvider"]),
)
if verbose:
print(f"[{vname}] run ort feeds {string_type(feeds, **kws)}")
got = sess.run(None, feeds)
else:
feeds = make_feeds(proto, inputs, copy=True)
if verbose:
print(f"[{vname}] feeds {string_type(feeds, **kws)}")
print(f"[{vname}] create InferenceSessionForTorch")
sess = InferenceSessionForTorch(proto, **kwargs)
if verbose:
print(f"[{vname}] run orttorch feeds {string_type(feeds, **kws)}")
got = sess.run(None, feeds)
if verbose:
print(f"[{vname}] compute expected values")