
Commit 69171f8

xadupre and sdpython authored
Many small changes (#298)

* many small changes
* changes
* push

Co-authored-by: xavier dupré <[email protected]>

1 parent eccdc07 · commit 69171f8

File tree: 6 files changed, +75 -20 lines

_unittests/ut_export/test_serialization.py

Lines changed: 0 additions & 1 deletion

@@ -88,7 +88,6 @@ def forward(self, cache):
             Model(), [(self._get_cache(),), (self._get_cache(bsize=3, nheads=5),)]
         )
         guessed = md.guess_dynamic_shapes()
-        print("****", guessed)
         DYN = torch.export.Dim.DYNAMIC
         self.assertEqual(
             (([{0: DYN, 1: DYN}, {0: DYN, 1: DYN}, {0: DYN, 1: DYN}, {0: DYN, 1: DYN}],), {}),
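The deleted line was a leftover debug print around guess_dynamic_shapes, which infers dynamic dimensions by comparing several input sets. A minimal sketch of the pattern under test, assuming the ModelInputs class from onnx_diagnostic.export that this test file exercises:

import torch
from onnx_diagnostic.export import ModelInputs

class Model(torch.nn.Module):
    def forward(self, x, y):
        return x + y

# Two input sets whose first dimension differs: the tool marks that
# dimension as dynamic and keeps the others static.
md = ModelInputs(Model(), [
    (torch.rand(2, 3), torch.rand(2, 3)),
    (torch.rand(4, 3), torch.rand(4, 3)),
])
guessed = md.guess_dynamic_shapes()
# e.g. (({0: torch.export.Dim.DYNAMIC}, {0: torch.export.Dim.DYNAMIC}), {})
print(guessed)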

_unittests/ut_tasks/try_export.py

Lines changed: 39 additions & 9 deletions

@@ -14,7 +14,9 @@ class TestTryExportHuggingFaceHubModel(ExtTestCase):
     @ignore_warnings(UserWarning)
     def test_imagetext2text_qwen_2_5_vl_instruct_visual(self):
         """
-        clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k qwen_2_5
+        clear&&NEVERTEST=1 python _unittests/ut_tasks/try_export.py -k qwen_2_5
+
+        possible prefix: ``TESTDEVICE=cuda TESTDTYPE=float16 EXPORTER=onnx-dynamo``

         ::

@@ -33,6 +35,15 @@ def test_imagetext2text_qwen_2_5_vl_instruct_visual(self):
            return_dict:bool
        )
        """
+        device = os.environ.get("TESTDEVICE", "cpu")
+        dtype = os.environ.get("TESTDTYPE", "float32")
+        torch_dtype = {
+            "float16": torch.float16,
+            "bfloat16": torch.bfloat16,
+            "float32": torch.float32,
+        }[dtype]
+        exporter = os.environ.get("EXPORTER", "custom")
+
         from transformers import AutoModel, AutoProcessor

         # model_id = "Qwen/Qwen2.5-VL-7B-Instruct"
@@ -57,28 +68,28 @@ def _config_reduction(config, task):
         )
         model = data["model"]

-        model = model.to("cpu").to(torch.float32)
+        model = model.to(device).to(getattr(torch, dtype))

+        print(f"-- model.dtype={model.dtype}")
         print(f"-- model.device={model.device}")
         processor = AutoProcessor.from_pretrained(model_id, use_fast=True)
         print(f"-- processor={type(processor)}")

         inputs = dict(
-            hidden_states=torch.rand((1292, 1176), dtype=torch.float32),
-            grid_thw=torch.tensor([[1, 34, 38]], dtype=torch.int64),
+            hidden_states=torch.rand((1292, 1176), dtype=torch_dtype).to(device),
+            grid_thw=torch.tensor([[1, 34, 38]], dtype=torch.int64).to(device),
         )

         print(f"-- inputs: {self.string_type(inputs, with_shape=True)}")
         # this is too long
-        # expected = model.visual(**inputs)
-        # print(f"-- expected: {self.string_type(expected, with_shape=True)}")
+        expected = model.visual(**inputs)
+        print(f"-- expected: {self.string_type(expected, with_shape=True)}")

-        exporter = "custom"  # "onnx-dynamo"
         filename = self.get_dump_file(
-            f"test_imagetext2text_qwen_2_5_vl_instruct_visual.{exporter}.onnx"
+            f"test_imagetext2text_qwen_2_5_vl_instruct_visual.{device}.{dtype}.{exporter}.onnx"
         )
         fileep = self.get_dump_file(
-            f"test_imagetext2text_qwen_2_5_vl_instruct_visual.{exporter}.graph"
+            f"test_imagetext2text_qwen_2_5_vl_instruct_visual.{device}.{dtype}.{exporter}.graph"
         )
         dynamic_shapes = dict(
             hidden_states={0: "hidden_width", 1: "hidden_height"},
@@ -103,8 +114,27 @@ def _config_reduction(config, task):
             exporter=exporter,
             verbose=1,
             save_ep=fileep,
+            target_opset=22,
+            optimize=True,
         )

+        self.assert_onnx_disc(
+            f"test_imagetext2text_qwen_2_5_vl_instruct_visual.{device}.{dtype}.{exporter}",
+            filename,
+            model.visual,
+            export_inputs,
+            verbose=1,
+            providers=(
+                ["CUDAExecutionProvider", "CPUExecutionProvider"]
+                if device == "cuda"
+                else ["CPUExecutionProvider"]
+            ),
+            use_ort=True,
+            atol=0.02,
+            rtol=10,
+            ort_optimized_graph=False,
+        )
+

 if __name__ == "__main__":
     unittest.main(verbosity=2)
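The core of this change is a configuration pattern: device, dtype, and exporter are read from environment variables so one test covers several hardware and precision combinations. A standalone sketch of that pattern (the names TESTDEVICE, TESTDTYPE, and EXPORTER follow the diff above):

import os
import torch

# Pick the runtime configuration from the environment, with CPU/float32
# defaults so the test still runs without any prefix.
device = os.environ.get("TESTDEVICE", "cpu")
dtype = os.environ.get("TESTDTYPE", "float32")
torch_dtype = {
    "float16": torch.float16,
    "bfloat16": torch.bfloat16,
    "float32": torch.float32,
}[dtype]
exporter = os.environ.get("EXPORTER", "custom")

# Inputs and model must agree on device and dtype.
x = torch.rand((4, 8), dtype=torch_dtype).to(device)

# Encoding the configuration in the artifact name keeps runs from
# overwriting each other, e.g. "my_test.cuda.float16.onnx-dynamo.onnx".
filename = f"my_test.{device}.{dtype}.{exporter}.onnx"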

_unittests/ut_torch_models/test_tiny_llms_bypassed.py

Lines changed: 2 additions & 2 deletions

@@ -28,8 +28,8 @@ def test_export_tiny_llm_2_bypassed(self):
         inputs = modificator(copy.deepcopy(inputs))

         def debug():
-            print("***", string_type(inputs, with_shape=True))
-            print("***", data["dynamic_shapes"])
+            print("--", string_type(inputs, with_shape=True))
+            print("--", data["dynamic_shapes"])
             import torch.export._draft_export

             _ep, report = torch.export._draft_export.draft_export(
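For reference, the draft_export call visible in the context lines exports with relaxed guards and returns a report of the issues it worked around. A hedged sketch mirroring that call (torch.export._draft_export is a private torch API and may change between releases):

import torch
import torch.export._draft_export

class Model(torch.nn.Module):
    def forward(self, x):
        return x + 1

# Returns the exported program plus a report describing the guards and
# data-dependent issues encountered while tracing (as in the test above).
_ep, report = torch.export._draft_export.draft_export(
    Model(), (torch.rand(2, 3),)
)
print(report)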

_unittests/ut_torch_models/test_tiny_llms_onnx.py

Lines changed: 2 additions & 2 deletions

@@ -110,8 +110,8 @@ def test_bypass_onnx_export_tiny_llm_official_full(self):
         self.assertEqual(
             {"attention_mask", "past_key_values", "input_ids", "position_ids"}, set(inputs)
         )
-        print("***", self.string_type(inputs, with_shape=True))
-        print("---", type(model))
+        print("--", self.string_type(inputs, with_shape=True))
+        print("--", type(model))
         with torch_export_patches(
             patch_transformers=True, verbose=1, stop_if_static=1
         ) as modificator:

onnx_diagnostic/export/api.py

Lines changed: 4 additions & 1 deletion

@@ -16,6 +16,7 @@ def to_onnx(
     exporter: str = "onnx-dynamo",
     exporter_kwargs: Optional[Dict[str, Any]] = None,
     save_ep: Optional[str] = None,
+    optimize: bool = True,
     use_control_flow_dispatcher: bool = False,
 ) -> Any:
     """
@@ -37,6 +38,7 @@ def to_onnx(
     :param exporter: exporter to use (``onnx-dynamo``, ``modelbuilder``, ``custom``)
     :param exporter_kwargs: additional parameters sent to the exporter
     :param save_ep: saves the exported program
+    :param optimize: optimizes the model for onnxruntime
     :param use_control_flow_dispatcher: use the dispatcher created to support
         custom loops (see :func:`onnx_diagnostic.export.control_flow.loop_for`)
     :return: the output of the selected exporter, usually a structure including
@@ -106,7 +108,8 @@ def to_onnx(
             dynamo=True,
             **(exporter_kwargs or {}),
         )
-        ort_fusions.optimize_for_ort(epo.model)
+        if optimize:
+            ort_fusions.optimize_for_ort(epo.model)
         if filename:
             epo.save(filename, external_data=True)
         return epo
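A hedged usage sketch for the new optimize flag; the positional model and inputs arguments and the filename keyword are assumptions based on how to_onnx is called elsewhere in this repository:

import torch
from onnx_diagnostic.export.api import to_onnx

class Model(torch.nn.Module):
    def forward(self, x):
        return x * 2

# optimize=True (the default) runs ort_fusions.optimize_for_ort on the
# exported model, per the diff above; optimize=False keeps the raw
# exporter output, which is easier to read when chasing a discrepancy.
to_onnx(
    Model(),
    (torch.rand(2, 3),),
    filename="model.onnx",
    exporter="onnx-dynamo",
    optimize=False,
)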

onnx_diagnostic/ext_test_case.py

Lines changed: 28 additions & 5 deletions

@@ -1188,6 +1188,7 @@ def assert_onnx_disc(
         copy_inputs: bool = True,
         expected: Optional[Any] = None,
         use_ort: bool = False,
+        ort_optimized_graph: bool = False,
         **kwargs,
     ):
         """
@@ -1206,6 +1207,7 @@ def assert_onnx_disc(
         :param expected: expected values
         :param copy_inputs: to copy the inputs
         :param use_ort: use :class:`onnxruntime.InferenceSession`
+        :param ort_optimized_graph: dumps the optimized onnxruntime graph
         :param kwargs: arguments sent to
             :class:`onnx_diagnostic.helpers.ort_session.InferenceSessionForTorch`
         """
@@ -1216,29 +1218,50 @@ def assert_onnx_disc(
         kws = dict(with_shape=True, with_min_max=verbose > 1)
         vname = test_name or "assert_onnx_disc"
         if test_name:
+            import onnx
+
             name = f"{test_name}.onnx"
             if verbose:
                 print(f"[{vname}] save the onnx model into {name!r}")
-            name = self.dump_onnx(name, proto)
+            if isinstance(proto, str):
+                name = proto
+                proto = onnx.load(name)
+            else:
+                assert isinstance(
+                    proto, onnx.ModelProto
+                ), f"Unexpected type {type(proto)} for proto"
+                name = self.dump_onnx(name, proto)
             if verbose:
                 print(f"[{vname}] file size {os.stat(name).st_size // 2**10:1.3f} kb")
         if verbose:
             print(f"[{vname}] make feeds {string_type(inputs, **kws)}")
         if use_ort:
+            assert isinstance(
+                proto, onnx.ModelProto
+            ), f"Unexpected type {type(proto)} for proto"
             feeds = make_feeds(proto, inputs, use_numpy=True, copy=True)
-            if verbose:
-                print(f"[{vname}] feeds {string_type(feeds, **kws)}")
             import onnxruntime

+            if verbose:
+                print(f"[{vname}] create onnxruntime.InferenceSession")
+            options = onnxruntime.SessionOptions()
+            if ort_optimized_graph:
+                options.optimized_model_filepath = f"{name}.optort.onnx"
             sess = onnxruntime.InferenceSession(
-                proto.SerializeToString(), providers=["CPUExecutionProvider"]
+                proto.SerializeToString(),
+                options,
+                providers=kwargs.get("providers", ["CPUExecutionProvider"]),
             )
+            if verbose:
+                print(f"[{vname}] run ort feeds {string_type(feeds, **kws)}")
             got = sess.run(None, feeds)
         else:
             feeds = make_feeds(proto, inputs, copy=True)
             if verbose:
-                print(f"[{vname}] feeds {string_type(feeds, **kws)}")
+                print(f"[{vname}] create InferenceSessionForTorch")
             sess = InferenceSessionForTorch(proto, **kwargs)
+            if verbose:
+                print(f"[{vname}] run orttorch feeds {string_type(feeds, **kws)}")
             got = sess.run(None, feeds)
         if verbose:
             print(f"[{vname}] compute expected values")
