Commit 34e801c

a couple of changes for qwen (#292)
* a couple of changes for qwen
* fix
* fix
* fix
* more patches
* spell
1 parent 11f2c83 commit 34e801c

File tree: 13 files changed, +972 −278 lines changed


CHANGELOGS.rst

Lines changed: 5 additions & 0 deletions
@@ -1,6 +1,11 @@
 Change Logs
 ===========
 
+0.8.2
++++++
+
+* :pr:`292`: new patches for Qwen models
+
 0.8.1
 +++++
 

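The changelog entry points at the patch mechanism rather than a public API change: the Qwen fixes ship inside torch_export_patches, which the new tests below activate. A minimal sketch of enabling them around a torch.export call, assuming a placeholder module (the model and inputs are illustrative, not taken from this commit):

import torch
from onnx_diagnostic.torch_export_patches import torch_export_patches

# Placeholder module for illustration; a real use case would pass a
# transformers model such as Qwen2.5-VL so the transformers patches apply.
model = torch.nn.Linear(4, 4)
inputs = (torch.rand(2, 4),)

# The patches (torch and transformers, including the Qwen ones from this PR)
# are only in effect while the context manager is active.
with torch_export_patches(patch_torch=True, patch_transformers=True, verbose=1):
    ep = torch.export.export(model, inputs)
print(ep)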
_doc/index.rst

Lines changed: 1 addition & 1 deletion
@@ -239,8 +239,8 @@ The function replaces dynamic dimensions defined as strings by
 Older versions
 ==============
 
+* `0.8.2 <../v0.8.2/index.html>`_
 * `0.8.1 <../v0.8.1/index.html>`_
-* `0.8.0 <../v0.8.0/index.html>`_
 * `0.7.16 <../v0.7.16/index.html>`_
 * `0.6.3 <../v0.6.3/index.html>`_
 * `0.5.0 <../v0.5.0/index.html>`_

_unittests/ut_tasks/try_export.py

Lines changed: 110 additions & 0 deletions
@@ -0,0 +1,110 @@
import os
import unittest
import torch
from onnx_diagnostic.ext_test_case import ExtTestCase, never_test, ignore_warnings
from onnx_diagnostic.torch_export_patches import torch_export_patches
from onnx_diagnostic.torch_models.hghub.model_inputs import get_untrained_model_with_inputs
from onnx_diagnostic.export.api import to_onnx

# from onnx_diagnostic.export.shape_helper import make_fake_with_dynamic_dimensions


class TestTryExportHuggingFaceHubModel(ExtTestCase):
    @never_test()
    @ignore_warnings(UserWarning)
    def test_imagetext2text_qwen_2_5_vl_instruct_visual(self):
        """
        clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k qwen_2_5

        ::

            kwargs=dict(
                cache_position:T7s3602,
                input_ids:T7s1x3602,
                inputs_embeds:None,
                attention_mask:T7s1x3602,
                position_ids:T7s4x1x3602,
                pixel_values:T1s14308x1176,
                pixel_values_videos:None,
                image_grid_thw:T7s1x3,
                video_grid_thw:None,
                second_per_grid_ts:None,
                use_cache:bool,
                return_dict:bool
            )
        """
        from transformers import AutoModel, AutoProcessor

        # model_id = "Qwen/Qwen2.5-VL-7B-Instruct"
        model_id = "Qwen/Qwen2.5-VL-3B-Instruct"
        if os.environ.get("PRETRAINED", ""):
            model = AutoModel.from_pretrained(model_id, device_map="auto", dtype="auto").eval()
        else:

            def _config_reduction(config, task):
                return {
                    "num_hidden_layers": 2,
                    "text_config": {
                        "num_hidden_layers": 2,
                        "layer_types": ["full_attention", "full_attention"],
                    },
                    # "_attn_implementation": "flash_attention_2",
                }

            config_reduction = _config_reduction
            data = get_untrained_model_with_inputs(
                model_id, verbose=1, add_second_input=False, config_reduction=config_reduction
            )
            model = data["model"]

        model = model.to("cpu").to(torch.float32)

        print(f"-- model.device={model.device}")
        processor = AutoProcessor.from_pretrained(model_id, use_fast=True)
        print(f"-- processor={type(processor)}")

        inputs = dict(
            hidden_states=torch.rand((14308, 1176), dtype=torch.float32),
            grid_thw=torch.tensor([[1, 98, 146]], dtype=torch.int64),
        )

        print(f"-- inputs: {self.string_type(inputs, with_shape=True)}")
        # this is too long
        # expected = model.visual(**inputs)
        # print(f"-- expected: {self.string_type(expected, with_shape=True)}")

        exporter = "custom"  # "onnx-dynamo"
        filename = self.get_dump_file(
            f"test_imagetext2text_qwen_2_5_vl_instruct_visual.{exporter}.onnx"
        )
        fileep = self.get_dump_file(
            f"test_imagetext2text_qwen_2_5_vl_instruct_visual.{exporter}.graph"
        )
        dynamic_shapes = dict(
            hidden_states={0: "hidden_width", 1: "hidden_height"},
            grid_thw={},  # {0: "n_images"}, # TODO: fix
        )

        # fake_inputs = make_fake_with_dynamic_dimensions(inputs, dynamic_shapes)[0]
        export_inputs = inputs
        print()
        with torch_export_patches(
            patch_torch=True,
            patch_sympy=False,
            patch_transformers=True,
            verbose=1,
            stop_if_static=2,
        ):
            to_onnx(
                model.visual,
                kwargs=export_inputs,
                dynamic_shapes=dynamic_shapes,
                filename=filename,
                exporter=exporter,
                verbose=1,
                save_ep=fileep,
            )


if __name__ == "__main__":
    unittest.main(verbosity=2)

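If the export above succeeds, the resulting file can be sanity-checked outside the test with onnxruntime. A minimal sketch, assuming onnxruntime is installed, reusing the toy shapes from the test; the path is whatever get_dump_file returned, and the input order is only assumed to match the session's declared inputs:

import numpy as np
import onnxruntime as ort

# Hypothetical path: point this at the file produced by the test above.
path = "test_imagetext2text_qwen_2_5_vl_instruct_visual.custom.onnx"
sess = ort.InferenceSession(path, providers=["CPUExecutionProvider"])

# Input names depend on the exporter, so read them from the session itself.
names = [i.name for i in sess.get_inputs()]
feeds = dict(
    zip(
        names,
        [
            np.random.rand(14308, 1176).astype(np.float32),  # hidden_states
            np.array([[1, 98, 146]], dtype=np.int64),  # grid_thw
        ],
    )
)
outputs = sess.run(None, feeds)
print([o.shape for o in outputs])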
_unittests/ut_tasks/try_tasks.py

Lines changed: 106 additions & 2 deletions
@@ -1,15 +1,15 @@
 import os
 import unittest
 import torch
-from onnx_diagnostic.ext_test_case import ExtTestCase, never_test
+from onnx_diagnostic.ext_test_case import ExtTestCase, never_test, ignore_warnings
 from onnx_diagnostic.helpers import string_type
 from onnx_diagnostic.helpers.cache_helper import make_dynamic_cache, make_encoder_decoder_cache
 from onnx_diagnostic.helpers.torch_helper import steal_forward
 from onnx_diagnostic.torch_export_patches import torch_export_patches
 from onnx_diagnostic.torch_models.hghub.model_inputs import get_untrained_model_with_inputs
 
 
-class TestHuggingFaceHubModel(ExtTestCase):
+class TestTryHuggingFaceHubModel(ExtTestCase):
     @never_test()
     def test_image_classification(self):
         # clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k image_c
@@ -988,6 +988,110 @@ def test_imagetext2text_generation_gemma3_4b_it(self):
         )
         print(output_text)
 
+    @never_test()
+    @ignore_warnings(UserWarning)
+    def test_imagetext2text_qwen_2_5_vl_instruct(self):
+        """
+        clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k qwen_2_5
+
+        ::
+
+            kwargs=dict(
+                cache_position:T7s3602,
+                input_ids:T7s1x3602,
+                inputs_embeds:None,
+                attention_mask:T7s1x3602,
+                position_ids:T7s4x1x3602,
+                pixel_values:T1s14308x1176,
+                pixel_values_videos:None,
+                image_grid_thw:T7s1x3,
+                video_grid_thw:None,
+                second_per_grid_ts:None,
+                use_cache:bool,
+                return_dict:bool
+            )
+        """
+        from transformers import AutoModel, AutoProcessor
+        from qwen_vl_utils import process_vision_info
+
+        # model_id = "Qwen/Qwen2.5-VL-7B-Instruct"
+        model_id = "Qwen/Qwen2.5-VL-3B-Instruct"
+        if os.environ.get("PRETRAINED", ""):
+            model = AutoModel.from_pretrained(model_id, device_map="auto", dtype="auto").eval()
+        else:
+
+            def config_reduction(config, task):
+                return {
+                    "num_hidden_layers": 2,
+                    "text_config": {
+                        "num_hidden_layers": 2,
+                        "layer_types": ["full_attention", "full_attention"],
+                    },
+                }
+
+            data = get_untrained_model_with_inputs(
+                model_id, verbose=1, add_second_input=False, config_reduction=config_reduction
+            )
+            model = data["model"]
+
+        print(f"-- model.device={model.device}")
+        processor = AutoProcessor.from_pretrained(model_id, use_fast=True)
+        print(f"-- processor={type(processor)}")
+
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "image",
+                        "image": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg",
+                    },
+                    {"type": "text", "text": "Describe this image."},
+                ],
+            }
+        ]
+        text = processor.apply_chat_template(
+            messages, tokenize=False, add_generation_prompt=True
+        )
+        image_inputs, video_inputs = process_vision_info(messages)
+        inputs = processor(
+            text=[text],
+            images=image_inputs,
+            videos=video_inputs,
+            padding=True,
+            return_tensors="pt",
+        )
+        inputs = inputs.to("cuda")
+        model = model.to("cuda").to(torch.bfloat16)
+
+        print(f"-- processor {type(processor)}")
+        print(f"-- inputs={self.string_type(inputs, with_shape=True, with_min_max=True)}")
+
+        print()
+        with (
+            torch_export_patches(
+                patch_torch=False,
+                patch_sympy=False,
+                patch_transformers=True,
+                verbose=1,
+            ),
+            steal_forward(
+                [model, model.visual],
+                dump_file=self.get_dump_file("test_imagetext2text_qwen_2_5_vl_instruct.onnx"),
+                dump_drop={"attention_mask", "past_key_values", "pixel_values"},
+                save_as_external_data=False,
+                with_shapes=True,
+            ),
+        ):
+            generated_ids = model.generate(**inputs, max_new_tokens=128)
+        generated_ids_trimmed = [
+            out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
+        ]
+        output_text = processor.batch_decode(
+            generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
+        )
+        print(output_text)
+
 
 if __name__ == "__main__":
     unittest.main(verbosity=2)

onnx_diagnostic/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -3,5 +3,5 @@
 Functions, classes to dig into a model when this one is right, slow, wrong...
 """
 
-__version__ = "0.8.1"
+__version__ = "0.8.2"
 __author__ = "Xavier Dupré"

onnx_diagnostic/export/api.py

Lines changed: 13 additions & 2 deletions
@@ -1,4 +1,4 @@
-from typing import Any, Dict, List, Sequence, Optional, Tuple, Union
+from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
 import torch
 
 
@@ -14,6 +14,8 @@ def to_onnx(
     output_names: Optional[List[str]] = None,
     output_dynamic_shapes: Optional[Union[Dict[str, Any], Tuple[Any]]] = None,
     exporter: str = "onnx-dynamo",
+    exporter_kwargs: Optional[Dict[str, Any]] = None,
+    save_ep: Optional[str] = None,
 ) -> Any:
     """
@@ -32,6 +34,8 @@ def to_onnx(
     :param output_names: to change the output of the onnx model
     :param output_dynamic_shapes: to overwrite the dynamic shapes names
     :param exporter: exporter to use (``onnx-dynamo``, ``modelbuilder``, ``custom``)
+    :param exporter_kwargs: additional parameters sent to the exporter
+    :param save_ep: saves the exported program
     :return: the output of the selected exporter, usually a structure including
         an onnx model
@@ -48,7 +52,10 @@ def to_onnx(
         )
     """
     if exporter == "custom":
-        from experimental_experiment.torch_interpreter import to_onnx as _to_onnx
+        from experimental_experiment.torch_interpreter import (
+            to_onnx as _to_onnx,
+            ExportOptions,
+        )
         from experimental_experiment.xbuilder import OptimizationOptions
 
         return _to_onnx(
@@ -63,7 +70,9 @@ def to_onnx(
             dynamic_shapes=dynamic_shapes,
             large_model=True,
             output_dynamic_shapes=output_dynamic_shapes,
+            export_options=ExportOptions(save_ep=save_ep),
             options=OptimizationOptions(patterns="default+onnxruntime"),
+            **(exporter_kwargs or {}),
         )
     if exporter in ("dynamo", "onnx-dynamo"):
         import onnxscript.rewriter.ort_fusions as ort_fusions
@@ -80,6 +89,7 @@ def to_onnx(
             opset_version=target_opset,
             dynamic_shapes=dynamic_shapes,
             dynamo=True,
+            **(exporter_kwargs or {}),
         )
         ort_fusions.optimize_for_ort(epo.model)
         epo.save(filename)
@@ -117,6 +127,7 @@ def to_onnx(
             precision=str(first_float[0].dtype).split(".")[-1],
             execution_provider="cuda" if first.is_cuda else "cpu",
             cache_dir=os.path.dirname(filename),
+            **(exporter_kwargs or {}),
        )
         save_model_builder(onx, os.path.dirname(filename))
         return onx

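With the two new parameters, callers can persist the exported program and forward extra options to the underlying exporter without changing the common API. A minimal usage sketch of the updated signature, assuming a toy module; the model, file names, and the extra option passed through exporter_kwargs are illustrative, not part of this commit:

import torch
from onnx_diagnostic.export.api import to_onnx

model = torch.nn.Linear(8, 8)  # placeholder model for illustration
kwargs = {"x": torch.rand(2, 8)}

to_onnx(
    model,
    kwargs=kwargs,
    dynamic_shapes={"x": {0: "batch"}},
    filename="linear.custom.onnx",
    exporter="custom",
    save_ep="linear.custom.graph",  # new: saves the exported program next to the model
    exporter_kwargs={"verbose": 0},  # new: forwarded to the exporter (hypothetical option)
)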