
Commit 8dfe56e

documentation (#299)
* minor bug fixes
* doc
1 parent 69171f8 commit 8dfe56e

File tree (3 files changed: +71 -26 lines changed)

_unittests/ut_tasks/try_export.py
_unittests/ut_tasks/try_tasks.py
onnx_diagnostic/helpers/mini_onnx_builder.py

_unittests/ut_tasks/try_export.py

Lines changed: 11 additions & 4 deletions
@@ -46,20 +46,26 @@ def test_imagetext2text_qwen_2_5_vl_instruct_visual(self):
 
         from transformers import AutoModel, AutoProcessor
 
-        # model_id = "Qwen/Qwen2.5-VL-7B-Instruct"
-        model_id = "Qwen/Qwen2.5-VL-3B-Instruct"
+        model_id = "Qwen/Qwen2.5-VL-7B-Instruct"
+        # model_id = "Qwen/Qwen2.5-VL-3B-Instruct"
         if os.environ.get("PRETRAINED", ""):
-            model = AutoModel.from_pretrained(model_id, device_map="auto", dtype="auto").eval()
+            print("-- pretrained model")
+            model = AutoModel.from_pretrained(
+                model_id, device_map=device, dtype=torch_dtype, attn_implementation="sdpa"
+            ).eval()
         else:
+            print("-- random model")
 
             def _config_reduction(config, task):
                 return {
-                    "num_hidden_layers": 2,
+                    # "num_hidden_layers": 2,
                     "text_config": {
                         "num_hidden_layers": 2,
                         "layer_types": ["full_attention", "full_attention"],
                     },
                     # "_attn_implementation": "flash_attention_2",
+                    "_attn_implementation": "sdpa",
+                    "dtype": "float16",
                 }
 
             config_reduction = _config_reduction
@@ -70,6 +76,7 @@ def _config_reduction(config, task):
 
         model = model.to(device).to(getattr(torch, dtype))
 
+        print(f"-- config._attn_implementation={model.config._attn_implementation}")
         print(f"-- model.dtype={model.dtype}")
         print(f"-- model.device={model.device}")
         processor = AutoProcessor.from_pretrained(model_id, use_fast=True)
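
The else branch builds a random model from the reduced configuration returned by _config_reduction. As a point of reference, that reduction maps onto plain transformers calls roughly as below; this is only a sketch of the equivalent steps under assumed names, not the helper the test actually uses:

    import torch
    from transformers import AutoConfig, AutoModel

    model_id = "Qwen/Qwen2.5-VL-3B-Instruct"
    config = AutoConfig.from_pretrained(model_id)
    # shrink the text model to two full-attention layers, as in _config_reduction
    config.text_config.num_hidden_layers = 2
    config.text_config.layer_types = ["full_attention", "full_attention"]
    config._attn_implementation = "sdpa"
    # "dtype": "float16" in the reduction corresponds to casting the random model
    model = AutoModel.from_config(config).to(torch.float16).eval()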

_unittests/ut_tasks/try_tasks.py

Lines changed: 15 additions & 21 deletions
@@ -1011,15 +1011,20 @@ def test_imagetext2text_qwen_2_5_vl_instruct(self):
             return_dict:bool
         )
         """
-        import transformers
-        from transformers import AutoModel, AutoProcessor
+        from transformers import AutoProcessor
         from qwen_vl_utils import process_vision_info
 
-        # model_id = "Qwen/Qwen2.5-VL-7B-Instruct"
-        model_id = "Qwen/Qwen2.5-VL-3B-Instruct"
+        model_id = "Qwen/Qwen2.5-VL-7B-Instruct"
+        # model_id = "Qwen/Qwen2.5-VL-3B-Instruct"
         if os.environ.get("PRETRAINED", ""):
-            model = AutoModel.from_pretrained(model_id, device_map="auto", dtype="auto").eval()
+            print("-- use pretrained model")
+            from transformers import Qwen2_5_VLForConditionalGeneration
+
+            model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+                model_id, device_map="auto", dtype="auto", trust_remote_code=True
+            ).eval()
         else:
+            print("-- use dummy model")
 
             def config_reduction(config, task):
                 return {
@@ -1035,6 +1040,7 @@ def config_reduction(config, task):
             )
             model = data["model"]
 
+        print(f"-- model type={type(model)}")
         print(f"-- model.device={model.device}")
         processor = AutoProcessor.from_pretrained(model_id, use_fast=True)
         print(f"-- processor={type(processor)}")
@@ -1063,25 +1069,13 @@ def config_reduction(config, task):
             padding=True,
             return_tensors="pt",
         )
-        inputs = inputs.to("cuda")
-        model = model.to("cuda").to(torch.bfloat16)
+        # model = model.to("cuda").to(torch.bfloat16)
+        # inputs = inputs.to("cuda")
 
         print(f"-- processor {type(processor)}")
         print(f"-- inputs={self.string_type(inputs, with_shape=True, with_min_max=True)}")
-
-        f_ = transformers.models.qwen2_5_vl.modeling_qwen2_5_vl.apply_multimodal_rotary_pos_emb
-
-        def _apply_multimodal_rotary_pos_emb(*args, **kwargs):
-            print(
-                "-- apply_multimodal_rotary_pos_emb:",
-                self.string_type(args, with_shape=True),
-                self.string_type(kwargs, with_shape=True),
-            )
-            return f_(*args, **kwargs)
-
-        transformers.models.qwen2_5_vl.modeling_qwen2_5_vl.apply_multimodal_rotary_pos_emb = (
-            _apply_multimodal_rotary_pos_emb
-        )
+        generated_ids = model.generate(**inputs, max_new_tokens=128)
+        print("-- second")
 
         print()
         with (
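
The monkey-patched logging of apply_multimodal_rotary_pos_emb is replaced by a plain generate call. The ids it returns still contain the prompt tokens; below is a minimal decoding sketch in the standard Qwen2.5-VL style, assuming inputs, processor, and generated_ids from the test above:

    # strip the prompt from each sequence, then decode only the completion
    trimmed = [
        out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
    ]
    text = processor.batch_decode(
        trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
    )
    print(text)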

onnx_diagnostic/helpers/mini_onnx_builder.py

Lines changed: 45 additions & 1 deletion
@@ -422,6 +422,27 @@ def create_onnx_model_from_input_tensors(
     :return: ModelProto
 
     The function raises an error if not supported.
+    An example:
+
+    .. code-block:: python
+
+        from onnx_diagnostic.helpers.mini_onnx_builder import (
+            create_onnx_model_from_input_tensors,
+        )
+        import onnx
+
+        proto = create_onnx_model_from_input_tensors(
+            dict(
+                query_states=query_states,
+                key_states=key_states,
+                value_states=value_states,
+                cu_seqlens=cu_seqlens,
+                max_seqlen=(cu_seqlens[1:] - cu_seqlens[:-1]).max(),
+                scaling=self.scaling,
+                attn_output=attn_output,
+            )
+        )
+        onnx.save(proto, "attention_inputs.onnx")
     """
     if switch_low_high is None:
         switch_low_high = sys.byteorder != "big"
@@ -461,7 +482,17 @@ def _unflatten(
         if spl[-1] == "array":
             return pos + 1, outputs[pos]
         if spl[-1] == "tensor":
-            return pos + 1, torch.from_numpy(outputs[pos]).to(device)
+            try:
+                return pos + 1, torch.from_numpy(outputs[pos]).to(device)
+            except TypeError:
+                # this should be made more robust
+                import ml_dtypes
+
+                if outputs[pos].dtype == ml_dtypes.bfloat16:
+                    return pos + 1, torch.from_numpy(outputs[pos].astype(float)).to(device).to(
+                        torch.bfloat16
+                    )
+                raise
         raise AssertionError(f"Unexpected name {name!r} in {names}")
 
     res: List[Any] = []
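
The new except TypeError branch exists because torch.from_numpy has no mapping for the ml_dtypes.bfloat16 extension dtype and raises TypeError. A minimal sketch of the workaround the hunk applies:

    import ml_dtypes
    import numpy as np
    import torch

    arr = np.array([1.0, 2.0], dtype=ml_dtypes.bfloat16)
    # torch.from_numpy(arr) raises TypeError for this extension dtype,
    # so convert through float and cast back to bfloat16 on the torch side
    t = torch.from_numpy(arr.astype(float)).to(torch.bfloat16)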
@@ -557,6 +588,19 @@ def create_input_tensors_from_onnx_model(
     :return: restored data
 
     See :ref:`l-plot-intermediate-results` for an example.
+
+    .. code-block:: python
+
+        import os
+        from onnx_diagnostic.helpers.mini_onnx_builder import (
+            create_input_tensors_from_onnx_model,
+        )
+        from onnx_diagnostic.helpers import string_type
+
+        restored = create_input_tensors_from_onnx_model("attention_inputs.onnx")
+        for k, v in restored.items():
+            print(f"{k}: {string_type(v, with_shape=True, with_min_max=True)}")
+
     """
     if engine == "ExtendedReferenceEvaluator":
         from ..reference import ExtendedReferenceEvaluator
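
Taken together, the two docstring examples form a save/restore round trip. Below is a short, hedged sketch exercising the new bfloat16 path end to end; it assumes bfloat16 tensors are serialized through ml_dtypes on the numpy side, which is what the fallback above handles:

    import onnx
    import torch
    from onnx_diagnostic.helpers.mini_onnx_builder import (
        create_input_tensors_from_onnx_model,
        create_onnx_model_from_input_tensors,
    )

    # save a bfloat16 tensor into an ONNX model, then restore it
    tensors = {"weight": torch.randn(4, 8).to(torch.bfloat16)}
    onnx.save(create_onnx_model_from_input_tensors(tensors), "bf16_tensors.onnx")
    restored = create_input_tensors_from_onnx_model("bf16_tensors.onnx")
    assert restored["weight"].dtype == torch.bfloat16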
