1 | 1 | import os |
2 | 2 | import unittest |
3 | 3 | import torch |
4 | | -from onnx_diagnostic.ext_test_case import ExtTestCase, never_test |
| 4 | +from onnx_diagnostic.ext_test_case import ExtTestCase, never_test, ignore_warnings |
5 | 5 | from onnx_diagnostic.helpers import string_type |
6 | 6 | from onnx_diagnostic.helpers.cache_helper import make_dynamic_cache, make_encoder_decoder_cache |
7 | 7 | from onnx_diagnostic.helpers.torch_helper import steal_forward |
8 | 8 | from onnx_diagnostic.torch_export_patches import torch_export_patches |
9 | 9 | from onnx_diagnostic.torch_models.hghub.model_inputs import get_untrained_model_with_inputs |
10 | 10 |
11 | 11 |
12 | | -class TestHuggingFaceHubModel(ExtTestCase): |
| 12 | +class TestTryHuggingFaceHubModel(ExtTestCase): |
13 | 13 | @never_test() |
14 | 14 | def test_image_classification(self): |
15 | 15 | # clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k image_c |
@@ -988,6 +988,110 @@ def test_imagetext2text_generation_gemma3_4b_it(self): |
988 | 988 | ) |
989 | 989 | print(output_text) |
990 | 990 |
| 991 | + @never_test() |
| 992 | + @ignore_warnings(UserWarning) |
| 993 | + def test_imagetext2text_qwen_2_5_vl_instruct(self): |
| 994 | + """ |
| 995 | + clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k qwen_2_5 |
| 996 | +
| 997 | + :: |
| 998 | +
| 999 | + kwargs=dict( |
| 1000 | + cache_position:T7s3602, |
| 1001 | + input_ids:T7s1x3602, |
| 1002 | + inputs_embeds:None, |
| 1003 | + attention_mask:T7s1x3602, |
| 1004 | + position_ids:T7s4x1x3602, |
| 1005 | + pixel_values:T1s14308x1176, |
| 1006 | + pixel_values_videos:None, |
| 1007 | + image_grid_thw:T7s1x3, |
| 1008 | + video_grid_thw:None, |
| 1009 | + second_per_grid_ts:None, |
| 1010 | + use_cache:bool, |
| 1011 | + return_dict:bool |
| 1012 | + ) |
| 1013 | + """ |
| 1014 | + from transformers import AutoModel, AutoProcessor |
| 1015 | + from qwen_vl_utils import process_vision_info |
| 1016 | + |
| 1017 | + # model_id = "Qwen/Qwen2.5-VL-7B-Instruct" |
| 1018 | + model_id = "Qwen/Qwen2.5-VL-3B-Instruct" |
| 1019 | + if os.environ.get("PRETRAINED", ""): |
| 1020 | + model = AutoModel.from_pretrained(model_id, device_map="auto", dtype="auto").eval() |
| 1021 | + else: |
| 1022 | + |
| 1023 | + def config_reduction(config, task): |
| 1024 | + return { |
| 1025 | + "num_hidden_layers": 2, |
| 1026 | + "text_config": { |
| 1027 | + "num_hidden_layers": 2, |
| 1028 | + "layer_types": ["full_attention", "full_attention"], |
| 1029 | + }, |
| 1030 | + } |
| 1031 | + |
| 1032 | + data = get_untrained_model_with_inputs( |
| 1033 | + model_id, verbose=1, add_second_input=False, config_reduction=config_reduction |
| 1034 | + ) |
| 1035 | + model = data["model"] |
| 1036 | + |
| 1037 | + print(f"-- model.device={model.device}") |
| 1038 | + processor = AutoProcessor.from_pretrained(model_id, use_fast=True) |
| 1039 | + print(f"-- processor={type(processor)}") |
| 1040 | + |
| 1041 | + messages = [ |
| 1042 | + { |
| 1043 | + "role": "user", |
| 1044 | + "content": [ |
| 1045 | + { |
| 1046 | + "type": "image", |
| 1047 | + "image": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg", |
| 1048 | + }, |
| 1049 | + {"type": "text", "text": "Describe this image."}, |
| 1050 | + ], |
| 1051 | + } |
| 1052 | + ] |
| 1053 | + text = processor.apply_chat_template( |
| 1054 | + messages, tokenize=False, add_generation_prompt=True |
| 1055 | + ) |
| 1056 | + image_inputs, video_inputs = process_vision_info(messages) |
| 1057 | + inputs = processor( |
| 1058 | + text=[text], |
| 1059 | + images=image_inputs, |
| 1060 | + videos=video_inputs, |
| 1061 | + padding=True, |
| 1062 | + return_tensors="pt", |
| 1063 | + ) |
| 1064 | + inputs = inputs.to("cuda") |
| 1065 | + model = model.to("cuda").to(torch.bfloat16) |
| 1066 | + |
| 1067 | + print(f"-- processor {type(processor)}") |
| 1068 | + print(f"-- inputs={self.string_type(inputs, with_shape=True, with_min_max=True)}") |
| 1069 | + |
| 1070 | + print() |
| 1071 | + with ( |
| 1072 | + torch_export_patches( |
| 1073 | + patch_torch=False, |
| 1074 | + patch_sympy=False, |
| 1075 | + patch_transformers=True, |
| 1076 | + verbose=1, |
| 1077 | + ), |
| 1078 | + steal_forward( |
| 1079 | + [model, model.visual], |
| 1080 | + dump_file=self.get_dump_file("test_imagetext2text_qwen_2_5_vl_instruct.onnx"), |
| 1081 | + dump_drop={"attention_mask", "past_key_values", "pixel_values"}, |
| 1082 | + save_as_external_data=False, |
| 1083 | + with_shapes=True, |
| 1084 | + ), |
| 1085 | + ): |
| 1086 | + generated_ids = model.generate(**inputs, max_new_tokens=128) |
| 1087 | + generated_ids_trimmed = [ |
| 1088 | + out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids) |
| 1089 | + ] |
| 1090 | + output_text = processor.batch_decode( |
| 1091 | + generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False |
| 1092 | + ) |
| 1093 | + print(output_text) |
| 1094 | + |
991 | 1095 |
992 | 1096 | if __name__ == "__main__": |
993 | 1097 | unittest.main(verbosity=2) |