fix

xadupre · xadupre · commit 2eab178ff132 · 2025-11-10T18:29:47.000+01:00
diff --git a/_unittests/ut_export/test_api.py b/_unittests/ut_export/test_api.py
@@ -1,6 +1,12 @@
+import os
 import unittest
 import torch
-from onnx_diagnostic.ext_test_case import ExtTestCase, hide_stdout, has_transformers
+from onnx_diagnostic.ext_test_case import (
+    ExtTestCase,
+    hide_stdout,
+    has_transformers,
+    ignore_warnings,
+)
 from onnx_diagnostic.helpers import max_diff
 from onnx_diagnostic.helpers.torch_helper import torch_deepcopy
 from onnx_diagnostic.helpers.rt_helper import make_feeds
@@ -36,6 +42,7 @@ def forward(self, x, y):
         )
 
     @hide_stdout()
+    @ignore_warnings(FutureWarning)
     def test_tiny_llm_to_onnx(self):
         import onnxruntime
 
@@ -68,6 +75,8 @@ def test_tiny_llm_to_onnx(self):
                         filename=filename,
                     )
         for exporter, filename in filenames.items():
+            if not os.path.exists(filename):
+                continue
             with self.subTest(exporter=f"validate-{exporter}"):
                 sess = onnxruntime.InferenceSession(
                     filename, providers=["CPUExecutionProvider"]
@@ -90,6 +99,8 @@ def test_tiny_llm_to_onnx(self):
 
         expected = model(**torch_deepcopy(problem))
         for exporter, filename in filenames.items():
+            if not os.path.exists(filename):
+                continue
             with self.subTest(exporter=f"full-mask-{exporter}"):
                 sess = onnxruntime.InferenceSession(
                     filename, providers=["CPUExecutionProvider"]
diff --git a/_unittests/ut_torch_export_patches/test_patch_transformers.py b/_unittests/ut_torch_export_patches/test_patch_transformers.py
@@ -2,6 +2,7 @@
 import torch
 import transformers
 import transformers.integrations.sdpa_attention as sdpa_attention
+import onnx
 import onnx_diagnostic.torch_export_patches.patches.patch_transformers as patch_transformers
 from onnx_diagnostic.ext_test_case import ExtTestCase, requires_transformers, ignore_warnings
 from onnx_diagnostic.helpers.torch_helper import torch_deepcopy, fake_torchdynamo_exporting
@@ -387,13 +388,54 @@ def test_patched_qwen2_5_vl_vision_attention_forward(self):
         expected = instance.forward(**inputs)
         got = patched_Qwen2_5_VLVisionAttention.forward(instance, **inputs)
         self.assertEqualArray(expected, got)
-        if 1:  # with torch_export_patches(patch_transformers=False, patch_torch=True):
-            with fake_torchdynamo_exporting():
-                assert (
-                    _is_torchdynamo_exporting()
-                ), f"exporting is not set to true? {torch.compiler.is_exporting_flag}"
-                got = patched_Qwen2_5_VLVisionAttention.forward(instance, **inputs)
-                self.assertEqualArray(expected, got)
+        with fake_torchdynamo_exporting():
+            assert (
+                _is_torchdynamo_exporting()
+            ), f"exporting is not set to true? {torch.compiler.is_exporting_flag}"
+            got = patched_Qwen2_5_VLVisionAttention.forward(instance, **inputs)
+            self.assertEqualArray(expected, got)
+
+    @requires_transformers("4.55")
+    @unittest.skipIf(not patch_qwen2_5, "Qwen25 not part of this transformers")
+    def test_qwen2_5_vl_vision_attention_iteration(self):
+        from onnx_diagnostic.torch_export_patches.patches.patch_transformers import (
+            patched_Qwen2_5_VLVisionAttentionOneIteration,
+        )
+
+        model = patched_Qwen2_5_VLVisionAttentionOneIteration()
+        inputs = (
+            torch.tensor([736, 800], dtype=torch.int64),
+            torch.rand((1, 16, 1292, 80), dtype=torch.float16),
+            torch.rand((1, 16, 1292, 80), dtype=torch.float16),
+            torch.rand((1, 16, 1292, 80), dtype=torch.float16),
+        )
+        ds = (
+            {},
+            {0: "batch", 1: "length", 2: "dim"},
+            {0: "batch", 1: "length", 2: "dim"},
+            {0: "batch", 1: "length", 2: "dim"},
+        )
+        for exporter in ("custom", "onnx-dynamo"):
+            # onnx-dynamo needs OpOverload(op='aten.sym_storage_offset') (transformers>=5.0?)
+            filename = self.get_dump_file(
+                f"test_qwen2_5_vl_vision_attention_iteration.{exporter}.onnx"
+            )
+            to_onnx(
+                model,
+                inputs,
+                dynamic_shapes=ds,
+                exporter=exporter,
+                filename=filename,
+                exporter_kwargs={"report": True} if exporter == "onnx-dynamo" else {},
+            )
+            self.assert_onnx_disc(
+                f"test_qwen2_5_vl_vision_attention_iteration-{exporter}",
+                onnx.load(filename),
+                model,
+                inputs,
+                atol=1e-3,
+                rtol=1,
+            )
 
 
 if __name__ == "__main__":
diff --git a/onnx_diagnostic/ext_test_case.py b/onnx_diagnostic/ext_test_case.py
@@ -1214,13 +1214,14 @@ def assert_onnx_disc(
         from .helpers.ort_session import InferenceSessionForTorch
 
         kws = dict(with_shape=True, with_min_max=verbose > 1)
-        if verbose:
-            vname = test_name or "assert_onnx_disc"
+        vname = test_name or "assert_onnx_disc"
         if test_name:
             name = f"{test_name}.onnx"
-            print(f"[{vname}] save the onnx model into {name!r}")
+            if verbose:
+                print(f"[{vname}] save the onnx model into {name!r}")
             name = self.dump_onnx(name, proto)
-            print(f"[{vname}] file size {os.stat(name).st_size // 2**10:1.3f} kb")
+            if verbose:
+                print(f"[{vname}] file size {os.stat(name).st_size // 2**10:1.3f} kb")
         if verbose:
             print(f"[{vname}] make feeds {string_type(inputs, **kws)}")
         if use_ort:
diff --git a/onnx_diagnostic/helpers/log_helper.py b/onnx_diagnostic/helpers/log_helper.py
@@ -901,13 +901,19 @@ def view(
             else g.groupby([*key_index, *key_columns], dropna=False).sum()
         )
         not_unique = r[r["count"] > 1]
+        if not_unique.shape[0] > 0 and os.environ.get("DUPLICATE", ""):
+            filename = os.environ.get("DUPLICATE")
+            subset = data.set_index([*key_index, *key_columns]).merge(
+                not_unique.head(), left_index=True, right_index=True
+            )
+            subset.to_excel(filename)
         assert not_unique.shape[0] == 0, (
             f"view_def.name={view_def.name!r}, "
             f"unable to run the pivot with index={sorted(key_index)}, "
             f"key={sorted(key_columns)}, key_agg={key_agg}, values={sorted(values)}, "
             f"columns={sorted(data.columns)}, ignored={view_def.ignore_columns}, "
-            f"not unique={set(data.columns) - unique}"
-            f"\n--\n{not_unique.head(10)}"
+            f"not unique={set(data.columns) - unique}, set DUPLICATE=<filename> "
+            f"to store the duplicates in a excel file\n--\n{not_unique.head(10)}"
         )
 
         # pivot
@@ -1000,8 +1006,12 @@ def _fix_aggregation_change(
         keys = set(self.keys_time) - {columns_to_fix}
         select = data[self.keys_time]
         select_agg = select.groupby(list(keys)).count()
+        if select_agg.shape[0] == 0:
+            # nothing to fix
+            return data
         assert select_agg[columns_to_fix].max() <= 1, (
-            f"Column {columns_to_fix!r} has two distinct values at least for one date\n"
+            f"Column {columns_to_fix!r} has two distinct values at least for one date, "
+            f"max={select_agg[columns_to_fix].max()}\n"
             f"{select_agg[select_agg[columns_to_fix] > 1]}"
         )
 
@@ -1038,6 +1048,16 @@ def _fix_aggregation_change(
             f"data.columns.equals(res.columns)={data.columns.equals(res.columns)}, "
             f"data.index.equals(res.columns)={data.index.equals(res.columns)}, "
         )
+        select = res[self.keys_time]
+        select_agg = select.groupby(list(keys)).count()
+        if select_agg.shape[0] == 0:
+            # nothing to fix
+            return data
+        assert select_agg[columns_to_fix].max() <= 1, (
+            f"Column {columns_to_fix!r} has two distinct values at least for one date, "
+            f"max={select_agg[columns_to_fix].max()}\n"
+            f"{select_agg[select_agg[columns_to_fix] > 1]}"
+        )
         return res
 
     def _dropna(
@@ -1977,7 +1997,8 @@ def make_view_def(self, name: str) -> Optional[CubeViewDef]:
         * **cmd:** command lines
         * **raw-short:** raw data without all the unused columns
         """
-        fix_aggregation_change = ["model_speedup_input_set", "model_test_with"]
+        # The aggregation-change fix does not work yet, so no column is fixed.
+        fix_aggregation_change = []  # "model_speedup_input_set", "model_test_with"]
         fs = ["suite", "model_suite", "task", "model_name", "model_task"]
         index_cols = self._filter_column(fs, self.keys_time)
         assert index_cols, (
diff --git a/onnx_diagnostic/torch_export_patches/patches/patch_transformers.py b/onnx_diagnostic/torch_export_patches/patches/patch_transformers.py
@@ -2265,6 +2265,34 @@ def forward(
             hidden_states = hidden_states[reverse_indices, :]
             return hidden_states
 
+    class patched_Qwen2_5_VLVisionAttentionOneIteration(torch.nn.Module):
+        def forward(
+            self,
+            start_end,
+            query_states,
+            key_states,
+            value_states,
+            scaling: float = 1.0,
+            dropout: float = 0.0,
+            **kwargs,
+        ):
+            a = start_end[0].item()
+            b = start_end[1].item()
+            q = query_states[:, :, a:b, :]
+            k = key_states[:, :, a:b, :]
+            v = value_states[:, :, a:b, :]
+            return patched_sdpa_attention_forward(
+                self,
+                q,
+                k,
+                v,
+                attention_mask=None,
+                scaling=scaling,
+                dropout=dropout,
+                is_causal=False,
+                **kwargs,
+            )[0]
+
     class patched_Qwen2_5_VLVisionAttention:
         _PATCHES_ = ["forward"]
         _PATCHED_CLASS_ = (
@@ -2361,22 +2389,15 @@ def forward(
                     attention_interface = patched_sdpa_attention_forward
 
                 def _iteration(start_end, query_states, key_states, value_states):
-                    a = start_end[0]
-                    b = start_end[1]
-                    q = query_states[:, :, a:b, :]
-                    k = key_states[:, :, a:b, :]
-                    v = value_states[:, :, a:b, :]
-                    return attention_interface(
+                    return patched_Qwen2_5_VLVisionAttentionOneIteration.forward(
                         self,
-                        q,
-                        k,
-                        v,
-                        attention_mask=None,
+                        start_end,
+                        query_states,
+                        key_states,
+                        value_states,
                         scaling=self.scaling,
                         dropout=0.0 if not self.training else self.attention_dropout,
-                        is_causal=False,
-                        **kwargs,
-                    )[0]
+                    )
 
                 starts = cu_seqlens[:-1]
                 ends = cu_seqlens[1:]