disable two examples

xadupre · xadupre · commit 1043a16f6860 · 2025-10-28T17:50:21.000+01:00
diff --git a/_unittests/ut_helpers/test_rt_helper.py b/_unittests/ut_helpers/test_rt_helper.py
@@ -2,41 +2,62 @@
 import unittest
 import torch
 from onnx_diagnostic.ext_test_case import ExtTestCase, hide_stdout
+from onnx_diagnostic.helpers import max_diff, flatten_object
 from onnx_diagnostic.helpers.rt_helper import onnx_generate
+from onnx_diagnostic.helpers.torch_helper import torch_deepcopy
+from onnx_diagnostic.helpers.ort_session import InferenceSessionForTorch
 from onnx_diagnostic.torch_models.hghub import get_untrained_model_with_inputs
 from onnx_diagnostic.torch_export_patches import torch_export_patches
 from onnx_diagnostic.export.api import to_onnx
 
 
 class TestRtSession(ExtTestCase):
     def simple_generate_with_cache(
-        self, model, input_ids: torch.Tensor, eos_token_id: int, max_new_tokens: int = 100
+        self,
+        model,
+        input_ids: torch.Tensor,
+        eos_token_id: int,
+        session: InferenceSessionForTorch,
+        max_new_tokens: int = 100,
     ):
         # First call: prefill
         outputs = model(
             input_ids,
+            use_cache=True,
             attention_mask=torch.ones(
                 input_ids.shape, dtype=input_ids.dtype, device=input_ids.device
             ),
-            use_cache=True,
         )
 
         # Next calls: decode
         for _ in range(max_new_tokens):
             next_token_logits = outputs.logits[:, -1, :]
-            past_key_values = outputs.past_key_values
             next_token_id = torch.argmax(next_token_logits, dim=-1, keepdim=True)
             if next_token_id.item() == eos_token_id:
                 break
             input_ids = torch.cat([input_ids, next_token_id], dim=-1)
+            attention_mask = torch.ones(
+                input_ids.shape, dtype=input_ids.dtype, device=input_ids.device
+            )
+            feeds = dict(
+                zip(
+                    session.input_names,
+                    torch_deepcopy(
+                        flatten_object(
+                            [next_token_id, attention_mask, outputs.past_key_values]
+                        )
+                    ),
+                )
+            )
+            onnx_results = session.run(None, feeds)
             outputs = model(
                 next_token_id,
                 use_cache=True,
-                past_key_values=past_key_values,
-                attention_mask=torch.ones(
-                    input_ids.shape, dtype=input_ids.dtype, device=input_ids.device
-                ),
+                past_key_values=outputs.past_key_values,
+                attention_mask=attention_mask,
             )
+            diff = max_diff(outputs, onnx_results)
+            print("****", diff)
         return input_ids
 
     @hide_stdout()
@@ -63,14 +84,18 @@ def test_onnx_generate(self):
             )
 
         print("-- test_onnx_generate: generate")
-        res = onnx_generate(model_name, input_ids[:1], 2, max_new_tokens=10)
+        res, session = onnx_generate(
+            model_name, input_ids[:1], 2, max_new_tokens=10, return_session=True
+        )
         n_inputs = input_ids.shape[1]
         self.assertEqualArray(input_ids[:1], res[:, :n_inputs])
         self.assertEqual(res.dtype, torch.int64)
         self.assertEqual(res.shape, (1, 13))
         print("-- test_onnx_generate: done")
         # expected = model.generate(input_ids[:1], max_new_tokens=10)
-        expected = self.simple_generate_with_cache(model, input_ids[:1], 2, max_new_tokens=10)
+        expected = self.simple_generate_with_cache(
+            model, input_ids[:1], 2, max_new_tokens=10, session=session
+        )
         self.assertEqualArray(input_ids[:1], expected[:, :n_inputs])
         print("******", res)
         print("******", expected)
diff --git a/_unittests/ut_xrun_doc/test_documentation_examples.py b/_unittests/ut_xrun_doc/test_documentation_examples.py
@@ -84,6 +84,8 @@ def add_test_methods(cls):
             if not reason and not has_dot and name in {"plot_dump_intermediate_results.py"}:
                 reason = "dot not installed"
 
+            # transformers
+
             if (
                 not reason
                 and name in {"plot_export_tiny_llm.py"}
@@ -98,13 +100,23 @@ def add_test_methods(cls):
             ):
                 reason = "transformers<4.52"
 
+            if (
+                not reason
+                and name in {"plot_export_with_dynamic_cache.py", "plot_export_tiny_phi2.py"}
+                and not has_transformers("4.55")
+            ):
+                reason = "transformers<4.55"
+
+            # pytorch
+
             if (
                 not reason
                 and name
                 in {
+                    "plot_export_hub_codellama.py",
                     "plot_export_locate_issue.py",
                     "plot_export_with_auto.py",
-                    "plot_export_hub_codellama.py",
+                    "plot_export_tiny_llm.py",
                 }
                 and not has_torch("2.8")
             ):
@@ -117,13 +129,6 @@ def add_test_methods(cls):
             ):
                 reason = "unstable, let's wait for the next version"
 
-            if (
-                not reason
-                and name in {"plot_export_tiny_phi2.py"}
-                and not has_transformers("4.55")
-            ):
-                reason = "unstable, let's wait for the next version"
-
             if not reason and name in {
                 "plot_export_tiny_llm_dim01.py",
                 "plot_export_tiny_llm_dim01_onnx.py",