 from typing import Any, Callable, Dict, Optional, Tuple
 import torch
 from ..helpers.config_helper import update_config, check_hasattr
+from ..helpers.cache_helper import make_dynamic_cache, make_encoder_decoder_cache

 __TASK__ = "feature-extraction"


 def reduce_model_config(config: Any) -> Dict[str, Any]:
     """Reduces a model size."""
-    check_hasattr(config, "num_attention_heads", "num_hidden_layers")
-    kwargs = dict(
-        num_hidden_layers=min(config.num_hidden_layers, 2),
-        num_attention_heads=min(config.num_attention_heads, 4),
-    )
+    check_hasattr(config, "num_hidden_layers")
+    kwargs = dict(num_hidden_layers=min(config.num_hidden_layers, 2))
     update_config(config, kwargs)
     return kwargs

@@ -22,6 +20,12 @@ def get_inputs(
     batch_size: int,
     sequence_length: int,
     dummy_max_token_id: int,
+    sequence_length2: int = 3,
+    decoder_attention_heads: Optional[int] = None,
+    encoder_attention_heads: Optional[int] = None,
+    encoder_ffn_dim: Optional[int] = None,
+    decoder_ffn_dim: Optional[int] = None,
+    num_hidden_layers: Optional[int] = None,
     add_second_input: int = 1,
     **kwargs,  # unused
 ):
@@ -50,6 +54,66 @@ def get_inputs(
         ),
         attention_mask=torch.ones((batch_size, sequence_length)).to(torch.int64),
     )
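+    # When all cache dimensions are provided, also populate past_key_values
+    # with an encoder-decoder cache built from random tensors.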
+    if (
+        encoder_attention_heads
+        and decoder_attention_heads
+        and encoder_ffn_dim
+        and decoder_ffn_dim
+        and num_hidden_layers
+    ):
+        inputs["past_key_values"] = make_encoder_decoder_cache(
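+            # first cache: one random (key, value) pair per layer, shaped
+            # (batch_size, encoder_attention_heads, sequence_length, encoder_ffn_dim)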
+            make_dynamic_cache(
+                [
+                    (
+                        torch.randn(
+                            batch_size,
+                            encoder_attention_heads,
+                            sequence_length,
+                            encoder_ffn_dim,
+                        ),
+                        torch.randn(
+                            batch_size,
+                            encoder_attention_heads,
+                            sequence_length,
+                            encoder_ffn_dim,
+                        ),
+                    )
+                    for _ in range(num_hidden_layers)
+                ]
+            ),
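+            # second cache: same layout, but using the decoder parameters and
+            # sequence_length2 on the sequence axis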
+            make_dynamic_cache(
+                [
+                    (
+                        torch.randn(
+                            batch_size,
+                            decoder_attention_heads,
+                            sequence_length2,
+                            decoder_ffn_dim,
+                        ),
+                        torch.randn(
+                            batch_size,
+                            decoder_attention_heads,
+                            sequence_length2,
+                            decoder_ffn_dim,
+                        ),
+                    )
+                    for _ in range(num_hidden_layers)
+                ]
+            ),
+        )
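+        # names of the dynamic dimensions used below for the cache axes (axis 2)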
+        cache_length = "cache_length_key"
+        cache_length2 = "cache_length_val"
+        shapes["past_key_values"] = [  # type: ignore[assignment]
+            [
+                [{0: batch, 2: cache_length} for _ in range(num_hidden_layers)],
+                [{0: batch, 2: cache_length} for _ in range(num_hidden_layers)],
+            ],
+            [
+                [{0: batch, 2: cache_length2} for _ in range(num_hidden_layers)],
+                [{0: batch, 2: cache_length2} for _ in range(num_hidden_layers)],
+            ],
+        ]
+
     res = dict(inputs=inputs, dynamic_shapes=shapes)
     if add_second_input:
         assert (
@@ -61,6 +125,12 @@ def get_inputs(
             batch_size=batch_size + 1,
             sequence_length=sequence_length + add_second_input,
             dummy_max_token_id=dummy_max_token_id,
+            sequence_length2=sequence_length2,
+            decoder_attention_heads=decoder_attention_heads,
+            encoder_attention_heads=encoder_attention_heads,
+            encoder_ffn_dim=encoder_ffn_dim,
+            decoder_ffn_dim=decoder_ffn_dim,
+            num_hidden_layers=num_hidden_layers,
             add_second_input=0,
             **kwargs,
         )["inputs"]
@@ -80,4 +150,15 @@ def random_input_kwargs(config: Any) -> Tuple[Dict[str, Any], Callable]:
         sequence_length=30,
         dummy_max_token_id=31999 if config is None else (config.vocab_size - 1),
     )
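+    # forward the cache-related dimensions when the configuration defines them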
+    for att in [
+        "decoder_attention_heads",
+        "encoder_attention_heads",
+        "encoder_ffn_dim",
+        "decoder_ffn_dim",
+        "num_hidden_layers",
+    ]:
+        if hasattr(config, att):
+            kwargs[att] = getattr(config, att)
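+    # override the feed-forward dimensions with a small value to keep the dummy cache light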
+    kwargs["decoder_ffn_dim"] = kwargs["encoder_ffn_dim"] = 64
     return kwargs, get_inputs