Commit 92a750c

update feature extraction
1 parent 3cbc1e6 commit 92a750c

File tree

CHANGELOGS.rst
_unittests/ut_tasks/try_tasks.py
onnx_diagnostic/tasks/feature_extraction.py
onnx_diagnostic/tasks/text2text_generation.py
onnx_diagnostic/torch_models/validate.py

5 files changed: +140 −10 lines changed

CHANGELOGS.rst

Lines changed: 2 additions & 0 deletions
@@ -4,6 +4,8 @@ Change Logs
 0.7.5
 +++++
 
+* :pr:`185`: remove the use of _seen_tokens in DynamicCache (removed in transformers>4.53),
+  updates dummy inputs for feature-extraction
 * :pr:`184`: implements side-by-side
 
 0.7.4
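
For context: in transformers>4.53 the DynamicCache class no longer carries the private _seen_tokens counter, so code that read it has to go through the public accessor instead. A minimal sketch of the replacement, assuming a recent transformers:

```python
# Minimal sketch, assuming transformers>4.53: the private _seen_tokens
# attribute is gone from DynamicCache; get_seq_length() is the public way
# to read how many tokens the cache has seen.
from transformers.cache_utils import DynamicCache

cache = DynamicCache()
# old: cache._seen_tokens  -> AttributeError on recent versions
print(cache.get_seq_length())  # 0 for an empty cache
```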

_unittests/ut_tasks/try_tasks.py

Lines changed: 47 additions & 0 deletions
@@ -1,6 +1,8 @@
 import unittest
+import torch
 from onnx_diagnostic.ext_test_case import ExtTestCase, never_test
 from onnx_diagnostic.helpers import string_type
+from onnx_diagnostic.helpers.cache_helper import make_dynamic_cache, make_encoder_decoder_cache
 from onnx_diagnostic.helpers.torch_helper import steal_forward
 
 
@@ -378,6 +380,51 @@ def test_feature_extraction(self):
         model = BartModel.from_pretrained("facebook/bart-base")
         text = "Replace me by any text you'd like."
         encoded_input = tokenizer(text, return_tensors="pt")
+        sequence_length, sequence_length2 = 30, 4
+        sequence_length = 3
+        batch_size, encoder_attention_heads, encoder_ffn_dim = 1, 12, 64
+        batch_size, decoder_attention_heads, decoder_ffn_dim = 1, 12, 64
+        num_hidden_layers = 6
+        encoded_input["past_key_values"] = make_encoder_decoder_cache(
+            make_dynamic_cache(
+                [
+                    (
+                        torch.randn(
+                            batch_size,
+                            encoder_attention_heads,
+                            sequence_length,
+                            encoder_ffn_dim,
+                        ),
+                        torch.randn(
+                            batch_size,
+                            encoder_attention_heads,
+                            sequence_length,
+                            encoder_ffn_dim,
+                        ),
+                    )
+                    for i in range(num_hidden_layers)
+                ]
+            ),
+            make_dynamic_cache(
+                [
+                    (
+                        torch.randn(
+                            batch_size,
+                            decoder_attention_heads,
+                            sequence_length2,
+                            decoder_ffn_dim,
+                        ),
+                        torch.randn(
+                            batch_size,
+                            decoder_attention_heads,
+                            sequence_length2,
+                            decoder_ffn_dim,
+                        ),
+                    )
+                    for i in range(num_hidden_layers)
+                ]
+            ),
+        )
         print()
         print("-- inputs", string_type(encoded_input, with_shape=True, with_min_max=True))
         output = model(**encoded_input)
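
Stripped to its skeleton, the block above builds one (key, value) pair per layer, each tensor shaped (batch, heads, past_length, head_dim), wraps each list in a DynamicCache, and zips the two caches into one encoder/decoder cache. A reduced sketch of the same pattern (2 layers instead of 6):

```python
# Reduced sketch of the pattern used in test_feature_extraction:
# per-layer (key, value) tensors shaped (batch, heads, past_len, head_dim).
import torch
from onnx_diagnostic.helpers.cache_helper import (
    make_dynamic_cache,
    make_encoder_decoder_cache,
)

def layer_pairs(past_len: int, n_layers: int = 2):
    # batch=1, heads=12, head_dim=64, matching the test above
    return [
        (torch.randn(1, 12, past_len, 64), torch.randn(1, 12, past_len, 64))
        for _ in range(n_layers)
    ]

cache = make_encoder_decoder_cache(
    make_dynamic_cache(layer_pairs(3)),  # first sub-cache, sequence_length=3
    make_dynamic_cache(layer_pairs(4)),  # second sub-cache, sequence_length2=4
)
```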

onnx_diagnostic/tasks/feature_extraction.py

Lines changed: 86 additions & 5 deletions
@@ -1,17 +1,15 @@
 from typing import Any, Callable, Dict, Optional, Tuple
 import torch
 from ..helpers.config_helper import update_config, check_hasattr
+from ..helpers.cache_helper import make_dynamic_cache, make_encoder_decoder_cache
 
 __TASK__ = "feature-extraction"
 
 
 def reduce_model_config(config: Any) -> Dict[str, Any]:
     """Reduces a model size."""
-    check_hasattr(config, "num_attention_heads", "num_hidden_layers")
-    kwargs = dict(
-        num_hidden_layers=min(config.num_hidden_layers, 2),
-        num_attention_heads=min(config.num_attention_heads, 4),
-    )
+    check_hasattr(config, "num_hidden_layers")
+    kwargs = dict(num_hidden_layers=min(config.num_hidden_layers, 2))
     update_config(config, kwargs)
     return kwargs
 
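With this change, reducing a feature-extraction model only shrinks the layer count; attention-head counts are left alone, which keeps them consistent with the cache tensors built further down. A usage sketch, assuming a BART-like checkpoint:

```python
# Hedged usage sketch: for facebook/bart-base (6 layers), the reduced
# config keeps everything except num_hidden_layers, capped at 2.
from transformers import AutoConfig
from onnx_diagnostic.tasks.feature_extraction import reduce_model_config

config = AutoConfig.from_pretrained("facebook/bart-base")
print(reduce_model_config(config))  # expected: {'num_hidden_layers': 2}
```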
@@ -22,6 +20,12 @@ def get_inputs(
     batch_size: int,
     sequence_length: int,
     dummy_max_token_id: int,
+    sequence_length2: int = 3,
+    decoder_attention_heads: Optional[int] = None,
+    encoder_attention_heads: Optional[int] = None,
+    encoder_ffn_dim: Optional[int] = None,
+    decoder_ffn_dim: Optional[int] = None,
+    num_hidden_layers: Optional[int] = None,
     add_second_input: int = 1,
     **kwargs,  # unused
 ):
@@ -50,6 +54,66 @@ def get_inputs(
         ),
         attention_mask=torch.ones((batch_size, sequence_length)).to(torch.int64),
     )
+    if (
+        encoder_attention_heads
+        and decoder_attention_heads
+        and encoder_ffn_dim
+        and decoder_ffn_dim
+        and num_hidden_layers
+    ):
+        inputs["past_key_values"] = make_encoder_decoder_cache(
+            make_dynamic_cache(
+                [
+                    (
+                        torch.randn(
+                            batch_size,
+                            encoder_attention_heads,
+                            sequence_length,
+                            encoder_ffn_dim,
+                        ),
+                        torch.randn(
+                            batch_size,
+                            encoder_attention_heads,
+                            sequence_length,
+                            encoder_ffn_dim,
+                        ),
+                    )
+                    for i in range(num_hidden_layers)
+                ]
+            ),
+            make_dynamic_cache(
+                [
+                    (
+                        torch.randn(
+                            batch_size,
+                            decoder_attention_heads,
+                            sequence_length2,
+                            decoder_ffn_dim,
+                        ),
+                        torch.randn(
+                            batch_size,
+                            decoder_attention_heads,
+                            sequence_length2,
+                            decoder_ffn_dim,
+                        ),
+                    )
+                    for i in range(num_hidden_layers)
+                ]
+            ),
+        )
+        cache_length = "cache_length_key"
+        cache_length2 = "cache_length_val"
+        shapes["past_key_values"] = [
+            [
+                [{0: batch, 2: cache_length} for _ in range(num_hidden_layers)],
+                [{0: batch, 2: cache_length} for _ in range(num_hidden_layers)],
+            ],
+            [
+                [{0: batch, 2: cache_length2} for _ in range(num_hidden_layers)],
+                [{0: batch, 2: cache_length2} for _ in range(num_hidden_layers)],
+            ],
+        ]
+
     res = dict(inputs=inputs, dynamic_shapes=shapes)
     if add_second_input:
         assert (
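
A note on the nesting just added: the outer list appears to follow the EncoderDecoderCache layout, one [keys, values] pair of per-layer dict lists for each sub-cache, with the first sub-cache's length named "cache_length_key" and the second's "cache_length_val", matching the names used in text2text_generation.py. A small orientation sketch, under the assumption that the cached tensors are laid out as built above:

```python
# Orientation sketch (assumption: cached tensors are laid out
# (batch, heads, cache_length, head_dim), as built in get_inputs above):
# axes 0 and 2 are the only dynamic ones per layer.
import torch

key = torch.randn(1, 12, 30, 64)            # batch, heads, cache_length, head_dim
dynamic_axes = {0: "batch", 2: "cache_length_key"}
assert max(dynamic_axes) < key.ndim         # the named axes exist on the tensor
```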
@@ -61,6 +125,12 @@ def get_inputs(
         batch_size=batch_size + 1,
         sequence_length=sequence_length + add_second_input,
         dummy_max_token_id=dummy_max_token_id,
+        sequence_length2=sequence_length2,
+        decoder_attention_heads=decoder_attention_heads,
+        encoder_attention_heads=encoder_attention_heads,
+        encoder_ffn_dim=encoder_ffn_dim,
+        decoder_ffn_dim=decoder_ffn_dim,
+        num_hidden_layers=num_hidden_layers,
         add_second_input=0,
         **kwargs,
     )["inputs"]
@@ -80,4 +150,15 @@ def random_input_kwargs(config: Any) -> Tuple[Dict[str, Any], Callable]:
         sequence_length=30,
         dummy_max_token_id=31999 if config is None else (config.vocab_size - 1),
     )
+    for att in [
+        "decoder_attention_heads",
+        "encoder_attention_heads",
+        "encoder_ffn_dim",
+        "decoder_ffn_dim",
+        "num_hidden_layers",
+    ]:
+        if hasattr(config, att):
+            kwargs[att] = getattr(config, att)
+    kwargs["decoder_ffn_dim"] = kwargs["encoder_ffn_dim"] = 64
+    print(kwargs)
     return kwargs, get_inputs
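
Two things are worth noting here. First, the *_ffn_dim values end up on the last cache axis, which in the tensors above plays the role of the per-head dimension; that would explain why both are pinned to 64 (bart-base: hidden size 768 across 12 heads) rather than using the config's much larger feed-forward sizes. Second, a hedged usage sketch, assuming a BART-like config:

```python
# Hedged usage sketch: random_input_kwargs copies the cache-related
# attributes off a BART-like config; the returned callable is get_inputs.
from transformers import AutoConfig
from onnx_diagnostic.tasks.feature_extraction import random_input_kwargs

config = AutoConfig.from_pretrained("facebook/bart-base")
kwargs, fct = random_input_kwargs(config)
# kwargs holds sequence_length=30, dummy_max_token_id, and the copied
# attention-head/layer attributes, with both ffn dims forced to 64.
print(kwargs)
```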

onnx_diagnostic/tasks/text2text_generation.py

Lines changed: 2 additions & 2 deletions
@@ -69,8 +69,8 @@ def get_inputs(
     ), f"Not yet implemented for cls_cache={kwargs['cls_cache']!r}."
     batch = torch.export.Dim("batch", min=1, max=1024)
     seq_length = "seq_length"  # torch.export.Dim("seq_length", min=1, max=4096)
-    cache_length = "cache_length_key"  # torch.export.Dim("cache_length", min=1, max=4096)
-    cache_length2 = "cache_length_val"  # torch.export.Dim("cache_length2", min=1, max=4096)
+    cache_length = "cache_length_key"
+    cache_length2 = "cache_length_val"
 
     shapes = {
         "input_ids": {0: batch, 1: seq_length},

onnx_diagnostic/torch_models/validate.py

Lines changed: 3 additions & 3 deletions
@@ -1090,7 +1090,7 @@ def validate_onnx_model(
     """
     import onnxruntime
 
-    def _mk(key):
+    def _mk(key, flavour=flavour):
         return f"{key}_{flavour}" if flavour else key
 
     summary: Dict[str, Any] = {}
@@ -1145,7 +1145,7 @@ def _mk(key):
         )
         sess = _quiet_or_not_quiet(
             quiet,
-            _mk("onnx_ort_create"),
+            _mk("create_onnx_ort"),
             summary,
             data,
             (lambda source=source, providers=providers: cls_runtime(source, providers)),
@@ -1180,7 +1180,7 @@ def _mk(key):
 
         got = _quiet_or_not_quiet(
             quiet,
-            _mk(f"time_onnx_ort_run{suffix}"),
+            _mk(f"run_onnx_ort{suffix}"),
             summary,
             data,
             (lambda sess=sess, feeds=feeds: sess.run(None, feeds)),
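
The change to _mk is the classic default-argument trick: a plain closure reads flavour from the enclosing scope at call time, while the default argument freezes its value when the function is defined, which is presumably the intent here (the same pattern already appears in the source=source and sess=sess lambdas). A standalone illustration:

```python
# Standalone illustration of why _mk gained flavour=flavour: default
# arguments capture the value at definition time, closures do not.
flavour = "fp16"

def late(key):
    return f"{key}_{flavour}" if flavour else key

def frozen(key, flavour=flavour):
    return f"{key}_{flavour}" if flavour else key

flavour = "fp32"
print(late("run"))    # run_fp32 -- the closure sees the rebound value
print(frozen("run"))  # run_fp16 -- the default kept the original value
```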
