better

xadupre · xadupre · commit 182439c54b47 · 2025-03-28T11:21:15.000+01:00
diff --git a/_unittests/ut_torch_models/test_hghub_model.py b/_unittests/ut_torch_models/test_hghub_model.py
@@ -4,7 +4,6 @@
 from onnx_diagnostic.ext_test_case import (
     ExtTestCase,
     hide_stdout,
-    long_test,
     requires_torch,
     requires_transformers,
 )
@@ -91,8 +90,18 @@ def test_get_untrained_model_with_inputs_codellama(self):
         self.assertIn((data["size"], data["n_weights"]), [(410532864, 102633216)])
 
     @hide_stdout()
-    @long_test()
+    def test_get_untrained_model_with_inputs_text2text_generation(self):
+        mid = "sshleifer/tiny-marian-en-de"
+        # mid = "Salesforce/codet5-small"
+        data = get_untrained_model_with_inputs(mid, verbose=1)
+        self.assertIn((data["size"], data["n_weights"]), [(473928, 118482)])
+        model, inputs = data["model"], data["inputs"]
+        raise unittest.SkipTest(f"not wroking for {mid!r}")
+        model(**inputs)
+
+    @hide_stdout()
     def test_get_untrained_model_Ltesting_models(self):
+        # UNHIDE=1 python _unittests/ut_torch_models/test_hghub_model.py -k L -f
         def _diff(c1, c2):
             rows = [f"types {c1.__class__.__name__} <> {c2.__class__.__name__}"]
             for k, v in c1.__dict__.items():
@@ -102,11 +111,22 @@ def _diff(c1, c2):
                     rows.append(f"{k} :: -- {v} ++ {getattr(c2, k, 'MISS')}")
             return "\n".join(rows)
 
-        # UNHIDE=1 LONGTEST=1 python _unittests/ut_torch_models/test_hghub_model.py -k L -f
         for mid in load_models_testing():
             with self.subTest(mid=mid):
+                if mid in {
+                    "hf-internal-testing/tiny-random-MaskFormerForInstanceSegmentation",
+                    "hf-internal-testing/tiny-random-MoonshineForConditionalGeneration",
+                    "fxmarty/pix2struct-tiny-random",
+                    "hf-internal-testing/tiny-random-ViTMSNForImageClassification",
+                    "hf-internal-testing/tiny-random-YolosModel",
+                }:
+                    print(f"-- not implemented yet for {mid!r}")
+                    continue
                 data = get_untrained_model_with_inputs(mid, verbose=1)
                 model, inputs = data["model"], data["inputs"]
+                if mid in {"sshleifer/tiny-marian-en-de"}:
+                    print(f"-- not fully implemented yet for {mid!r}")
+                    continue
                 try:
                     model(**inputs)
                 except Exception as e:
diff --git a/_unittests/ut_torch_models/try_tasks.py b/_unittests/ut_torch_models/try_tasks.py
@@ -29,20 +29,26 @@ def test_image_classiciation(self):
     def test_text2text_generation(self):
         # clear&&NEVERTEST=1 python _unittests/ut_torch_models/try_tasks.py -k text2t
 
+        import torch
         from transformers import RobertaTokenizer, T5ForConditionalGeneration
 
         tokenizer = RobertaTokenizer.from_pretrained("Salesforce/codet5-small")
         model = T5ForConditionalGeneration.from_pretrained("Salesforce/codet5-small")
 
         text = "def greet(user): print(f'hello <extra_id_0>!')"
         input_ids = tokenizer(text, return_tensors="pt").input_ids
+        mask = (
+            torch.tensor([1 for i in range(input_ids.shape[1])])
+            .to(torch.int64)
+            .reshape((1, -1))
+        )
 
         # simply generate a single sequence
         print()
-        print("-- inputs", string_type(input_ids, with_shape=True, with_min_max=True))
         with steel_forward(model):
-            generated_ids = model.generate(input_ids, max_length=100)
-        print("-- outputs", string_type(generated_ids, with_shape=True, with_min_max=True))
+            generated_ids = model.generate(
+                decoder_input_ids=input_ids, attention_mask=mask, max_length=100
+            )
         print(tokenizer.decode(generated_ids[0], skip_special_tokens=True))
 
 
diff --git a/_unittests/ut_xrun_doc/test_helpers.py b/_unittests/ut_xrun_doc/test_helpers.py
@@ -36,7 +36,7 @@
     rename_dynamic_dimensions,
     rename_dynamic_expression,
 )
-from onnx_diagnostic.cache_helpers import make_dynamic_cache
+from onnx_diagnostic.cache_helpers import make_dynamic_cache, make_encoder_decoder_cache
 
 TFLOAT = onnx.TensorProto.FLOAT
 
@@ -164,6 +164,8 @@ def test_flatten(self):
                 },
             ],
         )
+        diff = max_diff(inputs, inputs, flatten=True, verbose=10)
+        self.assertEqual(diff["abs"], 0)
         flat = flatten_object(inputs, drop_keys=True)
         diff = max_diff(inputs, flat, flatten=True, verbose=10)
         self.assertEqual(diff["abs"], 0)
@@ -442,6 +444,32 @@ def test_from_tensor(self):
             convert_endian(proto)
             dtype_to_tensor_dtype(dt)
 
+    @hide_stdout()
+    def test_flatten_encoder_decoder_cache(self):
+        inputs = (
+            torch.rand((3, 4), dtype=torch.float16),
+            [
+                torch.rand((5, 6), dtype=torch.float16),
+                torch.rand((5, 6, 7), dtype=torch.float16),
+                {
+                    "a": torch.rand((2,), dtype=torch.float16),
+                    "cache": make_encoder_decoder_cache(
+                        make_dynamic_cache([(torch.rand((4, 4, 4)), torch.rand((4, 4, 4)))]),
+                        make_dynamic_cache([(torch.rand((5, 5, 5)), torch.rand((5, 5, 5)))]),
+                    ),
+                },
+            ],
+        )
+        diff = max_diff(inputs, inputs, flatten=True, verbose=10)
+        self.assertEqual(diff["abs"], 0)
+        flat = flatten_object(inputs, drop_keys=True)
+        diff = max_diff(inputs, flat, flatten=True, verbose=10)
+        self.assertEqual(diff["abs"], 0)
+        d = string_diff(diff)
+        self.assertIsInstance(d, str)
+        s = string_type(inputs)
+        self.assertIn("EncoderDecoderCache", s)
+
 
 if __name__ == "__main__":
     unittest.main(verbosity=2)
diff --git a/onnx_diagnostic/cache_helpers.py b/onnx_diagnostic/cache_helpers.py
@@ -102,3 +102,15 @@ def make_dynamic_cache(
         for i, (key, value) in enumerate(key_value_pairs):
             cache.update(key, value, i)
         return cache
+
+
+def make_encoder_decoder_cache(
+    self_attention_cache: transformers.cache_utils.DynamicCache,
+    cross_attention_cache: transformers.cache_utils.DynamicCache,
+) -> transformers.cache_utils.EncoderDecoderCache:
+    """
+    Creates an EncoderDecoderCache.
+    """
+    return transformers.cache_utils.EncoderDecoderCache(
+        self_attention_cache=self_attention_cache, cross_attention_cache=cross_attention_cache
+    )
diff --git a/onnx_diagnostic/helpers.py b/onnx_diagnostic/helpers.py
@@ -1192,6 +1192,9 @@ def flatten_object(x: Any, drop_keys: bool = False) -> Any:
     if x.__class__.__name__ == "DynamicCache":
         res = flatten_object(x.key_cache) + flatten_object(x.value_cache)
         return tuple(res)
+    if x.__class__.__name__ == "EncoderDecoderCache":
+        res = flatten_object(x.self_attention_cache) + flatten_object(x.cross_attention_cache)
+        return tuple(res)
     if x.__class__.__name__ == "MambaCache":
         if isinstance(x.conv_states, list):
             res = flatten_object(x.conv_states) + flatten_object(x.ssm_states)
@@ -1735,6 +1738,31 @@ def max_diff(
             f"level={level}"
         )
 
+    if expected.__class__.__name__ == "EncoderDecoderCache":
+        if got.__class__.__name__ == "EncoderDecoderCache":
+            if verbose >= 6:
+                print(
+                    f"[max_diff] EncoderDecoderCache: "
+                    f"{string_type(expected)} ? {string_type(got)}"
+                )
+            return max_diff(
+                [expected.self_attention_cache, expected.cross_attention_cache],
+                [got.self_attention_cache, got.cross_attention_cache],
+                verbose=verbose,
+            )
+        if isinstance(got, tuple) and len(got) == 2:
+            return max_diff(
+                [expected.self_attention_cache, expected.cross_attention_cache],
+                [got[0], got[1]],
+                verbose=verbose,
+            )
+        raise AssertionError(
+            f"EncoderDecoderCache not fully implemented with classes "
+            f"{expected.__class__.__name__!r} and {got.__class__.__name__!r}, "
+            f"and expected={string_type(expected)}, got={string_type(got)},\n"
+            f"level={level}"
+        )
+
     if expected.__class__.__name__ in ("transformers.cache_utils.MambaCache", "MambaCache"):
         if verbose >= 6:
             print(f"[max_diff] MambaCache: {string_type(expected)} ? {string_type(got)}")
diff --git a/onnx_diagnostic/torch_models/hghub/hub_data.py b/onnx_diagnostic/torch_models/hghub/hub_data.py
@@ -105,6 +105,7 @@
 Swin2SRModel,image-feature-extraction
 SwinModel,image-feature-extraction
 Swinv2Model,image-feature-extraction
+T5ForConditionalGeneration,text2text-generation
 TableTransformerModel,image-feature-extraction
 UniSpeechForSequenceClassification,audio-classification
 ViTForImageClassification,image-classification
diff --git a/onnx_diagnostic/torch_models/hghub/model_inputs.py b/onnx_diagnostic/torch_models/hghub/model_inputs.py