4 changes: 2 additions & 2 deletions _doc/api/tasks/automatic_speech_recognition.rst
@@ -1,6 +1,6 @@

-onnx_diagnostic.export.automatic_speech_recognition
-===================================================
+onnx_diagnostic.tasks.automatic_speech_recognition
+==================================================

.. automodule:: onnx_diagnostic.tasks.automatic_speech_recognition
:members:
4 changes: 2 additions & 2 deletions _doc/api/tasks/fill_mask.rst
@@ -1,6 +1,6 @@

-onnx_diagnostic.export.fill_mask
-================================
+onnx_diagnostic.tasks.fill_mask
+===============================

.. automodule:: onnx_diagnostic.tasks.fill_mask
:members:
4 changes: 2 additions & 2 deletions _doc/api/tasks/image_classification.rst
@@ -1,6 +1,6 @@

-onnx_diagnostic.export.image_classification
-===========================================
+onnx_diagnostic.tasks.image_classification
+==========================================

.. automodule:: onnx_diagnostic.tasks.image_classification
:members:
1 change: 1 addition & 0 deletions _doc/api/tasks/index.rst
@@ -9,6 +9,7 @@ onnx_diagnostic.tasks
fill_mask
image_classification
image_text_to_text
+sentence_similarity
text_classification
text_generation
text2text_generation
7 changes: 7 additions & 0 deletions _doc/api/tasks/sentence_similarity.rst
@@ -0,0 +1,7 @@

+onnx_diagnostic.tasks.sentence_similarity
+=========================================
+
+.. automodule:: onnx_diagnostic.tasks.sentence_similarity
+   :members:
+   :no-undoc-members:
4 changes: 2 additions & 2 deletions _doc/api/tasks/text2text_generation.rst
@@ -1,6 +1,6 @@

-onnx_diagnostic.export.text2text_generation
-===========================================
+onnx_diagnostic.tasks.text2text_generation
+==========================================

.. automodule:: onnx_diagnostic.tasks.text2text_generation
:members:
4 changes: 2 additions & 2 deletions _doc/api/tasks/text_classification.rst
@@ -1,6 +1,6 @@

-onnx_diagnostic.export.text_classification
-==========================================
+onnx_diagnostic.tasks.text_classification
+=========================================

.. automodule:: onnx_diagnostic.tasks.text_classification
:members:
4 changes: 2 additions & 2 deletions _doc/api/tasks/text_generation.rst
@@ -1,6 +1,6 @@

-onnx_diagnostic.export.text_generation
-======================================
+onnx_diagnostic.tasks.text_generation
+=====================================

.. automodule:: onnx_diagnostic.tasks.text_generation
:members:
4 changes: 2 additions & 2 deletions _doc/api/tasks/zero_shot_image_classification.rst
@@ -1,6 +1,6 @@

-onnx_diagnostic.export.zero_shot_image_classification
-=====================================================
+onnx_diagnostic.tasks.zero_shot_image_classification
+====================================================

.. automodule:: onnx_diagnostic.tasks.zero_shot_image_classification
:members:
6 changes: 3 additions & 3 deletions _doc/examples/plot_export_tiny_llm.py
@@ -31,7 +31,7 @@
import transformers
from onnx_diagnostic import doc
from onnx_diagnostic.helpers import string_type
-from onnx_diagnostic.helpers.torch_test_helper import steel_forward
+from onnx_diagnostic.helpers.torch_test_helper import steal_forward
from onnx_diagnostic.torch_models.llms import get_tiny_llm


@@ -77,9 +77,9 @@ def _forward_(*args, _f=None, **kwargs):
model.forward = keep_model_forward

# %%
-# Another syntax with :func:`onnx_diagnostic.helpers.torch_test_helper.steel_forward`.
+# Another syntax with :func:`onnx_diagnostic.helpers.torch_test_helper.steal_forward`.

-with steel_forward(model):
+with steal_forward(model):
model.generate(inputs, max_length=50, temperature=1, top_k=50, top_p=0.95, do_sample=True)

# %%
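Editor's note: the hunks above and below correct a typo in the helper's name, `steel_forward` → `steal_forward`. For reviewers unfamiliar with it, the pattern is the one shown in the example's manual variant (`_forward_` / `keep_model_forward`): a context manager that temporarily replaces `model.forward` to log inputs and outputs, then restores it. A minimal illustrative sketch follows; it is not the library's actual implementation, and `steal_forward_sketch` is a hypothetical name.

```python
import contextlib
import torch


@contextlib.contextmanager
def steal_forward_sketch(model: torch.nn.Module):
    """Illustrative stand-in for steal_forward; the real helper lives in
    onnx_diagnostic.helpers.torch_test_helper and may differ."""
    kept_forward = model.forward  # keep the original forward to restore it later

    def _forward_(*args, **kwargs):
        print("-- inputs:", args, kwargs)  # observe what generate() feeds the model
        result = kept_forward(*args, **kwargs)
        print("-- outputs:", result)
        return result

    model.forward = _forward_
    try:
        yield
    finally:
        model.forward = kept_forward  # never leave the model patched
```

It is used exactly like the renamed helper: `with steal_forward_sketch(model): model.generate(...)`.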
6 changes: 3 additions & 3 deletions _unittests/ut_helpers/test_torch_test_helper.py
@@ -8,7 +8,7 @@
dummy_llm,
to_numpy,
is_torchdynamo_exporting,
-steel_forward,
+steal_forward,
replace_string_by_dynamic,
to_any,
torch_deepcopy,
@@ -43,14 +43,14 @@ def test_to_numpy(self):
self.assertEqual(a.dtype, ml_dtypes.bfloat16)

@hide_stdout()
-def test_steel_forward(self):
+def test_steal_forward(self):
class Model(torch.nn.Module):
def forward(self, x, y):
return x + y

inputs = torch.rand(3, 4), torch.rand(3, 4)
model = Model()
-with steel_forward(model):
+with steal_forward(model):
model(*inputs)

def test_replace_string_by_dynamic(self):
21 changes: 17 additions & 4 deletions _unittests/ut_tasks/test_tasks.py
@@ -9,7 +9,6 @@ class TestTasks(ExtTestCase):
@hide_stdout()
def test_text2text_generation(self):
mid = "sshleifer/tiny-marian-en-de"
-# mid = "Salesforce/codet5-small"
data = get_untrained_model_with_inputs(mid, verbose=1)
self.assertIn((data["size"], data["n_weights"]), [(473928, 118482)])
model, inputs = data["model"], data["inputs"]
@@ -85,7 +84,6 @@ def test_automatic_speech_recognition(self):
@hide_stdout()
def test_imagetext2text_generation(self):
mid = "HuggingFaceM4/tiny-random-idefics"
-# mid = "Salesforce/codet5-small"
data = get_untrained_model_with_inputs(mid, verbose=1)
self.assertIn((data["size"], data["n_weights"]), [(12742888, 3185722)])
model, inputs = data["model"], data["inputs"]
@@ -94,7 +92,6 @@ def test_imagetext2text_generation(self):
@hide_stdout()
def test_fill_mask(self):
mid = "google-bert/bert-base-multilingual-cased"
-# mid = "Salesforce/codet5-small"
data = get_untrained_model_with_inputs(mid, verbose=1)
self.assertIn((data["size"], data["n_weights"]), [(428383212, 107095803)])
model, inputs = data["model"], data["inputs"]
@@ -103,12 +100,28 @@ def test_text_classification(self):
@hide_stdout()
def test_text_classification(self):
mid = "Intel/bert-base-uncased-mrpc"
-# mid = "Salesforce/codet5-small"
data = get_untrained_model_with_inputs(mid, verbose=1)
self.assertIn((data["size"], data["n_weights"]), [(154420232, 38605058)])
model, inputs = data["model"], data["inputs"]
model(**inputs)

+    @hide_stdout()
+    def test_sentence_similarity(self):
+        mid = "sentence-transformers/all-MiniLM-L6-v1"
+        data = get_untrained_model_with_inputs(mid, verbose=1)
+        self.assertIn((data["size"], data["n_weights"]), [(62461440, 15615360)])
+        model, inputs = data["model"], data["inputs"]
+        model(**inputs)
+
+    @hide_stdout()
+    def test_falcon_mamba_dev(self):
+        mid = "tiiuae/falcon-mamba-tiny-dev"
+        data = get_untrained_model_with_inputs(mid, verbose=1)
+        model, inputs = data["model"], data["inputs"]
+        print(self.string_type(inputs, with_shape=True))
+        model(**inputs)
+        self.assertIn((data["size"], data["n_weights"]), [(138640384, 34660096)])
+

if __name__ == "__main__":
unittest.main(verbosity=2)
131 changes: 127 additions & 4 deletions _unittests/ut_tasks/try_tasks.py
@@ -1,7 +1,7 @@
import unittest
from onnx_diagnostic.ext_test_case import ExtTestCase, never_test
from onnx_diagnostic.helpers import string_type
-from onnx_diagnostic.helpers.torch_test_helper import steel_forward
+from onnx_diagnostic.helpers.torch_test_helper import steal_forward


class TestHuggingFaceHubModel(ExtTestCase):
@@ -92,7 +92,7 @@ def test_text2text_generation(self):

# simply generate a single sequence
print()
-with steel_forward(model):
+with steal_forward(model):
generated_ids = model.generate(
decoder_input_ids=input_ids, attention_mask=mask, max_length=100
)
@@ -121,7 +121,7 @@ def test_imagetext2text_generation(self):
["<image>", "<fake_token_around_image>"], add_special_tokens=False
).input_ids
print()
-with steel_forward(model):
+with steal_forward(model):
generated_ids = model.generate(
**inputs, max_new_tokens=10, bad_words_ids=bad_words_ids
)
@@ -184,7 +184,7 @@ def test_automatic_speech_recognition(self):

# generate token ids
print()
-with steel_forward(model):
+with steal_forward(model):
predicted_ids = model.generate(
input_features, forced_decoder_ids=forced_decoder_ids
)
@@ -236,6 +236,129 @@ def test_text_classification(self):
encoded_input["input_ids"][0]
tokenizer.convert_ids_to_tokens(encoded_input["input_ids"][0])

+    @never_test()
+    def test_sentence_similarity(self):
+        # clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k ce_sim
+        # https://huggingface.co/sentence-transformers/all-MiniLM-L6-v1
+
+        from transformers import AutoTokenizer, AutoModel
+        import torch
+        import torch.nn.functional as F
+
+        # Mean Pooling - Take attention mask into account for correct averaging
+        def mean_pooling(model_output, attention_mask):
+            token_embeddings = model_output[
+                0
+            ]  # First element of model_output contains all token embeddings
+            input_mask_expanded = (
+                attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
+            )
+            return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(
+                input_mask_expanded.sum(1), min=1e-9
+            )
+
+        # Sentences we want sentence embeddings for
+        sentences = ["This is an example sentence", "Each sentence is converted"]
+
+        # Load model from HuggingFace Hub
+        tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v1")
+        model = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v1")
+
+        # Tokenize sentences
+        encoded_input = tokenizer(
+            sentences, padding=True, truncation=True, return_tensors="pt"
+        )
+
+        # Compute token embeddings
+        with torch.no_grad():
+            print()
+            print("-- inputs", string_type(encoded_input, with_shape=True, with_min_max=True))
+            model_output = model(**encoded_input)
+            print("-- outputs", string_type(model_output, with_shape=True, with_min_max=True))
+
+        # Perform pooling
+        sentence_embeddings = mean_pooling(model_output, encoded_input["attention_mask"])
+
+        # Normalize embeddings
+        sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1)
+
+        print("Sentence embeddings:")
+        print(sentence_embeddings)
+
+    @never_test()
+    def test_falcon_mamba_dev(self):
+        # clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k falcon_mamba_dev
+        # https://huggingface.co/tiiuae/falcon-mamba-tiny-dev
+
+        from transformers import AutoTokenizer
+        import transformers
+        import torch
+
+        model = "tiiuae/falcon-mamba-tiny-dev"
+
+        tokenizer = AutoTokenizer.from_pretrained(model)
+        pipeline = transformers.pipeline(
+            "text-generation",
+            model=model,
+            tokenizer=tokenizer,
+            torch_dtype=torch.bfloat16,
+            trust_remote_code=True,
+            device_map="auto",
+        )
+        print()
+        with steal_forward(pipeline.model):
+            sequences = pipeline(
+                "Girafatron is obsessed with giraffes, "
+                "the most glorious animal on the face of this Earth. "
+                "Giraftron believes all other animals are irrelevant "
+                "when compared to the glorious majesty of the giraffe."
+                "\nDaniel: Hello, Girafatron!\nGirafatron:",
+                max_length=200,
+                do_sample=True,
+                top_k=10,
+                num_return_sequences=1,
+                eos_token_id=tokenizer.eos_token_id,
+            )
+        for seq in sequences:
+            print(f"Result: {seq['generated_text']}")
+
+    @never_test()
+    def test_falcon_mamba_7b(self):
+        # clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k falcon_mamba_7b
+        # https://huggingface.co/tiiuae/falcon-mamba-7b
+
+        from transformers import AutoTokenizer
+        import transformers
+        import torch
+
+        model = "tiiuae/falcon-mamba-7b"
+
+        tokenizer = AutoTokenizer.from_pretrained(model)
+        pipeline = transformers.pipeline(
+            "text-generation",
+            model=model,
+            tokenizer=tokenizer,
+            torch_dtype=torch.bfloat16,
+            trust_remote_code=True,
+            device_map="auto",
+        )
+        print()
+        with steal_forward(pipeline.model):
+            sequences = pipeline(
+                "Girafatron is obsessed with giraffes, "
+                "the most glorious animal on the face of this Earth. "
+                "Giraftron believes all other animals are irrelevant "
+                "when compared to the glorious majesty of the giraffe."
+                "\nDaniel: Hello, Girafatron!\nGirafatron:",
+                max_length=200,
+                do_sample=True,
+                top_k=10,
+                num_return_sequences=1,
+                eos_token_id=tokenizer.eos_token_id,
+            )
+        for seq in sequences:
+            print(f"Result: {seq['generated_text']}")
+

if __name__ == "__main__":
unittest.main(verbosity=2)
5 changes: 4 additions & 1 deletion _unittests/ut_xrun_doc/test_documentation_examples.py
@@ -54,7 +54,10 @@ def run_test(self, fold: str, name: str, verbose=0) -> int:
# dot not installed, this part
# is tested in onnx framework
raise unittest.SkipTest(f"failed: {name!r} due to missing dot.")
if "We couldn't connect to 'https://huggingface.co'" in st:
if (
"We couldn't connect to 'https://huggingface.co'" in st
or "Cannot access content at: https://huggingface.co/" in st
):
raise unittest.SkipTest(f"Connectivity issues due to\n{err}")
raise AssertionError( # noqa: B904
"Example '{}' (cmd: {} - exec_prefix='{}') "
32 changes: 32 additions & 0 deletions onnx_diagnostic/helpers/cache_helper.py
Expand Up @@ -136,3 +136,35 @@ def make_encoder_decoder_cache(
return transformers.cache_utils.EncoderDecoderCache(
self_attention_cache=self_attention_cache, cross_attention_cache=cross_attention_cache
)


+def make_mamba_cache(
+    key_value_pairs: List[Tuple[torch.Tensor, torch.Tensor]],
+) -> transformers.cache_utils.MambaCache:
+    "Creates a :class:`transformers.cache_utils.MambaCache`."
+
+    class _config:
+        def __init__(self):
+            self.intermediate_size = key_value_pairs[0][0].shape[1]
+            self.conv_kernel = key_value_pairs[0][0].shape[-1]
+            self.state_size = key_value_pairs[0][1].shape[-1]
+            self.num_hidden_layers = len(key_value_pairs)
+            self.dtype = key_value_pairs[0][0].dtype
+
+    cache = transformers.cache_utils.MambaCache(
+        _config(),
+        max_batch_size=key_value_pairs[0][0].shape[0],
+        device=key_value_pairs[0][0].device,
+    )
+    for i in range(len(key_value_pairs)):
+        assert cache.conv_states[i].shape == key_value_pairs[i][0].shape, (
+            f"Shape mismatch, expected {cache.conv_states[i].shape}, "
+            f"got {key_value_pairs[i][0].shape}"
+        )
+        cache.conv_states[i][:, :, :] = key_value_pairs[i][0]
+        assert cache.ssm_states[i].shape == key_value_pairs[i][1].shape, (
+            f"Shape mismatch, expected {cache.ssm_states[i].shape}, "
+            f"got {key_value_pairs[i][1].shape}"
+        )
+        cache.ssm_states[i][:, :, :] = key_value_pairs[i][1]
+    return cache
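Editor's note: a minimal usage sketch of the new helper. It derives the cache layout from the pairs themselves: one `(conv_state, ssm_state)` tuple per layer, with shapes `(batch, intermediate_size, conv_kernel)` and `(batch, intermediate_size, state_size)`. The sizes below (batch=2, intermediate_size=16, conv_kernel=4, state_size=8, 3 layers) are illustrative assumptions, not values from this PR.

```python
import torch
from onnx_diagnostic.helpers.cache_helper import make_mamba_cache

# One (conv_state, ssm_state) pair per layer; sizes are hypothetical.
pairs = [
    (torch.randn(2, 16, 4), torch.randn(2, 16, 8))  # (conv_state, ssm_state)
    for _ in range(3)  # pretend the model has 3 hidden layers
]
cache = make_mamba_cache(pairs)
# The cache now holds one conv/ssm state per layer, copied from the pairs.
print(len(cache.conv_states), cache.conv_states[0].shape)  # 3, torch.Size([2, 16, 4])
```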