From d214c04225b385dbf6ffc2f0fbcbaa849dd5e266 Mon Sep 17 00:00:00 2001
From: xadupre <xadupre@microsoft.com>
Date: Wed, 23 Apr 2025 11:51:40 +0200
Subject: [PATCH 01/10] improves documentation

---
 _doc/conf.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/_doc/conf.py b/_doc/conf.py
index e82b88db..9f895ec5 100644
--- a/_doc/conf.py
+++ b/_doc/conf.py
@@ -12,6 +12,7 @@
     "sphinx.ext.githubpages",
     "sphinx.ext.ifconfig",
     "sphinx.ext.intersphinx",
+    "sphinx.ext.linkcode",
     "sphinx.ext.mathjax",
     "sphinx.ext.viewcode",
     "sphinx.ext.todo",
@@ -63,8 +64,8 @@
 # ]
 
 # The following is used by sphinx.ext.linkcode to provide links to github
-linkcode_resolve = make_linkcode_resolve(
-    "onnx-diagnostic",
+_linkcode_resolve = make_linkcode_resolve(
+    "onnx_diagnostic",
     (
         "https://github.com/sdpython/onnx-diagnostic/"
         "blob/{revision}/{package}/"
@@ -72,6 +73,11 @@
     ),
 )
 
+
+def linkcode_resolve(domain, info):
+    return _linkcode_resolve(domain, info)
+
+
 latex_elements = {
     "papersize": "a4",
     "pointsize": "10pt",

From 87568cb7811bb4788cbad705f8acb037e692f19e Mon Sep 17 00:00:00 2001
From: xadupre <xadupre@microsoft.com>
Date: Thu, 24 Apr 2025 09:41:39 +0200
Subject: [PATCH 02/10] first step for moe

---
 _unittests/ut_tasks/try_tasks.py              | 101 +++++++++++-
 onnx_diagnostic/tasks/__init__.py             |   2 +
 onnx_diagnostic/tasks/mixture_of_expert.py    | 154 ++++++++++++++++++
 .../torch_models/hghub/hub_data.py            |   1 +
 4 files changed, 256 insertions(+), 2 deletions(-)
 create mode 100644 onnx_diagnostic/tasks/mixture_of_expert.py

diff --git a/_unittests/ut_tasks/try_tasks.py b/_unittests/ut_tasks/try_tasks.py
index 8c700a54..d1e8fa4f 100644
--- a/_unittests/ut_tasks/try_tasks.py
+++ b/_unittests/ut_tasks/try_tasks.py
@@ -99,8 +99,8 @@ def test_text2text_generation(self):
         print(tokenizer.decode(generated_ids[0], skip_special_tokens=True))
 
     @never_test()
-    def test_text_generation_phi4(self):
-        # clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k phi4
+    def test_text_generation_phi4_mini(self):
+        # clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k phi4_mini
 
         import torch
         from transformers import RobertaTokenizer, T5ForConditionalGeneration
@@ -124,6 +124,103 @@ def test_text_generation_phi4(self):
             )
         print(tokenizer.decode(generated_ids[0], skip_special_tokens=True))
 
+    @never_test()
+    def test_text_generation_phi4_moe(self):
+        # clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k phi4_moe
+
+        import requests
+        import io
+        from PIL import Image
+        import soundfile as sf
+        from transformers import AutoModelForCausalLM, AutoProcessor, GenerationConfig
+        from urllib.request import urlopen
+
+        # Define model path
+        model_path = "microsoft/Phi-4-multimodal-instruct"
+
+        # Load model and processor
+        processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)
+        model = AutoModelForCausalLM.from_pretrained(
+            model_path,
+            device_map="cuda",
+            torch_dtype="auto",
+            trust_remote_code=True,
+            # if you do not use Ampere or later GPUs, change attention to "eager"
+            # _attn_implementation='flash_attention_2',
+            _attn_implementation="eager",
+        ).cuda()
+
+        # Load generation config
+        generation_config = GenerationConfig.from_pretrained(model_path)
+
+        # Define prompt structure
+        user_prompt = "<|user|>"
+        assistant_prompt = "<|assistant|>"
+        prompt_suffix = "<|end|>"
+
+        # Part 1: Image Processing
+        print("\n--- IMAGE PROCESSING ---")
+        image_url = "https://www.ilankelman.org/stopsigns/australia.jpg"
+        prompt = (
+            f"{user_prompt}<|image_1|>What is shown in this image"
+            f"?{prompt_suffix}{assistant_prompt}"
+        )
+        print(f">>> Prompt\n{prompt}")
+
+        # Download and open image
+        image = Image.open(requests.get(image_url, stream=True).raw)
+        inputs = processor(text=prompt, images=image, return_tensors="pt").to("cuda:0")
+
+        # Generate response
+        print("--------- IMAGE PROCESSING ----------")
+        print()
+        with steal_forward(model):
+            generate_ids = model.generate(
+                **inputs,
+                max_new_tokens=1000,
+                generation_config=generation_config,
+            )
+        generate_ids = generate_ids[:, inputs["input_ids"].shape[1] :]
+        response = processor.batch_decode(
+            generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
+        )[0]
+        print(f">>> Response\n{response}")
+
+        # Part 2: Audio Processing
+        print("\n--- AUDIO PROCESSING ---")
+        audio_url = (
+            "https://upload.wikimedia.org/wikipedia/commons/b/b0/"
+            "Barbara_Sahakian_BBC_Radio4_The_Life_Scientific_29_May_2012_b01j5j24.flac"
+        )
+        speech_prompt = (
+            "Transcribe the audio to text, and then translate the audio to French. "
+            "Use <sep> as a separator between the original transcript and the translation."
+        )
+        prompt = f"{user_prompt}<|audio_1|>{speech_prompt}{prompt_suffix}{assistant_prompt}"
+        print(f">>> Prompt\n{prompt}")
+
+        # Downlowd and open audio file
+        audio, samplerate = sf.read(io.BytesIO(urlopen(audio_url).read()))
+
+        # Process with the model
+        inputs = processor(text=prompt, audios=[(audio, samplerate)], return_tensors="pt").to(
+            "cuda:0"
+        )
+
+        print("--------- AUDIO PROCESSING ----------")
+        print()
+        with steal_forward(model):
+            generate_ids = model.generate(
+                **inputs,
+                max_new_tokens=1000,
+                generation_config=generation_config,
+            )
+        generate_ids = generate_ids[:, inputs["input_ids"].shape[1] :]
+        response = processor.batch_decode(
+            generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
+        )[0]
+        print(f">>> Response\n{response}")
+
     @never_test()
     def test_imagetext2text_generation(self):
         # clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k etext2t
diff --git a/onnx_diagnostic/tasks/__init__.py b/onnx_diagnostic/tasks/__init__.py
index 3235d255..2e6130ec 100644
--- a/onnx_diagnostic/tasks/__init__.py
+++ b/onnx_diagnostic/tasks/__init__.py
@@ -4,6 +4,7 @@
     fill_mask,
     image_classification,
     image_text_to_text,
+    mixture_of_expert,
     sentence_similarity,
     text_classification,
     text_generation,
@@ -16,6 +17,7 @@
     fill_mask,
     image_classification,
     image_text_to_text,
+    mixture_of_expert,
     sentence_similarity,
     text_classification,
     text_generation,
diff --git a/onnx_diagnostic/tasks/mixture_of_expert.py b/onnx_diagnostic/tasks/mixture_of_expert.py
new file mode 100644
index 00000000..247e5ac9
--- /dev/null
+++ b/onnx_diagnostic/tasks/mixture_of_expert.py
@@ -0,0 +1,154 @@
+from typing import Any, Callable, Dict, Optional, Tuple
+import torch
+from ..helpers.cache_helper import make_dynamic_cache
+from ..helpers.config_helper import update_config, check_hasattr, _pick
+
+__TASK__ = "MoE"
+
+
+def reduce_model_config(config: Any) -> Dict[str, Any]:
+    """Reduces a model size."""
+    kwargs: Dict[str, Any] = {}
+    if hasattr(config, "num_hidden_layers"):
+        config.num_hidden_layers = min(config.num_hidden_layers, 2)
+    if hasattr(config, "vision_config") and hasattr(config.vision_config, "num_hidden_layers"):
+        config.vision_config.num_hidden_layers = min(config.vision_config.num_hidden_layers, 2)
+    if hasattr(config, "audio_processor") and hasattr(
+        config.audio_processor, "num_hidden_layers"
+    ):
+        config.audio_processor.num_hidden_layers = min(
+            config.audio_processor.num_hidden_layers, 2
+        )
+    if hasattr(config, "audio_processor") and hasattr(config.audio_processor, "attention_dim"):
+        config.audio_processor.attention_dim = min(config.audio_processor.attention_dim, 2)
+    update_config(config, kwargs)
+    return kwargs
+
+
+def get_inputs(
+    model: torch.nn.Module,
+    config: Optional[Any],
+    dummy_max_token_id: int,
+    num_key_value_heads: int,
+    num_hidden_layers: int,
+    head_dim: int,
+    width: int,
+    height: int,
+    num_channels: int,
+    batch_size: int = 2,
+    sequence_length: int = 30,
+    sequence_length2: int = 3,
+    n_images: int = 2,
+    dynamic_rope: bool = False,
+    **kwargs,  # unused
+):
+    """
+    Generates input for task ``text-generation``.
+
+    :param model: model to get the missing information
+    :param config: configuration used to generate the model
+    :param head_dim: last dimension of the cache
+    :param dummy_max_token_id: dummy max token id
+    :param batch_size: batch size
+    :param sequence_length: sequence length
+    :param sequence_length2: new sequence length
+    :param n_images: number of images
+    :param width: width of the image
+    :param height: height of the image
+    :param num_channels: number of channels
+    :param dynamic_rope: use dynamic rope (see :class:`transformers.LlamaConfig`)
+    :return: dictionary
+    """
+    batch = torch.export.Dim("batch", min=1, max=1024)
+    seq_length = "seq_length"  # torch.export.Dim("seq_length", min=1, max=4096)
+    cache_length = "cache_length"  # torch.export.Dim("cache_length", min=1, max=4096)
+    images = "images"  # torch.export.Dim("images", min=1, max=4096)
+
+    shapes = {
+        "input_ids": {0: batch, 1: seq_length},
+        "attention_mask": {
+            0: batch,
+            1: "cache+seq",  # cache_length + seq_length
+        },
+        "position_ids": {
+            0: batch,
+            1: "cache+seq",  # cache_length + seq_length
+        },
+        "past_key_values": [
+            [{0: batch, 2: cache_length} for _ in range(num_hidden_layers)],
+            [{0: batch, 2: cache_length} for _ in range(num_hidden_layers)],
+        ],
+        "pixel_values": {0: batch, 1: images},
+        "image_attention_mask": {0: batch, 1: seq_length, 2: images},
+    }
+    inputs = dict(
+        input_ids=torch.randint(0, dummy_max_token_id, (batch_size, sequence_length2)).to(
+            torch.int64
+        ),
+        attention_mask=torch.ones((batch_size, sequence_length + sequence_length2)).to(
+            torch.int64
+        ),
+        position_ids=torch.arange(sequence_length, sequence_length + sequence_length2)
+        .to(torch.int64)
+        .expand((batch_size, -1)),
+        past_key_values=make_dynamic_cache(
+            [
+                (
+                    torch.randn(batch_size, num_key_value_heads, sequence_length, head_dim),
+                    torch.randn(batch_size, num_key_value_heads, sequence_length, head_dim),
+                )
+                for i in range(num_hidden_layers)
+            ]
+        ),
+        image_attention_mask=torch.ones((batch_size, sequence_length2, n_images)).to(
+            torch.int64
+        ),
+        pixel_values=torch.ones((batch_size, n_images, num_channels, width, height)).to(
+            torch.int64
+        ),
+    )
+    return dict(inputs=inputs, dynamic_shapes=shapes)
+
+
+def random_input_kwargs(config: Any) -> Tuple[Dict[str, Any], Callable]:
+    """
+    Inputs kwargs.
+
+    If the configuration is None, the function selects typical dimensions.
+    """
+    if config is not None:
+        check_hasattr(
+            config,
+            "vocab_size",
+            "hidden_size",
+            "num_attention_heads",
+            ("num_key_value_heads", "num_attention_heads"),
+            "intermediate_size",
+            "hidden_size",
+            "vision_config",
+            "audio_processor",
+        )
+        check_hasattr(config.vision_config, "image_size", "num_channels")
+    kwargs = dict(
+        batch_size=2,
+        sequence_length=30,
+        sequence_length2=3,
+        head_dim=(
+            16
+            if config is None
+            else getattr(config, "head_dim", config.hidden_size // config.num_attention_heads)
+        ),
+        dummy_max_token_id=31999 if config is None else config.vocab_size - 1,
+        num_hidden_layers=4 if config is None else config.num_hidden_layers,
+        num_key_value_heads=(
+            8
+            if config is None
+            else _pick(config, "num_key_value_heads", "num_attention_heads")
+        ),
+        intermediate_size=1024 if config is None else config.intermediate_size,
+        hidden_size=512 if config is None else config.hidden_size,
+        width=224 if config is None else config.vision_config.image_size,
+        height=224 if config is None else config.vision_config.image_size,
+        num_channels=3 if config is None else config.vision_config.num_channels,
+    )
+    return kwargs, get_inputs
diff --git a/onnx_diagnostic/torch_models/hghub/hub_data.py b/onnx_diagnostic/torch_models/hghub/hub_data.py
index 5c5590ff..51d5871c 100644
--- a/onnx_diagnostic/torch_models/hghub/hub_data.py
+++ b/onnx_diagnostic/torch_models/hghub/hub_data.py
@@ -76,6 +76,7 @@
     MobileNetV2Model,image-feature-extraction
     MobileViTForImageClassification,image-classification
     ModernBertForMaskedLM,fill-mask
+    Phi4MMForCausalLM,MoE
     MoonshineForConditionalGeneration,automatic-speech-recognition
     MptForCausalLM,text-generation
     MusicgenForConditionalGeneration,text-to-audio

From de3bf91775935290704f2bd9e2ce06f89608abaa Mon Sep 17 00:00:00 2001
From: xadupre <xadupre@microsoft.com>
Date: Thu, 24 Apr 2025 09:42:49 +0200
Subject: [PATCH 03/10] doc

---
 _doc/api/tasks/index.rst             | 1 +
 _doc/api/tasks/mixture_of_expert.rst | 7 +++++++
 2 files changed, 8 insertions(+)
 create mode 100644 _doc/api/tasks/mixture_of_expert.rst

diff --git a/_doc/api/tasks/index.rst b/_doc/api/tasks/index.rst
index c18e7426..49f09048 100644
--- a/_doc/api/tasks/index.rst
+++ b/_doc/api/tasks/index.rst
@@ -36,6 +36,7 @@ Or:
     fill_mask
     image_classification
     image_text_to_text
+    mixture_of_expert
     sentence_similarity
     text_classification
     text_generation
diff --git a/_doc/api/tasks/mixture_of_expert.rst b/_doc/api/tasks/mixture_of_expert.rst
new file mode 100644
index 00000000..2009925c
--- /dev/null
+++ b/_doc/api/tasks/mixture_of_expert.rst
@@ -0,0 +1,7 @@
+
+onnx_diagnostic.tasks.mixture_of_expert
+=======================================
+
+.. automodule:: onnx_diagnostic.tasks.mixture_of_expert
+    :members:
+    :no-undoc-members:

From 13ecbd8e5f9f5b1c6c5270073348ea0b1180834f Mon Sep 17 00:00:00 2001
From: xadupre <xadupre@microsoft.com>
Date: Thu, 24 Apr 2025 11:59:55 +0200
Subject: [PATCH 04/10] disable

---
 _doc/recipes/plot_dynamic_shapes_max.py       |  4 +-
 _unittests/ut_tasks/try_tasks.py              |  4 +
 .../tasks/automatic_speech_recognition.py     |  2 +-
 onnx_diagnostic/tasks/image_text_to_text.py   |  2 +-
 onnx_diagnostic/tasks/mixture_of_expert.py    | 93 ++-----------------
 5 files changed, 15 insertions(+), 90 deletions(-)

diff --git a/_doc/recipes/plot_dynamic_shapes_max.py b/_doc/recipes/plot_dynamic_shapes_max.py
index 880200ae..83844c7c 100644
--- a/_doc/recipes/plot_dynamic_shapes_max.py
+++ b/_doc/recipes/plot_dynamic_shapes_max.py
@@ -10,6 +10,8 @@
 in the exported program is something very aggreessive. Here is a case where
 it takes a wrong decision and how to get around it.
 
+**This bug was fixed after 4/24/2025**.
+
 Wrong Model
 +++++++++++
 """
@@ -183,4 +185,4 @@ def forward(self, x, y, fact):
 # is hidden in a custom operator.
 
 
-doc.plot_legend("dynamic shapes\nworkaround\nmax(d1, d2)", "dynamic shapes", "yellow")
+doc.plot_legend("max(d1, d2)\nwith d1, d2 dimensions", "dynamic shapes", "green")
diff --git a/_unittests/ut_tasks/try_tasks.py b/_unittests/ut_tasks/try_tasks.py
index d1e8fa4f..5c542e22 100644
--- a/_unittests/ut_tasks/try_tasks.py
+++ b/_unittests/ut_tasks/try_tasks.py
@@ -125,6 +125,10 @@ def test_text_generation_phi4_mini(self):
         print(tokenizer.decode(generated_ids[0], skip_special_tokens=True))
 
     @never_test()
+    @unittest.skip(
+        reason="AttributeError: 'Phi4MMModel' object has no attribute "
+        "'prepare_inputs_for_generation'"
+    )
     def test_text_generation_phi4_moe(self):
         # clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k phi4_moe
 
diff --git a/onnx_diagnostic/tasks/automatic_speech_recognition.py b/onnx_diagnostic/tasks/automatic_speech_recognition.py
index 020bdd55..d5ec5475 100644
--- a/onnx_diagnostic/tasks/automatic_speech_recognition.py
+++ b/onnx_diagnostic/tasks/automatic_speech_recognition.py
@@ -36,7 +36,7 @@ def get_inputs(
     **kwargs,  # unused
 ):
     """
-    Generates inputs for task ``text2text-generation``.
+    Generates inputs for task ``automatic-speech-recognition``.
     Example:
 
     ::
diff --git a/onnx_diagnostic/tasks/image_text_to_text.py b/onnx_diagnostic/tasks/image_text_to_text.py
index 0e781982..2e4e5eae 100644
--- a/onnx_diagnostic/tasks/image_text_to_text.py
+++ b/onnx_diagnostic/tasks/image_text_to_text.py
@@ -35,7 +35,7 @@ def get_inputs(
     **kwargs,  # unused
 ):
     """
-    Generates input for task ``text-generation``.
+    Generates input for task ``image-text-to-text``.
 
     :param model: model to get the missing information
     :param config: configuration used to generate the model
diff --git a/onnx_diagnostic/tasks/mixture_of_expert.py b/onnx_diagnostic/tasks/mixture_of_expert.py
index 247e5ac9..3216e3d6 100644
--- a/onnx_diagnostic/tasks/mixture_of_expert.py
+++ b/onnx_diagnostic/tasks/mixture_of_expert.py
@@ -1,7 +1,7 @@
 from typing import Any, Callable, Dict, Optional, Tuple
 import torch
-from ..helpers.cache_helper import make_dynamic_cache
-from ..helpers.config_helper import update_config, check_hasattr, _pick
+# from ..helpers.cache_helper import make_dynamic_cache
+from ..helpers.config_helper import update_config  # , check_hasattr, _pick
 
 __TASK__ = "MoE"
 
@@ -43,7 +43,7 @@ def get_inputs(
     **kwargs,  # unused
 ):
     """
-    Generates input for task ``text-generation``.
+    Generates input for task ``MoE``.
 
     :param model: model to get the missing information
     :param config: configuration used to generate the model
@@ -59,55 +59,7 @@ def get_inputs(
     :param dynamic_rope: use dynamic rope (see :class:`transformers.LlamaConfig`)
     :return: dictionary
     """
-    batch = torch.export.Dim("batch", min=1, max=1024)
-    seq_length = "seq_length"  # torch.export.Dim("seq_length", min=1, max=4096)
-    cache_length = "cache_length"  # torch.export.Dim("cache_length", min=1, max=4096)
-    images = "images"  # torch.export.Dim("images", min=1, max=4096)
-
-    shapes = {
-        "input_ids": {0: batch, 1: seq_length},
-        "attention_mask": {
-            0: batch,
-            1: "cache+seq",  # cache_length + seq_length
-        },
-        "position_ids": {
-            0: batch,
-            1: "cache+seq",  # cache_length + seq_length
-        },
-        "past_key_values": [
-            [{0: batch, 2: cache_length} for _ in range(num_hidden_layers)],
-            [{0: batch, 2: cache_length} for _ in range(num_hidden_layers)],
-        ],
-        "pixel_values": {0: batch, 1: images},
-        "image_attention_mask": {0: batch, 1: seq_length, 2: images},
-    }
-    inputs = dict(
-        input_ids=torch.randint(0, dummy_max_token_id, (batch_size, sequence_length2)).to(
-            torch.int64
-        ),
-        attention_mask=torch.ones((batch_size, sequence_length + sequence_length2)).to(
-            torch.int64
-        ),
-        position_ids=torch.arange(sequence_length, sequence_length + sequence_length2)
-        .to(torch.int64)
-        .expand((batch_size, -1)),
-        past_key_values=make_dynamic_cache(
-            [
-                (
-                    torch.randn(batch_size, num_key_value_heads, sequence_length, head_dim),
-                    torch.randn(batch_size, num_key_value_heads, sequence_length, head_dim),
-                )
-                for i in range(num_hidden_layers)
-            ]
-        ),
-        image_attention_mask=torch.ones((batch_size, sequence_length2, n_images)).to(
-            torch.int64
-        ),
-        pixel_values=torch.ones((batch_size, n_images, num_channels, width, height)).to(
-            torch.int64
-        ),
-    )
-    return dict(inputs=inputs, dynamic_shapes=shapes)
+    raise NotImplementedError(f"get_inputs not yet implemented for task {__TASK__!r}.")
 
 
 def random_input_kwargs(config: Any) -> Tuple[Dict[str, Any], Callable]:
@@ -116,39 +68,6 @@ def random_input_kwargs(config: Any) -> Tuple[Dict[str, Any], Callable]:
 
     If the configuration is None, the function selects typical dimensions.
     """
-    if config is not None:
-        check_hasattr(
-            config,
-            "vocab_size",
-            "hidden_size",
-            "num_attention_heads",
-            ("num_key_value_heads", "num_attention_heads"),
-            "intermediate_size",
-            "hidden_size",
-            "vision_config",
-            "audio_processor",
-        )
-        check_hasattr(config.vision_config, "image_size", "num_channels")
-    kwargs = dict(
-        batch_size=2,
-        sequence_length=30,
-        sequence_length2=3,
-        head_dim=(
-            16
-            if config is None
-            else getattr(config, "head_dim", config.hidden_size // config.num_attention_heads)
-        ),
-        dummy_max_token_id=31999 if config is None else config.vocab_size - 1,
-        num_hidden_layers=4 if config is None else config.num_hidden_layers,
-        num_key_value_heads=(
-            8
-            if config is None
-            else _pick(config, "num_key_value_heads", "num_attention_heads")
-        ),
-        intermediate_size=1024 if config is None else config.intermediate_size,
-        hidden_size=512 if config is None else config.hidden_size,
-        width=224 if config is None else config.vision_config.image_size,
-        height=224 if config is None else config.vision_config.image_size,
-        num_channels=3 if config is None else config.vision_config.num_channels,
+    raise NotImplementedError(
+        f"random_input_kwargs not yet implemented for task {__TASK__!r}."
     )
-    return kwargs, get_inputs

From 1a9b82102adfdef9c8be3aa10e8e3a00e5242105 Mon Sep 17 00:00:00 2001
From: xadupre <xadupre@microsoft.com>
Date: Thu, 24 Apr 2025 12:21:05 +0200
Subject: [PATCH 05/10] add 3.10

---
 .github/workflows/ci.yml                   | 2 +-
 onnx_diagnostic/tasks/mixture_of_expert.py | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 4b03de8f..d75460b4 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -15,7 +15,7 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-latest]
-        python: ['3.11', '3.12']
+        python: ['3.10', '3.11', '3.12']
         transformers: ['4.48.3', '4.51.3', 'main']
         torch: ['2.6', 'main']
 
diff --git a/onnx_diagnostic/tasks/mixture_of_expert.py b/onnx_diagnostic/tasks/mixture_of_expert.py
index 3216e3d6..d7d302ac 100644
--- a/onnx_diagnostic/tasks/mixture_of_expert.py
+++ b/onnx_diagnostic/tasks/mixture_of_expert.py
@@ -1,5 +1,6 @@
 from typing import Any, Callable, Dict, Optional, Tuple
 import torch
+
 # from ..helpers.cache_helper import make_dynamic_cache
 from ..helpers.config_helper import update_config  # , check_hasattr, _pick
 

From a7f1b3608806b4a27ed7f7681c605ddf942b76d0 Mon Sep 17 00:00:00 2001
From: xadupre <xadupre@microsoft.com>
Date: Thu, 24 Apr 2025 12:37:55 +0200
Subject: [PATCH 06/10] fix ut

---
 _unittests/ut_tasks/try_tasks.py                | 2 +-
 _unittests/ut_torch_models/test_test_helpers.py | 7 ++++++-
 onnx_diagnostic/ext_test_case.py                | 2 +-
 onnx_diagnostic/torch_models/test_helper.py     | 4 +++-
 4 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/_unittests/ut_tasks/try_tasks.py b/_unittests/ut_tasks/try_tasks.py
index 5c542e22..e929ad2a 100644
--- a/_unittests/ut_tasks/try_tasks.py
+++ b/_unittests/ut_tasks/try_tasks.py
@@ -203,7 +203,7 @@ def test_text_generation_phi4_moe(self):
         prompt = f"{user_prompt}<|audio_1|>{speech_prompt}{prompt_suffix}{assistant_prompt}"
         print(f">>> Prompt\n{prompt}")
 
-        # Downlowd and open audio file
+        # Download and open audio file
         audio, samplerate = sf.read(io.BytesIO(urlopen(audio_url).read()))
 
         # Process with the model
diff --git a/_unittests/ut_torch_models/test_test_helpers.py b/_unittests/ut_torch_models/test_test_helpers.py
index 319c74cc..1a2a99d9 100644
--- a/_unittests/ut_torch_models/test_test_helpers.py
+++ b/_unittests/ut_torch_models/test_test_helpers.py
@@ -22,7 +22,10 @@ class TestTestHelper(ExtTestCase):
     def test_get_inputs_for_task(self):
         fcts = supported_tasks()
         for task in self.subloop(sorted(fcts)):
-            data = get_inputs_for_task(task)
+            try:
+                data = get_inputs_for_task(task)
+            except NotImplementedError:
+                continue
             self.assertIsInstance(data, dict)
             self.assertIn("inputs", data)
             self.assertIn("dynamic_shapes", data)
@@ -99,9 +102,11 @@ def test_validate_model_custom(self):
             patch=True,
             stop_if_static=2 if pv.Version(torch.__version__) > pv.Version("2.6.1") else 0,
             optimization="default",
+            quiet=False,
         )
         self.assertIsInstance(summary, dict)
         self.assertIsInstance(data, dict)
+        self.assertIn("disc_onnx_ort_run_abs", summary)
         self.assertLess(summary["disc_onnx_ort_run_abs"], 1e-4)
         onnx_filename = data["onnx_filename"]
         output_path = f"{onnx_filename}.ortopt.onnx"
diff --git a/onnx_diagnostic/ext_test_case.py b/onnx_diagnostic/ext_test_case.py
index 991f625d..39d9845e 100644
--- a/onnx_diagnostic/ext_test_case.py
+++ b/onnx_diagnostic/ext_test_case.py
@@ -461,7 +461,7 @@ def requires_sklearn(version: str, msg: str = "") -> Callable:
     return lambda x: x
 
 
-def requires_experimental(version: str = "", msg: str = "") -> Callable:
+def requires_experimental(version: str = "0.0.0", msg: str = "") -> Callable:
     """Skips a unit test if :epkg:`experimental-experiment` is not recent enough."""
     import packaging.version as pv
 
diff --git a/onnx_diagnostic/torch_models/test_helper.py b/onnx_diagnostic/torch_models/test_helper.py
index ab885aa8..a39266c2 100644
--- a/onnx_diagnostic/torch_models/test_helper.py
+++ b/onnx_diagnostic/torch_models/test_helper.py
@@ -521,7 +521,9 @@ def validate_model(
         if verbose:
             print("[validate_model] done (dump)")
 
-    if not exporter or not exporter.startswith(("onnx-", "custom-")):
+    if not exporter or (
+        not exporter.startswith(("onnx-", "custom-")) and exporter != "custom"
+    ):
         if verbose:
             print("[validate_model] -- done (final)")
         if dump_stats:

From f2e038cf5b4e5c9945148849927897f02146de0b Mon Sep 17 00:00:00 2001
From: xadupre <xadupre@microsoft.com>
Date: Thu, 24 Apr 2025 12:47:54 +0200
Subject: [PATCH 07/10] exclude

---
 .github/workflows/ci.yml                             | 12 ++++++++++--
 .../test_patch_serialization.py                      |  2 +-
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index d75460b4..69cce2f9 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -17,8 +17,16 @@ jobs:
         os: [ubuntu-latest]
         python: ['3.10', '3.11', '3.12']
         transformers: ['4.48.3', '4.51.3', 'main']
-        torch: ['2.6', 'main']
-
+        torch: ['2.6', '2.7', 'main']
+      exclude:
+        - python: '3.10'
+          transformers: 'main'
+        - python: '3.10'
+          torch: '2.7'
+        - python: '3.11'
+          transformers: '4.51.3'
+        - python: '3.11'
+          torch: '2.7'
     steps:
       - uses: actions/checkout@v3
 
diff --git a/_unittests/ut_torch_export_patches/test_patch_serialization.py b/_unittests/ut_torch_export_patches/test_patch_serialization.py
index e4d207a3..619bd76d 100644
--- a/_unittests/ut_torch_export_patches/test_patch_serialization.py
+++ b/_unittests/ut_torch_export_patches/test_patch_serialization.py
@@ -175,7 +175,7 @@ def test_base_sliding_window_cache_unflatten_flatten(self):
             self.assertEqualAny([cache], cache2)
 
     @ignore_warnings(UserWarning)
-    @requires_torch("2.7")
+    @requires_torch("2.8")
     def test_sliding_window_cache_export(self):
         class Model(torch.nn.Module):
             def forward(self, cache):

From c2bec0942da1ddb6e76c861ced3fc6d25c3c68a2 Mon Sep 17 00:00:00 2001
From: xadupre <xadupre@microsoft.com>
Date: Thu, 24 Apr 2025 12:51:24 +0200
Subject: [PATCH 08/10] ci

---
 .github/workflows/ci.yml | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 69cce2f9..c6ad7dbc 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -18,15 +18,6 @@ jobs:
         python: ['3.10', '3.11', '3.12']
         transformers: ['4.48.3', '4.51.3', 'main']
         torch: ['2.6', '2.7', 'main']
-      exclude:
-        - python: '3.10'
-          transformers: 'main'
-        - python: '3.10'
-          torch: '2.7'
-        - python: '3.11'
-          transformers: '4.51.3'
-        - python: '3.11'
-          torch: '2.7'
     steps:
       - uses: actions/checkout@v3
 

From f38681a90d9589bf0e8e783baacf53daa445f789 Mon Sep 17 00:00:00 2001
From: xadupre <xadupre@microsoft.com>
Date: Thu, 24 Apr 2025 13:31:50 +0200
Subject: [PATCH 09/10] update ci

---
 .github/workflows/ci.yml | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index c6ad7dbc..318371c2 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -18,6 +18,15 @@ jobs:
         python: ['3.10', '3.11', '3.12']
         transformers: ['4.48.3', '4.51.3', 'main']
         torch: ['2.6', '2.7', 'main']
+        exclude:
+          - python: '3.10'
+            transformers: 'main'
+          - python: '3.10'
+            torch: '2.7'
+          - python: '3.11'
+            transformers: '4.51.3'
+          - python: '3.11'
+            torch: '2.7'
     steps:
       - uses: actions/checkout@v3
 

From 2e92eda22b04c0aa3b5a20e7ac46fae2f5679186 Mon Sep 17 00:00:00 2001
From: xadupre <xadupre@microsoft.com>
Date: Thu, 24 Apr 2025 13:44:50 +0200
Subject: [PATCH 10/10] add feature extraction

---
 .github/workflows/ci.yml                      |  2 +
 _doc/api/tasks/feature_extraction.rst         |  7 ++
 _doc/api/tasks/index.rst                      |  1 +
 _unittests/ut_tasks/test_tasks.py             | 12 ++++
 _unittests/ut_tasks/try_tasks.py              | 16 +++++
 onnx_diagnostic/tasks/__init__.py             |  2 +
 onnx_diagnostic/tasks/feature_extraction.py   | 65 +++++++++++++++++++
 .../torch_models/hghub/hub_data.py            |  1 +
 8 files changed, 106 insertions(+)
 create mode 100644 _doc/api/tasks/feature_extraction.rst
 create mode 100644 onnx_diagnostic/tasks/feature_extraction.py

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 318371c2..705bba35 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -27,6 +27,8 @@ jobs:
             transformers: '4.51.3'
           - python: '3.11'
             torch: '2.7'
+          - python: '3.12'
+            torch: '2.6'
     steps:
       - uses: actions/checkout@v3
 
diff --git a/_doc/api/tasks/feature_extraction.rst b/_doc/api/tasks/feature_extraction.rst
new file mode 100644
index 00000000..ffb9ca3e
--- /dev/null
+++ b/_doc/api/tasks/feature_extraction.rst
@@ -0,0 +1,7 @@
+
+onnx_diagnostic.tasks.feature_extraction
+========================================
+
+.. automodule:: onnx_diagnostic.tasks.feature_extraction
+    :members:
+    :no-undoc-members:
diff --git a/_doc/api/tasks/index.rst b/_doc/api/tasks/index.rst
index 49f09048..952dbee2 100644
--- a/_doc/api/tasks/index.rst
+++ b/_doc/api/tasks/index.rst
@@ -34,6 +34,7 @@ Or:
 
     automatic_speech_recognition
     fill_mask
+    feature_extraction
     image_classification
     image_text_to_text
     mixture_of_expert
diff --git a/_unittests/ut_tasks/test_tasks.py b/_unittests/ut_tasks/test_tasks.py
index dc748be7..1ff3bdf9 100644
--- a/_unittests/ut_tasks/test_tasks.py
+++ b/_unittests/ut_tasks/test_tasks.py
@@ -116,6 +116,18 @@ def test_fill_mask(self):
                 model, (), kwargs=inputs, dynamic_shapes=use_dyn_not_str(ds), strict=False
             )
 
+    @hide_stdout()
+    def test_feature_extraction(self):
+        mid = "facebook/bart-base"
+        data = get_untrained_model_with_inputs(mid, verbose=1)
+        self.assertIn((data["size"], data["n_weights"]), [(557681664, 139420416)])
+        model, inputs, ds = data["model"], data["inputs"], data["dynamic_shapes"]
+        model(**inputs)
+        with bypass_export_some_errors(patch_transformers=True, verbose=10):
+            torch.export.export(
+                model, (), kwargs=inputs, dynamic_shapes=use_dyn_not_str(ds), strict=False
+            )
+
     @hide_stdout()
     def test_text_classification(self):
         mid = "Intel/bert-base-uncased-mrpc"
diff --git a/_unittests/ut_tasks/try_tasks.py b/_unittests/ut_tasks/try_tasks.py
index e929ad2a..77a6f088 100644
--- a/_unittests/ut_tasks/try_tasks.py
+++ b/_unittests/ut_tasks/try_tasks.py
@@ -338,6 +338,22 @@ def test_fill_mask(self):
         output = model(**encoded_input)
         print("-- outputs", string_type(output, with_shape=True, with_min_max=True))
 
+    @never_test()
+    def test_feature_extraction(self):
+        # clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k feature_ex
+        # https://huggingface.co/google-bert/bert-base-multilingual-cased
+
+        from transformers import BartTokenizer, BartModel
+
+        tokenizer = BartTokenizer.from_pretrained("facebook/bart-base")
+        model = BartModel.from_pretrained("facebook/bart-base")
+        text = "Replace me by any text you'd like."
+        encoded_input = tokenizer(text, return_tensors="pt")
+        print()
+        print("-- inputs", string_type(encoded_input, with_shape=True, with_min_max=True))
+        output = model(**encoded_input)
+        print("-- outputs", string_type(output, with_shape=True, with_min_max=True))
+
     @never_test()
     def test_text_classification(self):
         # clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k text_cl
diff --git a/onnx_diagnostic/tasks/__init__.py b/onnx_diagnostic/tasks/__init__.py
index 2e6130ec..b9d80857 100644
--- a/onnx_diagnostic/tasks/__init__.py
+++ b/onnx_diagnostic/tasks/__init__.py
@@ -1,6 +1,7 @@
 from typing import Any, Callable, Dict, List, Tuple
 from . import (
     automatic_speech_recognition,
+    feature_extraction,
     fill_mask,
     image_classification,
     image_text_to_text,
@@ -14,6 +15,7 @@
 
 __TASKS__ = [
     automatic_speech_recognition,
+    feature_extraction,
     fill_mask,
     image_classification,
     image_text_to_text,
diff --git a/onnx_diagnostic/tasks/feature_extraction.py b/onnx_diagnostic/tasks/feature_extraction.py
new file mode 100644
index 00000000..510a9f1f
--- /dev/null
+++ b/onnx_diagnostic/tasks/feature_extraction.py
@@ -0,0 +1,65 @@
+from typing import Any, Callable, Dict, Optional, Tuple
+import torch
+from ..helpers.config_helper import update_config, check_hasattr
+
+__TASK__ = "feature-extraction"
+
+
+def reduce_model_config(config: Any) -> Dict[str, Any]:
+    """Reduces a model size."""
+    check_hasattr(config, "num_attention_heads", "num_hidden_layers")
+    kwargs = dict(
+        num_hidden_layers=min(config.num_hidden_layers, 2),
+        num_attention_heads=min(config.num_attention_heads, 4),
+    )
+    update_config(config, kwargs)
+    return kwargs
+
+
+def get_inputs(
+    model: torch.nn.Module,
+    config: Optional[Any],
+    batch_size: int,
+    sequence_length: int,
+    dummy_max_token_id: int,
+    **kwargs,  # unused
+):
+    """
+    Generates inputs for task ``feature-extraction``.
+    Example:
+
+    ::
+
+        input_ids:T7s1x13[101,72654:A16789.23076923077],
+        token_type_ids:T7s1x13[0,0:A0.0],
+        attention_mask:T7s1x13[1,1:A1.0])
+    """
+    batch = torch.export.Dim("batch", min=1, max=1024)
+    seq_length = "sequence_length"
+    shapes = {
+        "input_ids": {0: batch, 1: seq_length},
+        "attention_mask": {0: batch, 1: seq_length},
+    }
+    inputs = dict(
+        input_ids=torch.randint(0, dummy_max_token_id, (batch_size, sequence_length)).to(
+            torch.int64
+        ),
+        attention_mask=torch.ones((batch_size, sequence_length)).to(torch.int64),
+    )
+    return dict(inputs=inputs, dynamic_shapes=shapes)
+
+
+def random_input_kwargs(config: Any) -> Tuple[Dict[str, Any], Callable]:
+    """
+    Inputs kwargs.
+
+    If the configuration is None, the function selects typical dimensions.
+    """
+    if config is not None:
+        check_hasattr(config, "vocab_size")
+    kwargs = dict(
+        batch_size=2,
+        sequence_length=30,
+        dummy_max_token_id=31999 if config is None else (config.vocab_size - 1),
+    )
+    return kwargs, get_inputs
diff --git a/onnx_diagnostic/torch_models/hghub/hub_data.py b/onnx_diagnostic/torch_models/hghub/hub_data.py
index 51d5871c..5ec2abf2 100644
--- a/onnx_diagnostic/torch_models/hghub/hub_data.py
+++ b/onnx_diagnostic/torch_models/hghub/hub_data.py
@@ -13,6 +13,7 @@
     ASTModel,feature-extraction
     AlbertModel,feature-extraction
     BeitForImageClassification,image-classification
+    BartModel,feature-extraction
     BertForMaskedLM,fill-mask
     BertForSequenceClassification,text-classification
     BertModel,sentence-similarity