@@ -99,8 +99,8 @@ def test_text2text_generation(self):
         print(tokenizer.decode(generated_ids[0], skip_special_tokens=True))

     @never_test()
-    def test_text_generation_phi4(self):
-        # clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k phi4
+    def test_text_generation_phi4_mini(self):
+        # clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k phi4_mini

         import torch
         from transformers import RobertaTokenizer, T5ForConditionalGeneration
@@ -124,6 +124,107 @@ def test_text_generation_phi4(self):
         )
         print(tokenizer.decode(generated_ids[0], skip_special_tokens=True))

+    @never_test()
+    @unittest.skip(
+        reason="AttributeError: 'Phi4MMModel' object has no attribute "
+        "'prepare_inputs_for_generation'"
+    )
+    def test_text_generation_phi4_moe(self):
+        # clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k phi4_moe
+
+        import requests
+        import io
+        from PIL import Image
+        import soundfile as sf
+        from transformers import AutoModelForCausalLM, AutoProcessor, GenerationConfig
+        from urllib.request import urlopen
+
+        # Define model path
+        model_path = "microsoft/Phi-4-multimodal-instruct"
+
+        # Load model and processor
+        processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)
+        model = AutoModelForCausalLM.from_pretrained(
+            model_path,
+            device_map="cuda",
+            torch_dtype="auto",
+            trust_remote_code=True,
+            # if you do not use Ampere or later GPUs, change attention to "eager"
+            # _attn_implementation='flash_attention_2',
+            _attn_implementation="eager",
+        ).cuda()
+
+        # Load generation config
+        generation_config = GenerationConfig.from_pretrained(model_path)
+
+        # Define prompt structure
+        user_prompt = "<|user|>"
+        assistant_prompt = "<|assistant|>"
+        prompt_suffix = "<|end|>"
+
+        # Part 1: Image Processing
+        print("\n--- IMAGE PROCESSING ---")
+        image_url = "https://www.ilankelman.org/stopsigns/australia.jpg"
+        prompt = (
+            f"{user_prompt}<|image_1|>What is shown in this image"
+            f"?{prompt_suffix}{assistant_prompt}"
+        )
+        print(f">>> Prompt\n{prompt}")
+
+        # Download and open image
+        image = Image.open(requests.get(image_url, stream=True).raw)
+        inputs = processor(text=prompt, images=image, return_tensors="pt").to("cuda:0")
+
+        # Generate response
+        print("--------- IMAGE PROCESSING ----------")
+        print()
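+        # steal_forward (a test helper used throughout this file) is assumed to hook
+        # the model's forward calls so their inputs/outputs are dumped during generation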
+        with steal_forward(model):
+            generate_ids = model.generate(
+                **inputs,
+                max_new_tokens=1000,
+                generation_config=generation_config,
+            )
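+        # keep only the newly generated tokens, dropping the echoed prompt tokens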
+        generate_ids = generate_ids[:, inputs["input_ids"].shape[1] :]
+        response = processor.batch_decode(
+            generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
+        )[0]
+        print(f">>> Response\n{response}")
+
+        # Part 2: Audio Processing
+        print("\n--- AUDIO PROCESSING ---")
+        audio_url = (
+            "https://upload.wikimedia.org/wikipedia/commons/b/b0/"
+            "Barbara_Sahakian_BBC_Radio4_The_Life_Scientific_29_May_2012_b01j5j24.flac"
+        )
+        speech_prompt = (
+            "Transcribe the audio to text, and then translate the audio to French. "
+            "Use <sep> as a separator between the original transcript and the translation."
+        )
+        prompt = f"{user_prompt}<|audio_1|>{speech_prompt}{prompt_suffix}{assistant_prompt}"
+        print(f">>> Prompt\n{prompt}")
+
+        # Download and open audio file
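+        # sf.read returns the waveform as a numpy array together with its sampling rate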
+        audio, samplerate = sf.read(io.BytesIO(urlopen(audio_url).read()))
+
+        # Process with the model
+        inputs = processor(text=prompt, audios=[(audio, samplerate)], return_tensors="pt").to(
+            "cuda:0"
+        )
+
+        print("--------- AUDIO PROCESSING ----------")
+        print()
+        with steal_forward(model):
+            generate_ids = model.generate(
+                **inputs,
+                max_new_tokens=1000,
+                generation_config=generation_config,
+            )
+        generate_ids = generate_ids[:, inputs["input_ids"].shape[1] :]
+        response = processor.batch_decode(
+            generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
+        )[0]
+        print(f">>> Response\n{response}")
+
     @never_test()
     def test_imagetext2text_generation(self):
         # clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k etext2t
@@ -237,6 +338,22 @@ def test_fill_mask(self):
         output = model(**encoded_input)
         print("-- outputs", string_type(output, with_shape=True, with_min_max=True))

+    @never_test()
+    def test_feature_extraction(self):
+        # clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k feature_ex
+        # https://huggingface.co/facebook/bart-base
+
+        from transformers import BartTokenizer, BartModel
+
+        tokenizer = BartTokenizer.from_pretrained("facebook/bart-base")
+        model = BartModel.from_pretrained("facebook/bart-base")
+        text = "Replace me by any text you'd like."
+        encoded_input = tokenizer(text, return_tensors="pt")
+        print()
+        print("-- inputs", string_type(encoded_input, with_shape=True, with_min_max=True))
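+        # BartModel carries no task head, so the outputs are the raw encoder/decoder
+        # hidden states that serve as the extracted features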
+        output = model(**encoded_input)
+        print("-- outputs", string_type(output, with_shape=True, with_min_max=True))
+
     @never_test()
     def test_text_classification(self):
         # clear&&NEVERTEST=1 python _unittests/ut_tasks/try_tasks.py -k text_cl