really use pretrained version

xadupre · xadupre · commit 1480fdfc591c · 2025-06-21T11:19:20.000+02:00
diff --git a/onnx_diagnostic/_command_lines_parser.py b/onnx_diagnostic/_command_lines_parser.py
@@ -333,7 +333,24 @@ def get_parser_validate() -> ArgumentParser:
             of supported tasks.
             """
         ),
-        epilog="If the model id is specified, one untrained version of it is instantiated.",
+        epilog=textwrap.dedent(
+            """
+            If the model id is specified, one untrained version of it is instantiated.
+            Examples:
+
+            python -m onnx_diagnostic validate -m microsoft/Phi-4-mini-reasoning \\
+                --run -v 1 -o dump_test --no-quiet --repeat 2 --warmup 2 \\
+                --dtype float16 --device cuda --patch --export onnx-dynamo --opt ir
+
+            python -m onnx_diagnostic validate -m microsoft/Phi-4-mini-reasoning \\
+                --run -v 1 -o dump_test --no-quiet --repeat 2 --warmup 2 \\
+                --dtype float16 --device cuda --patch --export custom --opt default
+
+            python -m onnx_diagnostic validate -m microsoft/Phi-4-mini-reasoning \\
+                --run -v 1 -o dump_test --no-quiet --repeat 2 --warmup 2 \\
+                --dtype float16 --device cuda --export modelbuilder
+            """
+        ),
         formatter_class=RawTextHelpFormatter,
     )
     parser.add_argument("-m", "--mid", type=str, help="model id, usually <author>/<name>")
@@ -372,6 +389,12 @@ def get_parser_validate() -> ArgumentParser:
         type=int,
         help="Raises an exception if a dynamic dimension becomes static.",
     )
+    parser.add_argument(
+        "--same-as-trained",
+        default=False,
+        action=BooleanOptionalAction,
+        help="Validates a model identical to the trained model but not trained.",
+    )
     parser.add_argument(
         "--trained",
         default=False,
@@ -487,7 +510,8 @@ def _cmd_validate(argv: List[Any]):
             do_run=args.run,
             verbose=args.verbose,
             quiet=args.quiet,
-            trained=args.trained,
+            same_as_pretrained=args.same_as_trained,
+            use_pretrained=args.trained,
             dtype=args.dtype,
             device=args.device,
             patch=args.patch,
@@ -619,7 +643,13 @@ def get_parser_agg() -> ArgumentParser:
             and produces values. Every row has a date.
             """
         ),
-        epilog="example\n  python -m onnx_diagnostic agg test_agg.xlsx raw/*.zip -v 1",
+        epilog=textwrap.dedent(
+            """
+            examples:\n
+
+                python -m onnx_diagnostic agg test_agg.xlsx raw/*.zip -v 1
+            """
+        ),
         formatter_class=RawTextHelpFormatter,
     )
     parser.add_argument("output", help="output excel file")
diff --git a/onnx_diagnostic/tasks/image_text_to_text.py b/onnx_diagnostic/tasks/image_text_to_text.py
@@ -132,16 +132,30 @@ def random_input_kwargs(config: Any) -> Tuple[Dict[str, Any], Callable]:
     If the configuration is None, the function selects typical dimensions.
     """
     if config is not None:
-        check_hasattr(
-            config,
-            "vocab_size",
-            "hidden_size",
-            "num_attention_heads",
-            ("num_key_value_heads", "num_attention_heads"),
-            "intermediate_size",
-            "hidden_size",
-            "vision_config",
-        )
+        if hasattr(config, "text_config"):
+            check_hasattr(
+                config.text_config,
+                "vocab_size",
+                "hidden_size",
+                "num_attention_heads",
+                ("num_key_value_heads", "num_attention_heads"),
+                "intermediate_size",
+                "hidden_size",
+            )
+            check_hasattr(config, "vision_config")
+            text_config = True
+        else:
+            check_hasattr(
+                config,
+                "vocab_size",
+                "hidden_size",
+                "num_attention_heads",
+                ("num_key_value_heads", "num_attention_heads"),
+                "intermediate_size",
+                "hidden_size",
+                "vision_config",
+            )
+            text_config = False
         check_hasattr(config.vision_config, "image_size", "num_channels")
     kwargs = dict(
         batch_size=2,
@@ -150,17 +164,55 @@ def random_input_kwargs(config: Any) -> Tuple[Dict[str, Any], Callable]:
         head_dim=(
             16
             if config is None
-            else getattr(config, "head_dim", config.hidden_size // config.num_attention_heads)
+            else getattr(
+                config,
+                "head_dim",
+                (config.text_config.hidden_size if text_config else config.config.hidden_size)
+                // (
+                    config.text_config.num_attention_heads
+                    if text_config
+                    else config.config.num_attention_heads
+                ),
+            )
+        ),
+        dummy_max_token_id=(
+            31999
+            if config is None
+            else (config.text_config.vocab_size if text_config else config.config.vocab_size)
+            - 1
+        ),
+        num_hidden_layers=(
+            4
+            if config is None
+            else (
+                config.text_config.num_hidden_layers
+                if text_config
+                else config.config.num_hidden_layers
+            )
         ),
-        dummy_max_token_id=31999 if config is None else config.vocab_size - 1,
-        num_hidden_layers=4 if config is None else config.num_hidden_layers,
         num_key_value_heads=(
             8
             if config is None
-            else _pick(config, "num_key_value_heads", "num_attention_heads")
+            else (
+                _pick(config.text_config, "num_key_value_heads", "num_attention_heads")
+                if text_config
+                else _pick(config, "num_key_value_heads", "num_attention_heads")
+            )
+        ),
+        intermediate_size=(
+            1024
+            if config is None
+            else (
+                config.text_config.intermediate_size
+                if text_config
+                else config.config.intermediate_size
+            )
+        ),
+        hidden_size=(
+            512
+            if config is None
+            else (config.text_config.hidden_size if text_config else config.hidden_size)
         ),
-        intermediate_size=1024 if config is None else config.intermediate_size,
-        hidden_size=512 if config is None else config.hidden_size,
         width=224 if config is None else config.vision_config.image_size,
         height=224 if config is None else config.vision_config.image_size,
         num_channels=3 if config is None else config.vision_config.num_channels,
diff --git a/onnx_diagnostic/torch_models/hghub/hub_api.py b/onnx_diagnostic/torch_models/hghub/hub_api.py
@@ -138,12 +138,15 @@ def _guess_task_from_config(config: Any) -> Optional[str]:
 
 
 @functools.cache
-def task_from_arch(arch: str, default_value: Optional[str] = None) -> str:
+def task_from_arch(
+    arch: str, default_value: Optional[str] = None, model_id: Optional[str] = None
+) -> str:
     """
     This function relies on stored information. That information needs to be refresh.
 
     :param arch: architecture name
     :param default_value: default value in case the task cannot be determined
+    :param model_id: only used to find the task when the architecture is unknown
     :return: task
 
     .. runpython::
@@ -156,9 +159,16 @@ def task_from_arch(arch: str, default_value: Optional[str] = None) -> str:
     <onnx_diagnostic.torch_models.hghub.hub_data.load_architecture_task>`.
     """
     data = load_architecture_task()
+    if arch not in data and model_id:
+        # Let's try with the model id.
+        return task_from_id(model_id)
     if default_value is not None:
         return data.get(arch, default_value)
-    assert arch in data, f"Architecture {arch!r} is unknown, last refresh in {__date__}"
+    assert arch in data, (
+        f"Architecture {arch!r} is unknown, last refresh in {__date__}. "
+        f"``onnx_diagnostic.torch_models.hghub.hub_data.__data_arch__`` "
+        f"needs to be updated (model_id={(model_id or '?')!r})."
+    )
     return data[arch]
 
 
@@ -176,6 +186,7 @@ def task_from_id(
         if the task cannot be determined
     :param pretrained: uses the config
     :param fall_back_to_pretrained: falls back to pretrained config
+    :param exc: raises an exception if True
     :return: task
     """
     if not pretrained:
@@ -191,11 +202,18 @@ def task_from_id(
         guess = _guess_task_from_config(config)
         if guess is not None:
             return guess
+        data = load_architecture_task()
+        if model_id in data:
+            return data[model_id]
         assert config.architectures is not None and len(config.architectures) == 1, (
             f"Cannot return the task of {model_id!r}, pipeline_tag is not setup, "
-            f"architectures={config.architectures} in config={config}"
+            f"architectures={config.architectures} in config={config}. "
+            f"The task can be added in "
+            f"``onnx_diagnostic.torch_models.hghub.hub_data.__data_arch__``."
+        )
+        return task_from_arch(
+            config.architectures[0], default_value=default_value, model_id=model_id
         )
-        return task_from_arch(config.architectures[0], default_value=default_value)
 
 
 def task_from_tags(tags: Union[str, List[str]]) -> str:
diff --git a/onnx_diagnostic/torch_models/hghub/hub_data.py b/onnx_diagnostic/torch_models/hghub/hub_data.py
@@ -3,7 +3,7 @@
 import textwrap
 from typing import Dict, List
 
-__date__ = "2025-03-26"
+__date__ = "2025-06-21"
 
 __data_arch_values__ = {"ResNetForImageClassification": dict(image_size=224)}
 
@@ -52,6 +52,8 @@
     GPTNeoModel,feature-extraction
     GPTNeoXForCausalLM,text-generation
     GemmaForCausalLM,text-generation
+    Gemma2ForCausalLM,text-generation
+    Gemma3ForConditionalGeneration,image-text-to-text
     GraniteForCausalLM,text-generation
     GroupViTModel,feature-extraction
     HieraForImageClassification,image-classification
@@ -144,7 +146,8 @@
     XLMRobertaModel,sentence-similarity
     Wav2Vec2ForCTC,automatic-speech-recognition
     YolosForObjectDetection,object-detection
-    YolosModel,image-feature-extraction"""
+    YolosModel,image-feature-extraction
+    emilyalsentzer/Bio_ClinicalBERT,fill-mask"""
 )
 
 __data_tasks__ = [
diff --git a/onnx_diagnostic/torch_models/hghub/model_inputs.py b/onnx_diagnostic/torch_models/hghub/model_inputs.py
@@ -1,5 +1,6 @@
 import inspect
 import os
+import pprint
 from typing import Any, Dict, Optional, Tuple
 import torch
 import transformers
@@ -22,6 +23,7 @@ def get_untrained_model_with_inputs(
     model_kwargs: Optional[Dict[str, Any]] = None,
     verbose: int = 0,
     dynamic_rope: Optional[bool] = None,
+    use_pretrained: bool = False,
     same_as_pretrained: bool = False,
     use_preinstalled: bool = True,
     add_second_input: bool = False,
@@ -43,6 +45,7 @@ def get_untrained_model_with_inputs(
     :param dynamic_rope: use dynamic rope (see :class:`transformers.LlamaConfig`)
     :param same_as_pretrained: if True, do not change the default values
         to get a smaller model
+    :param use_pretrained: download the pretrained weights as well
     :param use_preinstalled: use preinstalled configurations
     :param add_second_input: provides a second inputs to check a model
         supports different shapes
@@ -68,6 +71,10 @@ def get_untrained_model_with_inputs(
         print("-- dynamic shapes:", pprint.pformat(data['dynamic_shapes']))
         print("-- configuration:", pprint.pformat(data['configuration']))
     """
+    assert not use_preinstalled or not use_only_preinstalled, (
+        f"model_id={model_id!r}, pretinstalled model is only avaialble "
+        f"if use_only_preinstalled is False."
+    )
     if verbose:
         print(f"[get_untrained_model_with_inputs] model_id={model_id!r}")
         if use_preinstalled:
@@ -99,7 +106,7 @@ def get_untrained_model_with_inputs(
         print(f"[get_untrained_model_with_inputs] architectures={archs!r}")
         print(f"[get_untrained_model_with_inputs] cls={config.__class__.__name__!r}")
     if task is None:
-        task = task_from_arch(archs[0])
+        task = task_from_arch(archs[0], model_id=model_id)
     if verbose:
         print(f"[get_untrained_model_with_inputs] task={task!r}")
 
@@ -114,7 +121,6 @@ def get_untrained_model_with_inputs(
         )
 
     # updating the configuration
-
     mkwargs = reduce_model_config(config, task) if not same_as_pretrained else {}
     if model_kwargs:
         for k, v in model_kwargs.items():
@@ -139,27 +145,28 @@ def get_untrained_model_with_inputs(
                 f"{config._attn_implementation!r}"  # type: ignore[union-attr]
             )
 
+    if use_pretrained:
+        model = transformers.AutoModel.from_pretrained(model_id, **mkwargs)
+    else:
+        if archs is not None:
+            model = getattr(transformers, archs[0])(config)
+        else:
+            assert same_as_pretrained and use_pretrained, (
+                f"Model {model_id!r} cannot be built, the model cannot be built. "
+                f"It must be downloaded. Use same_as_pretrained=True "
+                f"and use_pretrained=True."
+            )
+
     # input kwargs
     kwargs, fct = random_input_kwargs(config, task)
     if verbose:
         print(f"[get_untrained_model_with_inputs] use fct={fct}")
         if os.environ.get("PRINT_CONFIG") in (1, "1"):
-            import pprint
-
             print(f"-- input kwargs for task {task!r}")
             pprint.pprint(kwargs)
     if inputs_kwargs:
         kwargs.update(inputs_kwargs)
 
-    if archs is not None:
-        model = getattr(transformers, archs[0])(config)
-    else:
-        assert same_as_pretrained, (
-            f"Model {model_id!r} cannot be built, the model cannot be built. "
-            f"It must be downloaded. Use same_as_pretrained=True."
-        )
-        model = None
-
     # This line is important. Some models may produce different
     # outputs even with the same inputs in training mode.
     model.eval()
diff --git a/onnx_diagnostic/torch_models/validate.py b/onnx_diagnostic/torch_models/validate.py
@@ -259,7 +259,8 @@ def validate_model(
     verbose: int = 0,
     dtype: Optional[Union[str, torch.dtype]] = None,
     device: Optional[Union[str, torch.device]] = None,
-    trained: bool = False,
+    same_as_pretrained: bool = False,
+    use_pretrained: bool = False,
     optimization: Optional[str] = None,
     quiet: bool = False,
     patch: bool = False,
@@ -294,7 +295,9 @@ def validate_model(
     :param verbose: verbosity level
     :param dtype: uses this dtype to check the model
     :param device: do the verification on this device
-    :param trained: use the trained model, not the untrained one
+    :param same_as_pretrained: use a model equivalent to the trained one
+        but with untrained weights, this is not always possible
+    :param use_pretrained: use the trained model, not the untrained one
     :param optimization: optimization to apply to the exported model,
         depend on the the exporter
     :param quiet: if quiet, catches exception if any issue
@@ -353,7 +356,8 @@ def validate_model(
             version_do_run=str(do_run),
             version_dtype=str(dtype or ""),
             version_device=str(device or ""),
-            version_trained=str(trained),
+            version_same_as_pretrained=str(same_as_pretrained),
+            version_use_pretrained=str(use_pretrained),
             version_optimization=optimization or "",
             version_quiet=str(quiet),
             version_patch=str(patch),
@@ -408,11 +412,12 @@ def validate_model(
         summary,
         None,
         (
-            lambda mid=model_id, v=verbose, task=task, tr=trained, iop=iop, sub=subfolder, i2=inputs2: (  # noqa: E501
+            lambda mid=model_id, v=verbose, task=task, uptr=use_pretrained, tr=same_as_pretrained, iop=iop, sub=subfolder, i2=inputs2: (  # noqa: E501
                 get_untrained_model_with_inputs(
                     mid,
                     verbose=v,
                     task=task,
+                    use_pretrained=uptr,
                     same_as_pretrained=tr,
                     inputs_kwargs=iop,
                     model_kwargs=mop,