From cc64994dc24951887bda1fc6d4cd101f3585516f Mon Sep 17 00:00:00 2001
From: xadupre <xadupre@microsoft.com>
Date: Fri, 19 Sep 2025 14:16:43 +0200
Subject: [PATCH 1/6] Handle more models

---
 onnx_diagnostic/_command_lines_parser.py |  7 +++++++
 onnx_diagnostic/torch_models/validate.py | 12 +++++++++++-
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/onnx_diagnostic/_command_lines_parser.py b/onnx_diagnostic/_command_lines_parser.py
index db488e46..1c98a90c 100644
--- a/onnx_diagnostic/_command_lines_parser.py
+++ b/onnx_diagnostic/_command_lines_parser.py
@@ -542,6 +542,12 @@ def get_parser_validate() -> ArgumentParser:
         "the onnx exporter should use.",
         default="",
     )
+    parser.add_argument(
+        "--ort-logs",
+        default=False,
+        action=BooleanOptionalAction,
+        help="Enables onnxruntime logging when the session is created",
+    )
     return parser
 
 
@@ -601,6 +607,7 @@ def _cmd_validate(argv: List[Any]):
             repeat=args.repeat,
             warmup=args.warmup,
             inputs2=args.inputs2,
+            ort_logs=args.ort_logs,
             output_names=(
                 None if len(args.outnames.strip()) < 2 else args.outnames.strip().split(",")
             ),
diff --git a/onnx_diagnostic/torch_models/validate.py b/onnx_diagnostic/torch_models/validate.py
index b0b69e50..63c624db 100644
--- a/onnx_diagnostic/torch_models/validate.py
+++ b/onnx_diagnostic/torch_models/validate.py
@@ -292,6 +292,7 @@ def validate_model(
     warmup: int = 0,
     inputs2: int = 1,
     output_names: Optional[List[str]] = None,
+    ort_logs: bool = False,
 ) -> Tuple[Dict[str, Union[int, float, str]], Dict[str, Any]]:
     """
     Validates a model.
@@ -344,6 +345,7 @@ def validate_model(
         this ensures that the model does support dynamism, the value is used
         as an increment to the first set of values (added to dimensions)
     :param output_names: output names the onnx exporter should use
+    :param ort_logs: increases onnxruntime verbosity when creating the session
     :return: two dictionaries, one with some metrics,
         another one with whatever the function produces
 
@@ -758,6 +760,7 @@ def validate_model(
             repeat=repeat,
             warmup=warmup,
             inputs2=inputs2,
+            ort_logs=ort_logs,
         )
         summary.update(summary_valid)
 
@@ -1158,6 +1161,7 @@ def validate_onnx_model(
     repeat: int = 1,
     warmup: int = 0,
     inputs2: int = 1,
+    ort_logs: bool = False,
 ) -> Tuple[Dict[str, Any], Dict[str, Any]]:
     """
     Verifies that an onnx model produces the same
@@ -1176,6 +1180,7 @@ def validate_onnx_model(
     :param inputs2: to validate the model on the second input set
         to make sure the exported model supports dynamism, the value is
         used as an increment added to the first set of inputs (added to dimensions)
+    :param ort_logs: triggers the logs for onnxruntime
     :return: two dictionaries, one with some metrics,
         another one with whatever the function produces
     """
@@ -1232,8 +1237,13 @@ def _mk(key, flavour=flavour):
 
         if verbose:
             print("[validate_onnx_model] runtime is onnxruntime")
-        cls_runtime = lambda model, providers: onnxruntime.InferenceSession(
+        sess_opts = onnxruntime.SessionOptions()
+        if ort_logs:
+            sess_opts.log_severity_level = 0
+            sess_opts.log_verbosity_level = 4
+        cls_runtime = lambda model, providers, _o=sess_opts: onnxruntime.InferenceSession(
             (model.SerializeToString() if isinstance(model, onnx.ModelProto) else model),
+            _o,
             providers=providers,
         )
     elif runtime == "torch":

From 6d85d19523a6c21134b1b5da8cf7a584a63247cd Mon Sep 17 00:00:00 2001
From: xadupre <xadupre@microsoft.com>
Date: Fri, 19 Sep 2025 17:16:42 +0200
Subject: [PATCH 2/6] Add task image-to-video

---
 onnx_diagnostic/tasks/__init__.py             |   6 +-
 onnx_diagnostic/tasks/image_to_video.py       | 127 ++++++++++++++++++
 onnx_diagnostic/torch_models/hghub/hub_api.py |  91 ++++++++++---
 .../torch_models/hghub/hub_data.py            |   3 +-
 .../torch_models/hghub/model_inputs.py        | 102 +++++++++-----
 .../torch_models/hghub/model_specific.py      |  27 ++++
 6 files changed, 294 insertions(+), 62 deletions(-)
 create mode 100644 onnx_diagnostic/tasks/image_to_video.py

diff --git a/onnx_diagnostic/tasks/__init__.py b/onnx_diagnostic/tasks/__init__.py
index 9895f363..3f9da3f1 100644
--- a/onnx_diagnostic/tasks/__init__.py
+++ b/onnx_diagnostic/tasks/__init__.py
@@ -5,6 +5,8 @@
     fill_mask,
     image_classification,
     image_text_to_text,
+    image_to_video,
+    mask_generation,
     mixture_of_expert,
     object_detection,
     sentence_similarity,
@@ -14,7 +16,6 @@
     text_to_image,
     text2text_generation,
     zero_shot_image_classification,
-    mask_generation,
 )
 
 __TASKS__ = [
@@ -23,6 +24,8 @@
     fill_mask,
     image_classification,
     image_text_to_text,
+    image_to_video,
+    mask_generation,
     mixture_of_expert,
     object_detection,
     sentence_similarity,
@@ -32,7 +35,6 @@
     text_to_image,
     text2text_generation,
     zero_shot_image_classification,
-    mask_generation,
 ]
 
 
diff --git a/onnx_diagnostic/tasks/image_to_video.py b/onnx_diagnostic/tasks/image_to_video.py
new file mode 100644
index 00000000..e88e8840
--- /dev/null
+++ b/onnx_diagnostic/tasks/image_to_video.py
@@ -0,0 +1,127 @@
+from typing import Any, Callable, Dict, Optional, Tuple
+import torch
+from ..helpers.config_helper import (
+    update_config,
+    check_hasattr,
+    default_num_hidden_layers as nhl,
+)
+
+__TASK__ = "image-to-video"
+
+
+def reduce_model_config(config: Any) -> Dict[str, Any]:
+    """Reduces a model size."""
+    if not hasattr(config, "num_hidden_layers") and not hasattr(config, "num_layers"):
+        # We cannot reduce.
+        return {}
+    check_hasattr(config, ("num_hidden_layers", "num_layers"))
+    kwargs = {}
+    if hasattr(config, "num_layers"):
+        kwargs["num_layers"] = min(config.num_layers, nhl())
+    if hasattr(config, "num_hidden_layers"):
+        kwargs["num_hidden_layers"] = min(config.num_hidden_layers, nhl())
+
+    update_config(config, kwargs)
+    return kwargs
+
+
+def get_inputs(
+    model: torch.nn.Module,
+    config: Optional[Any],
+    text_embed_dim: int,
+    latent_channels: int,
+    batch_size: int = 2,
+    image_height: int = 704,
+    image_width: int = 1280,
+    latent_frames: int = 1,
+    text_maxlen: int = 512,
+    add_second_input: int = 1,
+    **kwargs,  # unused
+):
+    """
+    Generates inputs for task ``image-to-video``.
+    """
+    assert (
+        "cls_cache" not in kwargs
+    ), f"Not yet implemented for cls_cache={kwargs['cls_cache']!r}."
+    latent_height = image_height // 8
+    latent_width = image_width // 8
+    dtype = torch.float32
+
+    inputs = dict(
+        hidden_states=torch.randn(
+            batch_size,
+            latent_channels,
+            latent_frames,
+            latent_height,
+            latent_width,
+            dtype=dtype,
+        ),
+        timestep=torch.tensor([1.0] * batch_size, dtype=dtype),
+        encoder_hidden_states=torch.randn(
+            batch_size, text_maxlen, text_embed_dim, dtype=dtype
+        ),
+        padding_mask=torch.ones(1, 1, image_height, image_width, dtype=dtype),
+        fps=torch.tensor([16] * batch_size, dtype=dtype),
+        condition_mask=torch.randn(
+            batch_size, 1, latent_frames, latent_height, latent_width, dtype=dtype
+        ),
+    )
+    shapes = dict(
+        hidden_states={
+            0: "batch_size",
+            2: "latent_frames",
+            3: "latent_height",
+            4: "latent_width",
+        },
+        timestep={0: "batch_size"},
+        encoder_hidden_states={0: "batch_size"},
+        padding_mask={0: "batch_size", 2: "height", 3: "width"},
+        fps={0: "batch_size"},
+        condition_mask={
+            0: "batch_size",
+            2: "latent_frames",
+            3: "latent_height",
+            4: "latent_width",
+        },
+    )
+    res = dict(inputs=inputs, dynamic_shapes=shapes)
+
+    if add_second_input:
+        assert (
+            add_second_input > 0
+        ), f"Not implemented for add_second_input={add_second_input}."
+        res["inputs2"] = get_inputs(
+            model=model,
+            config=config,
+            text_embed_dim=text_embed_dim,
+            latent_channels=latent_channels,
+            batch_size=batch_size,
+            image_height=image_height,
+            image_width=image_width,
+            latent_frames=latent_frames,
+            text_maxlen=text_maxlen,
+            add_second_input=0,
+            **kwargs,
+        )["inputs"]
+    return res
+
+
+def random_input_kwargs(config: Any) -> Tuple[Dict[str, Any], Callable]:
+    """
+    Returns the input kwargs and the function producing the inputs.
+
+    If the configuration is None, the function selects typical dimensions.
+    """
+    if config is not None:
+        check_hasattr(config, "in_channels", "text_embed_dim"),
+    kwargs = dict(
+        text_embed_dim=1024 if config is None else config.text_embed_dim,
+        latent_channels=16 if config is None else config.in_channels - 1,
+        batch_size=1,
+        image_height=8 * 50,
+        image_width=8 * 80,
+        latent_frames=1,
+        text_maxlen=512,
+    )
+    return kwargs, get_inputs
diff --git a/onnx_diagnostic/torch_models/hghub/hub_api.py b/onnx_diagnostic/torch_models/hghub/hub_api.py
index da05f7c0..94f0aa7e 100644
--- a/onnx_diagnostic/torch_models/hghub/hub_api.py
+++ b/onnx_diagnostic/torch_models/hghub/hub_api.py
@@ -177,6 +177,51 @@ def task_from_arch(
     return data[arch]
 
 
+def _trygetattr(config, attname):
+    try:
+        return getattr(config, attname)
+    except AttributeError:
+        return None
+
+
+def architecture_from_config(config) -> Optional[str]:
+    """Guesses the architecture (class) of the model described by this config."""
+    if isinstance(config, dict):
+        if "_class_name" in config:
+            return config["_class_name"]
+        if "architecture" in config:
+            return config["architecture"]
+        if config.get("architectures", []):
+            return config["architectures"][0]
+    if hasattr(config, "_class_name"):
+        return config._class_name
+    if hasattr(config, "architecture"):
+        return config.architecture
+    if hasattr(config, "architectures") and config.architectures:
+        return config.architectures[0]
+    if hasattr(config, "__dict__"):
+        if "_class_name" in config.__dict__:
+            return config.__dict__["_class_name"]
+        if "architecture" in config.__dict__:
+            return config.__dict__["architecture"]
+        if config.__dict__.get("architectures", []):
+            return config.__dict__["architectures"][0]
+    return None
+
+
+def find_package_source(config) -> Optional[str]:
+    """Guesses the package the model class comes from."""
+    if isinstance(config, dict):
+        if "_diffusers_version" in config:
+            return "diffusers"
+    if hasattr(config, "_diffusers_version"):
+        return "diffusers"
+    if hasattr(config, "__dict__"):
+        if "_diffusers_version" in config.__dict__:
+            return "diffusers"
+    return "transformers"
+
+
 def task_from_id(
     model_id: str,
     default_value: Optional[str] = None,
@@ -202,28 +247,30 @@ def task_from_id(
             if not fall_back_to_pretrained:
                 raise
     config = get_pretrained_config(model_id, subfolder=subfolder)
-    try:
-        return config.pipeline_tag
-    except AttributeError:
-        guess = _guess_task_from_config(config)
-        if guess is not None:
-            return guess
-        data = load_architecture_task()
-        if model_id in data:
-            return data[model_id]
-        if type(config) is dict and "_class_name" in config:
-            return task_from_arch(config["_class_name"], default_value=default_value)
-        if not config.architectures or not config.architectures:
-            # Some hardcoded values until a better solution is found.
-            if model_id.startswith("google/bert_"):
-                return "fill-mask"
-        assert config.architectures is not None and len(config.architectures) == 1, (
-            f"Cannot return the task of {model_id!r}, pipeline_tag is not setup, "
-            f"architectures={config.architectures} in config={config}. "
-            f"The task can be added in "
-            f"``onnx_diagnostic.torch_models.hghub.hub_data.__data_arch__``."
-        )
-        return task_from_arch(config.architectures[0], default_value=default_value)
+    tag = _trygetattr(config, "pipeline_tag")
+    if tag is not None:
+        return tag
+
+    guess = _guess_task_from_config(config)
+    if guess is not None:
+        return guess
+    data = load_architecture_task()
+    if subfolder:
+        full_id = f"{model_id}//{subfolder}"
+        if full_id in data:
+            return data[full_id]
+    if model_id in data:
+        return data[model_id]
+    arch = architecture_from_config(config)
+    if arch is None:
+        if model_id.startswith("google/bert_"):
+            return "fill-mask"
+    assert arch is not None, (
+        f"Cannot return the task of {model_id!r}, pipeline_tag is not setup, "
+        f"config={config}. The task can be added in "
+        f"``onnx_diagnostic.torch_models.hghub.hub_data.__data_arch__``."
+    )
+    return task_from_arch(arch, default_value=default_value)
 
 
 def task_from_tags(tags: Union[str, List[str]]) -> str:
diff --git a/onnx_diagnostic/torch_models/hghub/hub_data.py b/onnx_diagnostic/torch_models/hghub/hub_data.py
index 1f29d832..9d5b3716 100644
--- a/onnx_diagnostic/torch_models/hghub/hub_data.py
+++ b/onnx_diagnostic/torch_models/hghub/hub_data.py
@@ -156,7 +156,8 @@
     YolosForObjectDetection,object-detection
     YolosModel,image-feature-extraction
     Alibaba-NLP/gte-large-en-v1.5,sentence-similarity
-    emilyalsentzer/Bio_ClinicalBERT,fill-mask"""
+    emilyalsentzer/Bio_ClinicalBERT,fill-mask
+    nvidia/Cosmos-Predict2-2B-Video2World//transformer,image-to-video"""
 )
 
 __data_tasks__ = [
diff --git a/onnx_diagnostic/torch_models/hghub/model_inputs.py b/onnx_diagnostic/torch_models/hghub/model_inputs.py
index 730b659e..e08902a3 100644
--- a/onnx_diagnostic/torch_models/hghub/model_inputs.py
+++ b/onnx_diagnostic/torch_models/hghub/model_inputs.py
@@ -2,13 +2,21 @@
 import inspect
 import os
 import pprint
+import time
 from typing import Any, Dict, Optional, Tuple
 import torch
 import transformers
 from ...helpers.config_helper import update_config, build_diff_config
 from ...tasks import reduce_model_config, random_input_kwargs
-from .hub_api import task_from_arch, task_from_id, get_pretrained_config, download_code_modelid
-from .model_specific import HANDLED_MODELS, load_specific_model
+from .hub_api import (
+    task_from_arch,
+    task_from_id,
+    get_pretrained_config,
+    download_code_modelid,
+    architecture_from_config,
+    find_package_source,
+)
+from .model_specific import HANDLED_MODELS, load_specific_model, instantiate_specific_model
 
 
 def _code_needing_rewriting(model: Any) -> Any:
@@ -96,27 +104,18 @@ def get_untrained_model_with_inputs(
             model, task, config = load_specific_model(model_id, verbose=verbose)
 
     if model is None:
-        if hasattr(config, "architecture") and config.architecture:
-            archs = [config.architecture]
-        if type(config) is dict:
-            assert (
-                "_class_name" in config
-            ), f"Unable to get the architecture from config={config}"
-            archs = [config["_class_name"]]
-        else:
-            archs = config.architectures  # type: ignore
-        task = None
-        if archs is None:
-            task = task_from_id(model_id)
-        assert task is not None or (archs is not None and len(archs) == 1), (
+        arch = architecture_from_config(config)
+        if arch is None:
+            task = task_from_id(model_id, subfolder=subfolder)
+        assert task is not None or arch is not None, (
             f"Unable to determine the architecture for model {model_id!r}, "
-            f"architectures={archs!r}, conf={config}"
+            f"archs={arch!r}, conf={config}"
         )
         if verbose:
-            print(f"[get_untrained_model_with_inputs] architectures={archs!r}")
+            print(f"[get_untrained_model_with_inputs] architecture={arch!r}")
             print(f"[get_untrained_model_with_inputs] cls={config.__class__.__name__!r}")
         if task is None:
-            task = task_from_arch(archs[0], model_id=model_id, subfolder=subfolder)
+            task = task_from_arch(arch, model_id=model_id, subfolder=subfolder)
         if verbose:
             print(f"[get_untrained_model_with_inputs] task={task!r}")
 
@@ -170,36 +169,58 @@ def get_untrained_model_with_inputs(
                 f"{getattr(config, '_attn_implementation', '?')!r}"  # type: ignore[union-attr]
             )
 
-        if type(config) is dict and "_diffusers_version" in config:
+        if find_package_source(config) == "diffusers":
             import diffusers
 
             package_source = diffusers
         else:
             package_source = transformers
 
+        if verbose:
+            print(
+                f"[get_untrained_model_with_inputs] package_source={package_source.__name__} "
+                f"from {package_source.__file__}"
+            )
         if use_pretrained:
+            begin = time.perf_counter()
+            if verbose:
+                print(
+                    f"[get_untrained_model_with_inputs] pretrained model_id {model_id!r}, "
+                    f"subfolder={subfolder!r}"
+                )
             model = transformers.AutoModel.from_pretrained(
-                model_id, trust_remote_code=True, **mkwargs
+                model_id, subfolder=subfolder, trust_remote_code=True, **mkwargs
             )
+            if verbose:
+                print(
+                    f"[get_untrained_model_with_inputs] -- done in "
+                    f"{time.perf_counter() - begin}s"
+                )
         else:
-            if archs is not None:
+            begin = time.perf_counter()
+            if verbose:
+                print(
+                    f"[get_untrained_model_with_inputs] instantiate model_id {model_id!r}, "
+                    f"subfolder={subfolder!r}"
+                )
+            if arch is not None:
                 try:
-                    cls_model = getattr(package_source, archs[0])
+                    cls_model = getattr(package_source, arch)
                 except AttributeError as e:
                     # The code of the models is not in transformers but in the
                     # repository of the model. We need to download it.
                     pyfiles = download_code_modelid(model_id, verbose=verbose)
                     if pyfiles:
-                        if "." in archs[0]:
-                            cls_name = archs[0]
+                        if "." in arch:
+                            cls_name = arch
                         else:
                             modeling = [_ for _ in pyfiles if "/modeling_" in _]
                             assert len(modeling) == 1, (
                                 f"Unable to guess the main file implemented class "
-                                f"{archs[0]!r} from {pyfiles}, found={modeling}."
+                                f"{arch!r} from {pyfiles}, found={modeling}."
                             )
                             last_name = os.path.splitext(os.path.split(modeling[0])[-1])[0]
-                            cls_name = f"{last_name}.{archs[0]}"
+                            cls_name = f"{last_name}.{arch}"
                         if verbose:
                             print(
                                 f"[get_untrained_model_with_inputs] "
@@ -217,7 +238,7 @@ def get_untrained_model_with_inputs(
                         )
                     else:
                         raise AttributeError(
-                            f"Unable to find class 'tranformers.{archs[0]}'. "
+                            f"Unable to find class 'transformers.{arch}'. "
                             f"The code needs to be downloaded, config="
                             f"\n{pprint.pformat(config)}."
                         ) from e
@@ -225,20 +246,27 @@ def get_untrained_model_with_inputs(
                 assert same_as_pretrained and use_pretrained, (
                     f"Model {model_id!r} cannot be built, the model cannot be built. "
                     f"It must be downloaded. Use same_as_pretrained=True "
-                    f"and use_pretrained=True."
+                    f"and use_pretrained=True, arch={arch!r}, config={config}"
+                )
+            if verbose:
+                print(
+                    f"[get_untrained_model_with_inputs] -- done in "
+                    f"{time.perf_counter() - begin}s"
                 )
 
             seed = int(os.environ.get("SEED", "17"))
             torch.manual_seed(seed)
-            try:
-                if type(config) is dict:
-                    model = cls_model(**config)
-                else:
-                    model = cls_model(config)
-            except RuntimeError as e:
-                raise RuntimeError(
-                    f"Unable to instantiate class {cls_model.__name__} with\n{config}"
-                ) from e
+            model = instantiate_specific_model(cls_model, config)
+            if model is None:
+                try:
+                    if type(config) is dict:
+                        model = cls_model(**config)
+                    else:
+                        model = cls_model(config)
+                except RuntimeError as e:
+                    raise RuntimeError(
+                        f"Unable to instantiate class {cls_model.__name__} with\n{config}"
+                    ) from e
 
     # input kwargs
     seed = int(os.environ.get("SEED", "17")) + 1
diff --git a/onnx_diagnostic/torch_models/hghub/model_specific.py b/onnx_diagnostic/torch_models/hghub/model_specific.py
index 8cbb9665..9e055d0c 100644
--- a/onnx_diagnostic/torch_models/hghub/model_specific.py
+++ b/onnx_diagnostic/torch_models/hghub/model_specific.py
@@ -1,6 +1,33 @@
 from typing import Any, Dict, Tuple
 
 
+def instantiate_specific_model(cls_model: type, config: Any) -> object:
+    """
+    Instantiates some model requiring some specific code.
+    """
+    if cls_model.__name__ == "CosmosTransformer3DModel":
+        return instantiate_CosmosTransformer3DModel(cls_model, config)
+    return None
+
+
+def instantiate_CosmosTransformer3DModel(cls_model: type, config: Any) -> object:
+    kwargs = dict(
+        in_channels=config.in_channels,
+        out_channels=config.out_channels,
+        attention_head_dim=config.attention_head_dim,
+        mlp_ratio=config.mlp_ratio,
+        num_layers=config.num_layers,
+        text_embed_dim=config.text_embed_dim,
+        adaln_lora_dim=config.adaln_lora_dim,
+        max_size=config.max_size,
+        patch_size=config.patch_size,
+        rope_scale=config.rope_scale,
+        concat_padding_mask=config.concat_padding_mask,
+        extra_pos_embed_type=config.extra_pos_embed_type,
+    )
+    return cls_model(**kwargs)
+
+
 class SpecificConfig:
     """Creates a specific configuration for the loaded model."""
 

From 909321d65134d17ec7579f803d4448366d45937c Mon Sep 17 00:00:00 2001
From: xadupre <xadupre@microsoft.com>
Date: Fri, 19 Sep 2025 17:49:50 +0200
Subject: [PATCH 3/6] add unit test

---
 CHANGELOGS.rst                                |  2 +
 .../ut_tasks/test_tasks_image_to_video.py     | 66 +++++++++++++++++++
 .../torch_models/hghub/model_inputs.py        |  2 +-
 3 files changed, 69 insertions(+), 1 deletion(-)
 create mode 100644 _unittests/ut_tasks/test_tasks_image_to_video.py

diff --git a/CHANGELOGS.rst b/CHANGELOGS.rst
index a0809cb4..2cfbdc43 100644
--- a/CHANGELOGS.rst
+++ b/CHANGELOGS.rst
@@ -4,6 +4,8 @@ Change Logs
 0.7.11
 ++++++
 
+* :pr:`223`: adds task image-to-video
+* :pr:`220`: adds option --ort-logs to display onnxruntime logs when creating the session
 * :pr:`220`: adds a patch for PR `#40791 <https://github.com/huggingface/transformers/pull/40791>`_ in transformers
 
 0.7.10
diff --git a/_unittests/ut_tasks/test_tasks_image_to_video.py b/_unittests/ut_tasks/test_tasks_image_to_video.py
new file mode 100644
index 00000000..4d31c8bc
--- /dev/null
+++ b/_unittests/ut_tasks/test_tasks_image_to_video.py
@@ -0,0 +1,66 @@
+import unittest
+import torch
+import transformers
+from onnx_diagnostic.ext_test_case import (
+    ExtTestCase,
+    hide_stdout,
+    requires_diffusers,
+    requires_torch,
+)
+from onnx_diagnostic.torch_models.hghub.model_inputs import get_untrained_model_with_inputs
+from onnx_diagnostic.torch_export_patches import torch_export_patches
+from onnx_diagnostic.torch_export_patches.patch_inputs import use_dyn_not_str
+
+
+class TestTasksImageToVideo(ExtTestCase):
+    @hide_stdout()
+    @requires_diffusers("0.35")
+    @requires_torch("2.8.99")
+    def test_cosmos_predict(self):
+        kwargs = {
+            "_diffusers_version": "0.34.0.dev0",
+            "_class_name": "CosmosTransformer3DModel",
+            "max_size": [128, 240, 240],
+            "text_embed_dim": 128,
+            "use_cache": True,
+            "in_channels": 3,
+            "out_channels": 16,
+            "num_layers": 2,
+            "model_type": "dia",
+            "patch_size": [1, 2, 2],
+            "rope_scale": [1.0, 3.0, 3.0],
+            "attention_head_dim": 16,
+            "mlp_ratio": 0.4,
+            "initializer_range": 0.02,
+            "num_attention_heads": 16,
+            "is_encoder_decoder": True,
+            "adaln_lora_dim": 16,
+            "concat_padding_mask": True,
+            "extra_pos_embed_type": None,
+        }
+        config = transformers.DiaConfig(**kwargs)
+        mid = "nvidia/Cosmos-Predict2-2B-Video2World"
+        data = get_untrained_model_with_inputs(
+            mid,
+            verbose=1,
+            add_second_input=True,
+            subfolder="transformer",
+            config=config,
+            inputs_kwargs=dict(image_height=8 * 50, image_width=8 * 80),
+        )
+        self.assertEqual(data["task"], "image-to-video")
+        model, inputs, ds = data["model"], data["inputs"], data["dynamic_shapes"]
+        model(**inputs)
+        model(**data["inputs2"])
+        with torch.fx.experimental._config.patch(
+            backed_size_oblivious=True
+        ), torch_export_patches(
+            patch_transformers=True, patch_diffusers=True, verbose=10, stop_if_static=1
+        ):
+            torch.export.export(
+                model, (), kwargs=inputs, dynamic_shapes=use_dyn_not_str(ds), strict=False
+            )
+
+
+if __name__ == "__main__":
+    unittest.main(verbosity=2)
diff --git a/onnx_diagnostic/torch_models/hghub/model_inputs.py b/onnx_diagnostic/torch_models/hghub/model_inputs.py
index e08902a3..60cada10 100644
--- a/onnx_diagnostic/torch_models/hghub/model_inputs.py
+++ b/onnx_diagnostic/torch_models/hghub/model_inputs.py
@@ -282,7 +282,7 @@ def get_untrained_model_with_inputs(
 
     # This line is important. Some models may produce different
     # outputs even with the same inputs in training mode.
-    model.eval()
+    model.eval()  # type: ignore[uion-attr]
     res = fct(model, config, add_second_input=add_second_input, **kwargs)
 
     res["input_kwargs"] = kwargs

From c7b4be591836070534de33253019b4335cfbf8c7 Mon Sep 17 00:00:00 2001
From: xadupre <xadupre@microsoft.com>
Date: Fri, 19 Sep 2025 18:06:06 +0200
Subject: [PATCH 4/6] mypy

---
 onnx_diagnostic/torch_models/hghub/hub_data.py     | 1 +
 onnx_diagnostic/torch_models/hghub/model_inputs.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/onnx_diagnostic/torch_models/hghub/hub_data.py b/onnx_diagnostic/torch_models/hghub/hub_data.py
index 9d5b3716..d22c0f25 100644
--- a/onnx_diagnostic/torch_models/hghub/hub_data.py
+++ b/onnx_diagnostic/torch_models/hghub/hub_data.py
@@ -30,6 +30,7 @@
     ConvBertModel,feature-extraction
     ConvNextForImageClassification,image-classification
     ConvNextV2Model,image-feature-extraction
+    CosmosTransformer3DModel,image-to-video,
     CvtModel,feature-extraction
     DPTModel,image-feature-extraction
     Data2VecAudioModel,feature-extraction
diff --git a/onnx_diagnostic/torch_models/hghub/model_inputs.py b/onnx_diagnostic/torch_models/hghub/model_inputs.py
index 60cada10..3ab2ec5f 100644
--- a/onnx_diagnostic/torch_models/hghub/model_inputs.py
+++ b/onnx_diagnostic/torch_models/hghub/model_inputs.py
@@ -282,7 +282,7 @@ def get_untrained_model_with_inputs(
 
     # This line is important. Some models may produce different
     # outputs even with the same inputs in training mode.
-    model.eval()  # type: ignore[uion-attr]
+    model.eval()  # type: ignore[union-attr]
     res = fct(model, config, add_second_input=add_second_input, **kwargs)
 
     res["input_kwargs"] = kwargs

From 46e24d35955c4abb2cc3906cf19f3b88127f2345 Mon Sep 17 00:00:00 2001
From: xadupre <xadupre@microsoft.com>
Date: Fri, 19 Sep 2025 18:18:20 +0200
Subject: [PATCH 5/6] remove comma

---
 onnx_diagnostic/torch_models/hghub/hub_data.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/onnx_diagnostic/torch_models/hghub/hub_data.py b/onnx_diagnostic/torch_models/hghub/hub_data.py
index d22c0f25..299c37eb 100644
--- a/onnx_diagnostic/torch_models/hghub/hub_data.py
+++ b/onnx_diagnostic/torch_models/hghub/hub_data.py
@@ -30,7 +30,7 @@
     ConvBertModel,feature-extraction
     ConvNextForImageClassification,image-classification
     ConvNextV2Model,image-feature-extraction
-    CosmosTransformer3DModel,image-to-video,
+    CosmosTransformer3DModel,image-to-video
     CvtModel,feature-extraction
     DPTModel,image-feature-extraction
     Data2VecAudioModel,feature-extraction

From b4e4c534f8e3ba66fc33a1431b5d384aab99be2b Mon Sep 17 00:00:00 2001
From: xadupre <xadupre@microsoft.com>
Date: Fri, 19 Sep 2025 18:26:47 +0200
Subject: [PATCH 6/6] too old

---
 _unittests/ut_tasks/test_tasks_image_to_video.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/_unittests/ut_tasks/test_tasks_image_to_video.py b/_unittests/ut_tasks/test_tasks_image_to_video.py
index 4d31c8bc..dc40697f 100644
--- a/_unittests/ut_tasks/test_tasks_image_to_video.py
+++ b/_unittests/ut_tasks/test_tasks_image_to_video.py
@@ -6,6 +6,7 @@
     hide_stdout,
     requires_diffusers,
     requires_torch,
+    requires_transformers,
 )
 from onnx_diagnostic.torch_models.hghub.model_inputs import get_untrained_model_with_inputs
 from onnx_diagnostic.torch_export_patches import torch_export_patches
@@ -15,8 +16,9 @@
 class TestTasksImageToVideo(ExtTestCase):
     @hide_stdout()
     @requires_diffusers("0.35")
+    @requires_transformers("4.55")
     @requires_torch("2.8.99")
-    def test_cosmos_predict(self):
+    def test_image_to_video(self):
         kwargs = {
             "_diffusers_version": "0.34.0.dev0",
             "_class_name": "CosmosTransformer3DModel",