Commit 94a9b10
Merge branch 'main' into titaiwang/fix_modelbuilder_discrepancy
2 parents 65f1ca0 + c7afba2

File tree: 14 files changed, +625 -87 lines

.github/workflows/documentation.yml

Lines changed: 2 additions & 2 deletions
@@ -118,9 +118,9 @@ jobs:
           grep ERROR doc.txt | grep -v 'l-plot-tiny-llm-export'
           exit 1
         fi
-        if [[ $(grep WARNING doc.txt | grep -v 'l-plot-tiny-llm-export' | grep -v 'Inline emphasis start-string' | grep -v 'Definition list ends without a blank line' | grep -v 'Unexpected section title or transition' | grep -v 'Inline strong start-string') ]]; then
+        if [[ $(grep WARNING doc.txt | grep -v 'l-plot-tiny-llm-export' | grep -v 'Inline emphasis start-string' | grep -v 'Definition list ends without a blank line' | grep -v 'Unexpected section title or transition' | grep -v 'Inline strong start-string' | grep -v 'MambaCache') ]]; then
           echo "Documentation produces warnings."
-          grep WARNING doc.txt | grep -v 'l-plot-tiny-llm-export' | grep -v 'Inline emphasis start-string' | grep -v 'Definition list ends without a blank line' | grep -v 'Unexpected section title or transition' | grep -v 'Inline strong start-string'
+          grep WARNING doc.txt | grep -v 'l-plot-tiny-llm-export' | grep -v 'Inline emphasis start-string' | grep -v 'Definition list ends without a blank line' | grep -v 'Unexpected section title or transition' | grep -v 'Inline strong start-string' | grep -v 'MambaCache'
           exit 1
         fi
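The chain of grep -v calls is effectively a warning allowlist; the new MambaCache entry presumably silences the documentation warning introduced by moving the MambaCache import out of module scope (see onnx_diagnostic/helpers/cache_helper.py below). A minimal sketch of the same check with the allowlist kept in a pattern file, so future entries are one-line additions (the file name is illustrative):

# warning_allowlist.txt holds one pattern per line:
# l-plot-tiny-llm-export, Inline emphasis start-string, ..., MambaCache
if grep WARNING doc.txt | grep -v -f warning_allowlist.txt | grep -q .; then
  echo "Documentation produces warnings."
  grep WARNING doc.txt | grep -v -f warning_allowlist.txt
  exit 1
fi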

CHANGELOGS.rst

Lines changed: 3 additions & 0 deletions
@@ -4,6 +4,9 @@ Change Logs
 0.7.11
 ++++++

+* :pr:`224`: support model_id with // to specify a subfolder
+* :pr:`223`: adds task image-to-video
+* :pr:`220`: adds option --ort-logs to display onnxruntime logs when creating the session
 * :pr:`220`: adds a patch for PR `#40791 <https://github.com/huggingface/transformers/pull/40791>`_ in transformers

 0.7.10
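For :pr:`224`, a hedged sketch of the // convention: everything before // is the Hugging Face repo id and everything after it is the subfolder inside that repository (the helper below is a hypothetical illustration, not the library's API):

def split_model_id(model_id: str):
    # Hypothetical helper: "org/name//sub/folder" -> ("org/name", "sub/folder").
    if "//" in model_id:
        repo_id, subfolder = model_id.split("//", maxsplit=1)
        return repo_id, subfolder
    return model_id, None

# The test added in this commit pairs "nvidia/Cosmos-Predict2-2B-Video2World"
# with subfolder="transformer"; under this convention that is the single id
# "nvidia/Cosmos-Predict2-2B-Video2World//transformer".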
Lines changed: 131 additions & 0 deletions
@@ -0,0 +1,131 @@
"""
Compares two ONNX models.
"""

print("-- import onnx")
import onnx

print("-- import onnx.helper")
from onnx.helper import tensor_dtype_to_np_dtype

print("-- import onnxruntime")
import onnxruntime

print("-- import torch")
import torch

print("-- import transformers")
import transformers

print("-- import huggingface_hub")
import huggingface_hub

print("-- import onnx-diagnostic.helper")
from onnx_diagnostic.helpers.helper import flatten_object, string_type, max_diff, string_diff

print("-- import onnx-diagnostic.torch_models.hghub")
from onnx_diagnostic.torch_models.hghub import get_untrained_model_with_inputs

print("-- done")

model_id = "arnir0/Tiny-LLM"
onnx1 = (
    "dump_test/arnir0_Tiny-LLM-custom-default-f16-cuda-op20/"
    "arnir0_Tiny-LLM-custom-default-f16-cuda-op20.onnx"
)
onnx2 = (
    "dump_test/arnir0_Tiny-LLM-custom-default-f16-cuda-op21/"
    "arnir0_Tiny-LLM-custom-default-f16-cuda-op21.onnx"
)
providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]

print(f"-- load {onnx1!r}")
onx1 = onnx.load(onnx1)
print(f"-- load {onnx2!r}")
onx2 = onnx.load(onnx2)

print(f"-- getting inputs for model_id {model_id!r}")
data = get_untrained_model_with_inputs(model_id)
inputs = data["inputs"]
print(f"-- inputs: {string_type(inputs, with_shape=True)}")
flatten_inputs = flatten_object(inputs, drop_keys=True)
print(f"-- flat inputs: {string_type(flatten_inputs, with_shape=True)}")

names = [i.name for i in onx1.graph.input]
itypes = [i.type.tensor_type.elem_type for i in onx1.graph.input]
assert names == [
    i.name for i in onx2.graph.input
], f"Not the same names for both models {names} != {[i.name for i in onx2.graph.input]}"
feeds = {
    n: t.numpy().astype(tensor_dtype_to_np_dtype(itype))
    for n, itype, t in zip(names, itypes, flatten_inputs)
}
print(f"-- feeds: {string_type(feeds, with_shape=True)}")

print(f"-- creating session 1 from {onnx1!r}")
opts = onnxruntime.SessionOptions()
opts.optimized_model_filepath = "debug1_full.onnx"
opts.log_severity_level = 0
opts.log_verbosity_level = 0
sess1 = onnxruntime.InferenceSession(onnx1, opts, providers=providers)
print(f"-- creating session 2 from {onnx2!r}")
opts.optimized_model_filepath = "debug2_full.onnx"
opts.log_severity_level = 0
opts.log_verbosity_level = 0
sess2 = onnxruntime.InferenceSession(onnx2, opts, providers=providers)

print("-- run session1")
expected1 = sess1.run(None, feeds)
print(f"-- got {string_type(expected1, with_shape=True)}")
print("-- run session2")
expected2 = sess2.run(None, feeds)
print(f"-- got {string_type(expected2, with_shape=True)}")

print("-- compute differences")
diff = max_diff(expected1, expected2)
print(f"-- diff={string_diff(diff)}")

def get_names(onx: onnx.ModelProto) -> list[tuple[str, str, str]]:
    "Returns (output name, op_type, node name) for every node output."
    names = []
    for node in onx.graph.node:
        for o in node.output:
            names.append((o, node.op_type, node.name))
    return names


if diff["abs"] > 0.1:
    print("--")
    print("-- import select_model_inputs_outputs")
    from onnx_extended.tools.onnx_nodes import select_model_inputs_outputs

    print("-- looking into intermediate results")
    names1 = get_names(onx1)
    names2 = get_names(onx2)  # was get_names(onx1): compare against the second model
    common = [n for n in names1 if n in (set(names1) & set(names2))]
    print(f"-- {len(common)} names / {len(names1)}-{len(names2)}")
    print(f"-- first names {common[:5]}")
    for name, op_type, op_name in common:
        # truncate both models so they stop at the same intermediate output
        x1 = select_model_inputs_outputs(onx1, [name])
        x2 = select_model_inputs_outputs(onx2, [name])
        s1 = onnxruntime.InferenceSession(x1.SerializeToString(), providers=providers)
        s2 = onnxruntime.InferenceSession(x2.SerializeToString(), providers=providers)
        e1 = s1.run(None, feeds)
        e2 = s2.run(None, feeds)
        diff = max_diff(e1, e2)
        print(
            f"-- name={name!r}: diff={string_diff(diff)} "
            f"- op_type={op_type!r}, op_name={op_name!r}"
        )
        if diff["abs"] > 0.1:
            # dump the optimized truncated models for manual inspection
            opts = onnxruntime.SessionOptions()
            opts.optimized_model_filepath = "debug1.onnx"
            onnxruntime.InferenceSession(x1.SerializeToString(), opts, providers=providers)
            opts.optimized_model_filepath = "debug2.onnx"
            onnxruntime.InferenceSession(x2.SerializeToString(), opts, providers=providers)
            print("--")
            print("-- break here")
            print(f"-- feeds {string_type(feeds, with_shape=True)}")
            print(f"-- e1={string_type(e1, with_shape=True, with_min_max=True)}")
            print(f"-- e2={string_type(e2, with_shape=True, with_min_max=True)}")
            break
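The comparison logic above hinges on max_diff, which returns a dictionary of discrepancy statistics (the script reads its "abs" key), and string_diff, which renders that dictionary for logging. A toy call, assuming both helpers accept plain lists of tensors the way the script passes session outputs:

import torch

from onnx_diagnostic.helpers.helper import max_diff, string_diff

# Two aligned output lists differing by 0.5 in a single element.
d = max_diff([torch.tensor([1.0, 2.0])], [torch.tensor([1.0, 2.5])])
print(d["abs"])        # maximum absolute discrepancy across all outputs
print(string_diff(d))  # compact, human-readable summary of the same dict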
Lines changed: 68 additions & 0 deletions
@@ -0,0 +1,68 @@
import unittest
import torch
import transformers
from onnx_diagnostic.ext_test_case import (
    ExtTestCase,
    hide_stdout,
    requires_diffusers,
    requires_torch,
    requires_transformers,
)
from onnx_diagnostic.torch_models.hghub.model_inputs import get_untrained_model_with_inputs
from onnx_diagnostic.torch_export_patches import torch_export_patches
from onnx_diagnostic.torch_export_patches.patch_inputs import use_dyn_not_str


class TestTasksImageToVideo(ExtTestCase):
    @hide_stdout()
    @requires_diffusers("0.35")
    @requires_transformers("4.55")
    @requires_torch("2.8.99")
    def test_image_to_video(self):
        kwargs = {
            "_diffusers_version": "0.34.0.dev0",
            "_class_name": "CosmosTransformer3DModel",
            "max_size": [128, 240, 240],
            "text_embed_dim": 128,
            "use_cache": True,
            "in_channels": 3,
            "out_channels": 16,
            "num_layers": 2,
            "model_type": "dia",
            "patch_size": [1, 2, 2],
            "rope_scale": [1.0, 3.0, 3.0],
            "attention_head_dim": 16,
            "mlp_ratio": 0.4,
            "initializer_range": 0.02,
            "num_attention_heads": 16,
            "is_encoder_decoder": True,
            "adaln_lora_dim": 16,
            "concat_padding_mask": True,
            "extra_pos_embed_type": None,
        }
        config = transformers.DiaConfig(**kwargs)
        mid = "nvidia/Cosmos-Predict2-2B-Video2World"
        data = get_untrained_model_with_inputs(
            mid,
            verbose=1,
            add_second_input=True,
            subfolder="transformer",
            config=config,
            inputs_kwargs=dict(image_height=8 * 50, image_width=8 * 80),
        )
        self.assertEqual(data["task"], "image-to-video")
        model, inputs, ds = data["model"], data["inputs"], data["dynamic_shapes"]
        model(**inputs)
        model(**data["inputs2"])
        with torch.fx.experimental._config.patch(
            backed_size_oblivious=True
        ), torch_export_patches(
            patch_transformers=True, patch_diffusers=True, verbose=10, stop_if_static=1
        ):
            torch.export.export(
                model, (), kwargs=inputs, dynamic_shapes=use_dyn_not_str(ds), strict=False
            )


if __name__ == "__main__":
    unittest.main(verbosity=2)

onnx_diagnostic/_command_lines_parser.py

Lines changed: 8 additions & 1 deletion
@@ -474,7 +474,7 @@ def get_parser_validate() -> ArgumentParser:
     )
     parser.add_argument(
         "--runtime",
-        choices=["onnxruntime", "torch", "ref"],
+        choices=["onnxruntime", "torch", "ref", "orteval", "orteval10"],
         default="onnxruntime",
         help="onnx runtime to use, `onnxruntime` by default",
     )
@@ -542,6 +542,12 @@ def get_parser_validate() -> ArgumentParser:
         "the onnx exporter should use.",
         default="",
     )
+    parser.add_argument(
+        "--ort-logs",
+        default=False,
+        action=BooleanOptionalAction,
+        help="Enables onnxruntime logging when the session is created",
+    )
     return parser


@@ -601,6 +607,7 @@ def _cmd_validate(argv: List[Any]):
         repeat=args.repeat,
         warmup=args.warmup,
         inputs2=args.inputs2,
+        ort_logs=args.ort_logs,
         output_names=(
             None if len(args.outnames.strip()) < 2 else args.outnames.strip().split(",")
         ),
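BooleanOptionalAction gives the new flag a negated twin for free, which is why no explicit --no-ort-logs is declared; a standalone sketch of the behavior (the parser below is illustrative, not the project's real parser):

from argparse import ArgumentParser, BooleanOptionalAction

parser = ArgumentParser()
parser.add_argument("--ort-logs", default=False, action=BooleanOptionalAction)
print(parser.parse_args(["--ort-logs"]).ort_logs)     # True
print(parser.parse_args(["--no-ort-logs"]).ort_logs)  # False
print(parser.parse_args([]).ort_logs)                 # False, the declared default

argparse also converts the dash to an underscore, which is how the value reaches _cmd_validate above as args.ort_logs.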

onnx_diagnostic/helpers/cache_helper.py

Lines changed: 8 additions & 6 deletions
@@ -4,11 +4,6 @@
 import transformers
 import transformers.cache_utils

-try:
-    from transformers.models.mamba.modeling_mamba import MambaCache
-except ImportError:
-    from transformers.cache_utils import MambaCache
-

 class CacheKeyValue:
     """
@@ -354,8 +349,15 @@ def make_encoder_decoder_cache(
     )


-def make_mamba_cache(key_value_pairs: List[Tuple[torch.Tensor, torch.Tensor]]) -> MambaCache:
+def make_mamba_cache(
+    key_value_pairs: List[Tuple[torch.Tensor, torch.Tensor]],
+) -> "MambaCache":  # noqa: F821
     "Creates a ``MambaCache``."
+    # import is moved here because this part is slow.
+    try:
+        from transformers.models.mamba.modeling_mamba import MambaCache
+    except ImportError:
+        from transformers.cache_utils import MambaCache
     dtype = key_value_pairs[0][0].dtype

     class _config:
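Two details of this hunk deserve a note: the import moves into the function body so that importing cache_helper no longer pays for the slow transformers submodule, and the return annotation becomes the string "MambaCache" so the module still parses when the name is undefined at module level (hence noqa: F821 for flake8's undefined-name check). A reduced sketch of the pattern, with the cache construction elided:

def make_mamba_cache_sketch() -> "MambaCache":  # noqa: F821  string annotation, resolved lazily
    # Deferred import: paid on the first call only, then cached in sys.modules.
    try:
        # recent transformers keeps MambaCache next to the Mamba model
        from transformers.models.mamba.modeling_mamba import MambaCache
    except ImportError:
        # older layouts expose it from transformers.cache_utils
        from transformers.cache_utils import MambaCache
    ...  # the real function builds a MambaCache from the key/value pairs here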

onnx_diagnostic/tasks/__init__.py

Lines changed: 4 additions & 2 deletions
@@ -5,6 +5,8 @@
     fill_mask,
     image_classification,
     image_text_to_text,
+    image_to_video,
+    mask_generation,
     mixture_of_expert,
     object_detection,
     sentence_similarity,
@@ -14,7 +16,6 @@
     text_to_image,
     text2text_generation,
     zero_shot_image_classification,
-    mask_generation,
 )

 __TASKS__ = [
@@ -23,6 +24,8 @@
     fill_mask,
     image_classification,
     image_text_to_text,
+    image_to_video,
+    mask_generation,
     mixture_of_expert,
     object_detection,
     sentence_similarity,
@@ -32,7 +35,6 @@
     text_to_image,
     text2text_generation,
     zero_shot_image_classification,
-    mask_generation,
 ]
