4 changes: 3 additions & 1 deletion CHANGELOGS.rst
@@ -4,6 +4,8 @@ Change Logs
0.3.0
+++++

* :pr:`28`: adds a command line to print out the configuration for a model id,
  supports image-text-to-text
* :pr:`26`: creates a folder ``helpers`` to gather all the functions
  used in many places
* :pr:`25`: improve patches for DynamicCache
@@ -12,7 +14,7 @@ Change Logs
  ``convert_dynamic_axes_into_dynamic_shapes`` to convert dynamic axes
  into dynamic shapes, add support for ``T5ForConditionalGeneration``
* :pr:`23`: dummy inputs for ``image-classification``
* :pr:`22`: api to create untrained model copying the architecture
* :pr:`22`, :pr:`27`: api to create untrained model copying the architecture
  of the trained models and dummy inputs for them,
  support for ``text-generation``

88 changes: 88 additions & 0 deletions _doc/cmds/config.rst
@@ -0,0 +1,88 @@
-m onnx_diagnostic config ... prints the config for a model id
==============================================================

Description
+++++++++++

The command line prints out the configuration for a model id
available on :epkg:`HuggingFace`. A programmatic equivalent is sketched after
the example below.

.. runpython::

    from onnx_diagnostic._command_lines_parser import get_parser_config

    get_parser_config().print_help()

Example
+++++++

.. code-block:: bash

    python -m onnx_diagnostic config HuggingFaceM4/tiny-random-idefics

.. code-block:: text

    IdeficsConfig {
      "additional_vocab_size": 2,
      "alpha_initializer": "ones",
      "alpha_type": "vector",
      "alphas_initializer_range": 0.0,
      "architectures": [
        "IdeficsForVisionText2Text"
      ],
      "bos_token_id": 1,
      "cross_layer_activation_function": "swiglu",
      "cross_layer_interval": 1,
      "dropout": 0.0,
      "eos_token_id": 2,
      "ffn_dim": 64,
      "freeze_lm_head": false,
      "freeze_text_layers": false,
      "freeze_text_module_exceptions": [],
      "freeze_vision_layers": false,
      "freeze_vision_module_exceptions": [],
      "hidden_act": "silu",
      "hidden_size": 16,
      "initializer_range": 0.02,
      "intermediate_size": 11008,
      "max_new_tokens": 128,
      "max_position_embeddings": 128,
      "model_type": "idefics",
      "num_attention_heads": 4,
      "num_hidden_layers": 2,
      "pad_token_id": 0,
      "perceiver_config": {
        "model_type": "idefics_perciever",
        "qk_layer_norms_perceiver": false,
        "resampler_depth": 2,
        "resampler_head_dim": 8,
        "resampler_n_heads": 2,
        "resampler_n_latents": 16,
        "use_resampler": false
      },
      "qk_layer_norms": false,
      "rms_norm_eps": 1e-06,
      "tie_word_embeddings": false,
      "torch_dtype": "float16",
      "transformers_version": "4.51.0.dev0",
      "use_cache": true,
      "use_resampler": true,
      "vision_config": {
        "attention_dropout": 0.0,
        "embed_dim": 32,
        "hidden_act": "gelu",
        "image_size": 30,
        "initializer_factor": 1.0,
        "initializer_range": 0.02,
        "intermediate_size": 37,
        "layer_norm_eps": 1e-05,
        "model_type": "idefics_vision",
        "num_attention_heads": 4,
        "num_channels": 3,
        "num_hidden_layers": 5,
        "patch_size": 2,
        "vision_model_name": "hf-internal-testing/tiny-random-clip"
      },
      "vocab_size": 32000,
      "word_embed_proj_dim": 16
    }
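
For comparison, the same information can be retrieved programmatically. A minimal
sketch, assuming :epkg:`transformers` is installed (that the command line relies on
exactly this call is an assumption, not confirmed by this diff):

.. code-block:: python

    from transformers import AutoConfig

    # fetches and parses only the configuration file, not the model weights
    config = AutoConfig.from_pretrained("HuggingFaceM4/tiny-random-idefics")
    print(config)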
11 changes: 11 additions & 0 deletions _doc/cmds/index.rst
@@ -0,0 +1,11 @@
Command Lines
=============

.. code-block:: bash

    python -m onnx_diagnostic

.. toctree::
    :maxdepth: 1

    config
1 change: 1 addition & 0 deletions _doc/index.rst
@@ -29,6 +29,7 @@ Source are `sdpython/onnx-diagnostic
    :caption: Contents

    api/index
    cmds/index
    auto_examples/index

.. toctree::
9 changes: 9 additions & 0 deletions _unittests/ut_torch_models/test_hghub_model.py
@@ -100,6 +100,15 @@ def test_get_untrained_model_with_inputs_text2text_generation(self):
            raise unittest.SkipTest(f"not working for {mid!r}")
        model(**inputs)

    @hide_stdout()
    def test_get_untrained_model_with_inputs_imagetext2text_generation(self):
        mid = "HuggingFaceM4/tiny-random-idefics"
        data = get_untrained_model_with_inputs(mid, verbose=1)
        # sanity check on the reported size and weight count of the untrained copy
        self.assertIn((data["size"], data["n_weights"]), [(12742888, 3185722)])
        model, inputs = data["model"], data["inputs"]
        model(**inputs)

    @hide_stdout()
    @requires_torch("2.7", "reduce test time")
    @requires_transformers("4.50", "reduce test time")
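For context, the API exercised by the new test could be used on its own along these
lines; a minimal sketch, assuming the import path below (inferred from the test
module, not confirmed by this diff):

.. code-block:: python

    from onnx_diagnostic.torch_models.hghub import get_untrained_model_with_inputs

    # builds an untrained copy of the architecture plus matching dummy inputs
    data = get_untrained_model_with_inputs("HuggingFaceM4/tiny-random-idefics")
    model, inputs = data["model"], data["inputs"]
    model(**inputs)  # run the untrained copy on its dummy inputs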
33 changes: 32 additions & 1 deletion _unittests/ut_torch_models/try_tasks.py
@@ -6,7 +6,7 @@

class TestHuggingFaceHubModel(ExtTestCase):
    @never_test()
    def test_image_classiciation(self):
    def test_image_classification(self):
        # clear&&NEVERTEST=1 python _unittests/ut_torch_models/try_tasks.py -k image_c

        from transformers import ViTImageProcessor, ViTModel
@@ -51,6 +51,37 @@ def test_text2text_generation(self):
        )
        print(tokenizer.decode(generated_ids[0], skip_special_tokens=True))

    @never_test()
    def test_imagetext2text_generation(self):
        # clear&&NEVERTEST=1 python _unittests/ut_torch_models/try_tasks.py -k etext2t
        # https://huggingface.co/docs/transformers/main/en/tasks/idefics

        import torch
        from transformers import IdeficsForVisionText2Text, AutoProcessor

        mid = "HuggingFaceM4/tiny-random-idefics"
        processor = AutoProcessor.from_pretrained(mid)
        model = IdeficsForVisionText2Text.from_pretrained(
            mid, torch_dtype=torch.bfloat16, device_map="auto"
        )

        prompt = [
            "https://images.unsplash.com/photo-1583160247711-2191776b4b91?ixlib=rb-4.0.3"
            "&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D&auto=format&fit=crop&w=3542&q=80",
        ]
        # the inputs are moved to CUDA, so this test assumes a GPU is available
        inputs = processor(text=prompt, return_tensors="pt").to("cuda")
        # collect the ids of the image placeholder tokens so that generate()
        # cannot emit them as plain text
        bad_words_ids = processor.tokenizer(
            ["<image>", "<fake_token_around_image>"], add_special_tokens=False
        ).input_ids
        print()
        with steel_forward(model):
            generated_ids = model.generate(
                **inputs, max_new_tokens=10, bad_words_ids=bad_words_ids
            )
        generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)

        print(generated_text[0])


if __name__ == "__main__":
    unittest.main(verbosity=2)
60 changes: 60 additions & 0 deletions _unittests/ut_xrun_doc/test_command_lines.py
@@ -0,0 +1,60 @@
import unittest
from contextlib import redirect_stdout
from io import StringIO
from onnx_diagnostic.ext_test_case import ExtTestCase
from onnx_diagnostic._command_lines_parser import (
    get_main_parser,
    get_parser_find,
    get_parser_lighten,
    get_parser_print,
    get_parser_unlighten,
    get_parser_config,
)


class TestCommandLines(ExtTestCase):
    def test_main_parser(self):
        st = StringIO()
        with redirect_stdout(st):
            get_main_parser().print_help()
        text = st.getvalue()
        self.assertIn("lighten", text)

    def test_parser_lighten(self):
        st = StringIO()
        with redirect_stdout(st):
            get_parser_lighten().print_help()
        text = st.getvalue()
        self.assertIn("model", text)

    def test_parser_unlighten(self):
        st = StringIO()
        with redirect_stdout(st):
            get_parser_unlighten().print_help()
        text = st.getvalue()
        self.assertIn("model", text)

    def test_parser_print(self):
        st = StringIO()
        with redirect_stdout(st):
            get_parser_print().print_help()
        text = st.getvalue()
        self.assertIn("pretty", text)

    def test_parser_find(self):
        st = StringIO()
        with redirect_stdout(st):
            get_parser_find().print_help()
        text = st.getvalue()
        self.assertIn("names", text)

    def test_parser_config(self):
        st = StringIO()
        with redirect_stdout(st):
            get_parser_config().print_help()
        text = st.getvalue()
        self.assertIn("mid", text)


if __name__ == "__main__":
    unittest.main(verbosity=2)
4 changes: 4 additions & 0 deletions onnx_diagnostic/__main__.py
@@ -0,0 +1,4 @@
from ._command_lines_parser import main

if __name__ == "__main__":
    main()
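
With this entry point in place, the sub-commands become reachable through the
interpreter, for instance the command documented above:

.. code-block:: bash

    python -m onnx_diagnostic config HuggingFaceM4/tiny-random-idefics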