
Commit 75456d7

add second input

1 parent 9df1cdc

12 files changed (+250, -76 lines)

_doc/api/tasks/index.rst

Lines changed: 5 additions & 2 deletions
@@ -9,8 +9,11 @@ All submodules contain the following three functions:
 * ``random_input_kwargs(config) -> kwargs, get_inputs``:
   produces values ``get_inputs`` can take to generate dummy inputs
   suitable for a model defined by its configuration
-* ``get_inputs(model, config, *args, **kwargs) -> dict(inputs=..., dynamic_shapes=...)``:
-  generates the dummy inputs and dynamic shapes for a specific model and configuration.
+* ``get_inputs(model, config, *args, add_second_input=False, **kwargs) -> dict(inputs=..., dynamic_shapes=...)``:
+  generates the dummy inputs and dynamic shapes for a specific model and configuration.
+  If ``add_second_input`` is True, the function returns a second set of inputs
+  with different values for the dynamic dimensions. It is usually better to
+  rely on this function, as the dynamic dimensions may be correlated.

 For a specific task, you would write:
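A minimal sketch of the intended call pattern, based only on the signatures documented above (``model`` and ``config`` stand for a transformers model and its configuration):

# Sketch: build two input sets whose dynamic dimensions take different values.
kwargs, get_inputs = random_input_kwargs(config)
res = get_inputs(model, config, add_second_input=True, **kwargs)
inputs, ds = res["inputs"], res["dynamic_shapes"]
inputs2 = res["inputs2"]  # same structure, different dynamic-dimension values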

_unittests/ut_tasks/test_tasks.py

Lines changed: 68 additions & 10 deletions
@@ -10,11 +10,41 @@ class TestTasks(ExtTestCase):
     @hide_stdout()
     def test_text2text_generation(self):
         mid = "sshleifer/tiny-marian-en-de"
-        data = get_untrained_model_with_inputs(mid, verbose=1)
+        data = get_untrained_model_with_inputs(mid, verbose=1, add_second_input=True)
+        self.assertEqual(data["task"], "text2text-generation")
         self.assertIn((data["size"], data["n_weights"]), [(473928, 118482)])
         model, inputs, ds = data["model"], data["inputs"], data["dynamic_shapes"]
         raise unittest.SkipTest(f"not working for {mid!r}")
         model(**inputs)
+        model(**data["inputs2"])
+        with bypass_export_some_errors(patch_transformers=True, verbose=10):
+            torch.export.export(
+                model, (), kwargs=inputs, dynamic_shapes=use_dyn_not_str(ds), strict=False
+            )
+
+    @hide_stdout()
+    def test_text_generation(self):
+        mid = "arnir0/Tiny-LLM"
+        data = get_untrained_model_with_inputs(mid, verbose=1, add_second_input=True)
+        self.assertEqual(data["task"], "text-generation")
+        self.assertIn((data["size"], data["n_weights"]), [(51955968, 12988992)])
+        model, inputs, ds = data["model"], data["inputs"], data["dynamic_shapes"]
+        model(**inputs)
+        model(**data["inputs2"])
+        with bypass_export_some_errors(patch_transformers=True, verbose=10):
+            torch.export.export(
+                model, (), kwargs=inputs, dynamic_shapes=use_dyn_not_str(ds), strict=False
+            )
+
+    @hide_stdout()
+    def test_image_classification(self):
+        mid = "hf-internal-testing/tiny-random-BeitForImageClassification"
+        data = get_untrained_model_with_inputs(mid, verbose=1, add_second_input=True)
+        self.assertEqual(data["task"], "image-classification")
+        self.assertIn((data["size"], data["n_weights"]), [(56880, 14220)])
+        model, inputs, ds = data["model"], data["inputs"], data["dynamic_shapes"]
+        model(**inputs)
+        model(**data["inputs2"])
         with bypass_export_some_errors(patch_transformers=True, verbose=10):
             torch.export.export(
                 model, (), kwargs=inputs, dynamic_shapes=use_dyn_not_str(ds), strict=False
@@ -23,9 +53,11 @@ def test_text2text_generation(self):
     @hide_stdout()
     def test_automatic_speech_recognition(self):
         mid = "openai/whisper-tiny"
-        data = get_untrained_model_with_inputs(mid, verbose=1)
+        data = get_untrained_model_with_inputs(mid, verbose=1, add_second_input=True)
+        self.assertEqual(data["task"], "automatic-speech-recognition")
         self.assertIn((data["size"], data["n_weights"]), [(132115968, 33028992)])
         model, inputs, ds = data["model"], data["inputs"], data["dynamic_shapes"]
+        model(**data["inputs2"])
         Dim = torch.export.Dim
         self.maxDiff = None
         self.assertIn("{0:Dim(batch),1:DYN(seq_length)}", self.string_type(ds))
@@ -91,13 +123,15 @@ def test_automatic_speech_recognition(self):
         )

     @hide_stdout()
-    def test_imagetext2text_generation(self):
+    def test_image_text_to_text(self):
         mid = "HuggingFaceM4/tiny-random-idefics"
-        data = get_untrained_model_with_inputs(mid, verbose=1)
+        data = get_untrained_model_with_inputs(mid, verbose=1, add_second_input=True)
+        self.assertEqual(data["task"], "image-text-to-text")
         self.assertIn((data["size"], data["n_weights"]), [(12742888, 3185722)])
         model, inputs, ds = data["model"], data["inputs"], data["dynamic_shapes"]
         model(**inputs)
-        if not has_torch("2.10"):
+        model(**data["inputs2"])
+        if not has_torch("2.8"):
             raise unittest.SkipTest("sym_max does not work with dynamic dimension")
         with bypass_export_some_errors(patch_transformers=True, verbose=10):
             torch.export.export(
@@ -107,10 +141,12 @@ def test_imagetext2text_generation(self):
     @hide_stdout()
     def test_fill_mask(self):
         mid = "google-bert/bert-base-multilingual-cased"
-        data = get_untrained_model_with_inputs(mid, verbose=1)
+        data = get_untrained_model_with_inputs(mid, verbose=1, add_second_input=True)
+        self.assertEqual(data["task"], "fill-mask")
         self.assertIn((data["size"], data["n_weights"]), [(428383212, 107095803)])
         model, inputs, ds = data["model"], data["inputs"], data["dynamic_shapes"]
         model(**inputs)
+        model(**data["inputs2"])
         with bypass_export_some_errors(patch_transformers=True, verbose=10):
             torch.export.export(
                 model, (), kwargs=inputs, dynamic_shapes=use_dyn_not_str(ds), strict=False
@@ -119,10 +155,12 @@ def test_fill_mask(self):
     @hide_stdout()
     def test_feature_extraction(self):
         mid = "facebook/bart-base"
-        data = get_untrained_model_with_inputs(mid, verbose=1)
+        data = get_untrained_model_with_inputs(mid, verbose=1, add_second_input=True)
+        self.assertEqual(data["task"], "feature-extraction")
         self.assertIn((data["size"], data["n_weights"]), [(557681664, 139420416)])
         model, inputs, ds = data["model"], data["inputs"], data["dynamic_shapes"]
         model(**inputs)
+        model(**data["inputs2"])
         with bypass_export_some_errors(patch_transformers=True, verbose=10):
             torch.export.export(
                 model, (), kwargs=inputs, dynamic_shapes=use_dyn_not_str(ds), strict=False
@@ -131,10 +169,12 @@ def test_feature_extraction(self):
     @hide_stdout()
     def test_text_classification(self):
         mid = "Intel/bert-base-uncased-mrpc"
-        data = get_untrained_model_with_inputs(mid, verbose=1)
+        data = get_untrained_model_with_inputs(mid, verbose=1, add_second_input=True)
+        self.assertEqual(data["task"], "text-classification")
         self.assertIn((data["size"], data["n_weights"]), [(154420232, 38605058)])
         model, inputs, ds = data["model"], data["inputs"], data["dynamic_shapes"]
         model(**inputs)
+        model(**data["inputs2"])
         with bypass_export_some_errors(patch_transformers=True, verbose=10):
             torch.export.export(
                 model, (), kwargs=inputs, dynamic_shapes=use_dyn_not_str(ds), strict=False
@@ -143,10 +183,12 @@ def test_text_classification(self):
     @hide_stdout()
     def test_sentence_similary(self):
         mid = "sentence-transformers/all-MiniLM-L6-v1"
-        data = get_untrained_model_with_inputs(mid, verbose=1)
+        data = get_untrained_model_with_inputs(mid, verbose=1, add_second_input=True)
+        self.assertEqual(data["task"], "sentence-similarity")
         self.assertIn((data["size"], data["n_weights"]), [(62461440, 15615360)])
         model, inputs, ds = data["model"], data["inputs"], data["dynamic_shapes"]
         model(**inputs)
+        model(**data["inputs2"])
         with bypass_export_some_errors(patch_transformers=True, verbose=10):
             torch.export.export(
                 model, (), kwargs=inputs, dynamic_shapes=use_dyn_not_str(ds), strict=False
@@ -155,9 +197,11 @@ def test_sentence_similary(self):
     @hide_stdout()
     def test_falcon_mamba_dev(self):
         mid = "tiiuae/falcon-mamba-tiny-dev"
-        data = get_untrained_model_with_inputs(mid, verbose=1)
+        data = get_untrained_model_with_inputs(mid, verbose=1, add_second_input=True)
+        self.assertEqual(data["task"], "text-generation")
         model, inputs, ds = data["model"], data["inputs"], data["dynamic_shapes"]
         model(**inputs)
+        model(**data["inputs2"])
         self.assertIn((data["size"], data["n_weights"]), [(138640384, 34660096)])
         if not has_transformers("4.55"):
             raise unittest.SkipTest("The model has control flow.")
@@ -166,6 +210,20 @@ def test_falcon_mamba_dev(self):
                 model, (), kwargs=inputs, dynamic_shapes=use_dyn_not_str(ds), strict=False
             )

+    @hide_stdout()
+    def test_zero_shot_image_classification(self):
+        mid = "openai/clip-vit-base-patch16"
+        data = get_untrained_model_with_inputs(mid, verbose=1, add_second_input=True)
+        self.assertEqual(data["task"], "zero-shot-image-classification")
+        self.assertIn((data["size"], data["n_weights"]), [(188872708, 47218177)])
+        model, inputs, ds = data["model"], data["inputs"], data["dynamic_shapes"]
+        model(**inputs)
+        model(**data["inputs2"])
+        with bypass_export_some_errors(patch_transformers=True, verbose=10):
+            torch.export.export(
+                model, (), kwargs=inputs, dynamic_shapes=use_dyn_not_str(ds), strict=False
+            )
+

 if __name__ == "__main__":
     unittest.main(verbosity=2)
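Every test follows the same recipe: run the model on both input sets, then export with dynamic shapes. The second set exists so the exported program can be replayed on different dynamic-dimension values. A minimal sketch of that end-to-end check (the import path and the plain ``ds`` are assumptions; the tests above additionally wrap ``ds`` with ``use_dyn_not_str``):

# Sketch: export with the first input set, then run the exported program
# on the second set; this fails if a dynamic dimension was specialized.
import torch
from onnx_diagnostic.torch_models.hghub import get_untrained_model_with_inputs  # assumed path

data = get_untrained_model_with_inputs("arnir0/Tiny-LLM", add_second_input=True)
model, inputs, ds = data["model"], data["inputs"], data["dynamic_shapes"]
ep = torch.export.export(model, (), kwargs=inputs, dynamic_shapes=ds, strict=False)
ep.module()(**data["inputs2"])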

onnx_diagnostic/tasks/automatic_speech_recognition.py

Lines changed: 18 additions & 2 deletions
@@ -69,7 +69,6 @@ def get_inputs(
         use_cache:bool,return_dict:bool
     )
     """
-    assert not add_second_input, "add_second_input=True not yet implemented"
     batch = torch.export.Dim("batch", min=1, max=1024)
     seq_length = "seq_length"

@@ -128,7 +127,24 @@ def get_inputs(
         # encoder_last_hidden_state=torch.randn(batch_size, sequence_length2, encoder_dim),
         # encoder_outputs=torch.randn(batch_size, sequence_length2, encoder_dim),
     )
-    return dict(inputs=inputs, dynamic_shapes=shapes)
+    res = dict(inputs=inputs, dynamic_shapes=shapes)
+    if add_second_input:
+        res["inputs2"] = get_inputs(
+            model=model,
+            config=config,
+            dummy_max_token_id=dummy_max_token_id,
+            max_source_positions=max_source_positions,
+            d_model=d_model,
+            num_hidden_layers=num_hidden_layers,
+            encoder_attention_heads=encoder_attention_heads,
+            encoder_layers=encoder_layers,
+            decoder_layers=decoder_layers,
+            head_dim=head_dim,
+            batch_size=batch_size + 1,
+            sequence_length=sequence_length + 1,
+            **kwargs,
+        )["inputs"]
+    return res


 def random_input_kwargs(config: Any) -> Tuple[Dict[str, Any], Callable]:
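Each task module implements the flag the same way: build the first set, then re-invoke ``get_inputs`` with every dynamic dimension shifted by one and keep only the resulting ``inputs``, so correlated dimensions stay consistent with each other. A self-contained illustration of the pattern (a hypothetical minimal task module, not code from this commit):

import torch

def get_inputs(batch_size: int = 2, sequence_length: int = 30,
               add_second_input: bool = False, **kwargs):
    # First set of dummy inputs plus the matching dynamic-shape spec.
    shapes = {"input_ids": {0: "batch", 1: "seq_length"}}
    inputs = dict(input_ids=torch.randint(0, 100, (batch_size, sequence_length)))
    res = dict(inputs=inputs, dynamic_shapes=shapes)
    if add_second_input:
        # Recurse with every dynamic dimension shifted by one so the
        # second set takes a different value on each dynamic axis.
        res["inputs2"] = get_inputs(
            batch_size=batch_size + 1,
            sequence_length=sequence_length + 1,
            **kwargs,
        )["inputs"]
    return res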

onnx_diagnostic/tasks/feature_extraction.py

Lines changed: 11 additions & 2 deletions
@@ -35,7 +35,6 @@ def get_inputs(
         token_type_ids:T7s1x13[0,0:A0.0],
         attention_mask:T7s1x13[1,1:A1.0])
     """
-    assert not add_second_input, "add_second_input=True not yet implemented"
     batch = torch.export.Dim("batch", min=1, max=1024)
     seq_length = "sequence_length"
     shapes = {
@@ -48,7 +47,17 @@ def get_inputs(
         ),
         attention_mask=torch.ones((batch_size, sequence_length)).to(torch.int64),
     )
-    return dict(inputs=inputs, dynamic_shapes=shapes)
+    res = dict(inputs=inputs, dynamic_shapes=shapes)
+    if add_second_input:
+        res["inputs2"] = get_inputs(
+            model=model,
+            config=config,
+            batch_size=batch_size + 1,
+            sequence_length=sequence_length + 1,
+            dummy_max_token_id=dummy_max_token_id,
+            **kwargs,
+        )["inputs"]
+    return res


 def random_input_kwargs(config: Any) -> Tuple[Dict[str, Any], Callable]:

onnx_diagnostic/tasks/fill_mask.py

Lines changed: 11 additions & 2 deletions
@@ -35,7 +35,6 @@ def get_inputs(
         token_type_ids:T7s1x13[0,0:A0.0],
         attention_mask:T7s1x13[1,1:A1.0])
     """
-    assert not add_second_input, "add_second_input=True not yet implemented"
     batch = torch.export.Dim("batch", min=1, max=1024)
     seq_length = "sequence_length"
     shapes = {
@@ -50,7 +49,17 @@ def get_inputs(
         token_type_ids=torch.zeros((batch_size, sequence_length)).to(torch.int64),
         attention_mask=torch.ones((batch_size, sequence_length)).to(torch.int64),
     )
-    return dict(inputs=inputs, dynamic_shapes=shapes)
+    res = dict(inputs=inputs, dynamic_shapes=shapes)
+    if add_second_input:
+        res["inputs2"] = get_inputs(
+            model=model,
+            config=config,
+            batch_size=batch_size + 1,
+            sequence_length=sequence_length + 1,
+            dummy_max_token_id=dummy_max_token_id,
+            **kwargs,
+        )["inputs"]
+    return res


 def random_input_kwargs(config: Any) -> Tuple[Dict[str, Any], Callable]:

onnx_diagnostic/tasks/image_classification.py

Lines changed: 13 additions & 2 deletions
@@ -41,7 +41,6 @@ def get_inputs(
     :param input_height: input height
     :return: dictionary
     """
-    assert not add_second_input, "add_second_input=True not yet implemented"
     assert isinstance(
         input_width, int
     ), f"Unexpected type for input_width {type(input_width)}{config}"
@@ -61,7 +60,19 @@ def get_inputs(
             -1, 1
         ),
     )
-    return dict(inputs=inputs, dynamic_shapes=shapes)
+    res = dict(inputs=inputs, dynamic_shapes=shapes)
+    if add_second_input:
+        res["inputs2"] = get_inputs(
+            model=model,
+            config=config,
+            input_width=input_width + 1,
+            input_height=input_height + 1,
+            input_channels=input_channels,
+            batch_size=batch_size + 1,
+            dynamic_rope=dynamic_rope,
+            **kwargs,
+        )["inputs"]
+    return res


 def random_input_kwargs(config: Any) -> Tuple[Dict[str, Any], Callable]:

onnx_diagnostic/tasks/image_text_to_text.py

Lines changed: 20 additions & 1 deletion
@@ -100,7 +100,26 @@ def get_inputs(
             torch.int64
         ),
     )
-    return dict(inputs=inputs, dynamic_shapes=shapes)
+    res = dict(inputs=inputs, dynamic_shapes=shapes)
+    if add_second_input:
+        res["inputs2"] = get_inputs(
+            model=model,
+            config=config,
+            dummy_max_token_id=dummy_max_token_id,
+            num_key_value_heads=num_key_value_heads,
+            num_hidden_layers=num_hidden_layers,
+            head_dim=head_dim,
+            width=width,
+            height=height,
+            num_channels=num_channels,
+            batch_size=batch_size + 1,
+            sequence_length=sequence_length + 1,
+            sequence_length2=sequence_length2 + 1,
+            n_images=n_images + 1,
+            dynamic_rope=dynamic_rope,
+            **kwargs,
+        )["inputs"]
+    return res


 def random_input_kwargs(config: Any) -> Tuple[Dict[str, Any], Callable]:

onnx_diagnostic/tasks/sentence_similarity.py

Lines changed: 11 additions & 2 deletions
@@ -35,7 +35,6 @@ def get_inputs(
         token_type_ids:T7s1x13[0,0:A0.0],
         attention_mask:T7s1x13[1,1:A1.0])
     """
-    assert not add_second_input, "add_second_input=True not yet implemented"
     batch = torch.export.Dim("batch", min=1, max=1024)
     seq_length = "seq_length"
     shapes = {
@@ -50,7 +49,17 @@ def get_inputs(
         token_type_ids=torch.zeros((batch_size, sequence_length)).to(torch.int64),
         attention_mask=torch.ones((batch_size, sequence_length)).to(torch.int64),
     )
-    return dict(inputs=inputs, dynamic_shapes=shapes)
+    res = dict(inputs=inputs, dynamic_shapes=shapes)
+    if add_second_input:
+        res["inputs2"] = get_inputs(
+            model=model,
+            config=config,
+            batch_size=batch_size + 1,
+            sequence_length=sequence_length + 1,
+            dummy_max_token_id=dummy_max_token_id,
+            **kwargs,
+        )["inputs"]
+    return res


 def random_input_kwargs(config: Any) -> Tuple[Dict[str, Any], Callable]:

onnx_diagnostic/tasks/text2text_generation.py

Lines changed: 16 additions & 1 deletion
@@ -126,7 +126,22 @@ def get_inputs(
         # encoder_last_hidden_state=torch.randn(batch_size, sequence_length2, encoder_dim),
         # encoder_outputs=torch.randn(batch_size, sequence_length2, encoder_dim),
     )
-    return dict(inputs=inputs, dynamic_shapes=shapes)
+    res = dict(inputs=inputs, dynamic_shapes=shapes)
+    if add_second_input:
+        res["inputs2"] = get_inputs(
+            model=model,
+            config=config,
+            dummy_max_token_id=dummy_max_token_id,
+            num_key_value_heads=num_key_value_heads,
+            num_hidden_layers=num_hidden_layers,
+            head_dim=head_dim,
+            encoder_dim=encoder_dim,
+            batch_size=batch_size + 1,
+            sequence_length=sequence_length + 1,
+            sequence_length2=sequence_length2 + 1,
+            **kwargs,
+        )["inputs"]
+    return res


 def random_input_kwargs(config: Any) -> Tuple[Dict[str, Any], Callable]:
