
Commit fc28812

Add a set of inputs with batch size 1
1 parent 1006017 commit fc28812

2 files changed: +38, -0 lines changed

_unittests/ut_tasks/test_tasks.py

Lines changed: 23 additions & 0 deletions
@@ -48,6 +48,7 @@ def test_text_generation(self):
                 model, (), kwargs=inputs, dynamic_shapes=use_dyn_not_str(ds), strict=False
             )

+    @hide_stdout()
     def test_text_generation_empty_cache(self):
         mid = "arnir0/Tiny-LLM"
         data = get_untrained_model_with_inputs(mid, add_second_input=True)
@@ -69,6 +70,28 @@ def test_text_generation_empty_cache(self):
         got = ep.module()(**torch_deepcopy(inputs))
         self.assertEqualArrayAny(expected, got)

+    @hide_stdout()
+    def test_text_generation_batch1(self):
+        mid = "arnir0/Tiny-LLM"
+        data = get_untrained_model_with_inputs(mid, add_second_input=True)
+        model, inputs = data["model"], data["inputs"]
+        self.assertIn("inputs_batch1", data)
+        empty_inputs = torch_deepcopy(data["inputs_batch1"])
+        model(**torch_deepcopy(empty_inputs))
+        expected = model(**torch_deepcopy(inputs))
+        self.assertEqual(
+            {"attention_mask", "past_key_values", "input_ids", "position_ids"}, set(inputs)
+        )
+        with torch_export_patches(patch_transformers=True, verbose=1):
+            ep = torch.export.export(
+                model,
+                (),
+                kwargs=torch_deepcopy(inputs),
+                dynamic_shapes=use_dyn_not_str(data["dynamic_shapes"]),
+            )
+        got = ep.module()(**torch_deepcopy(inputs))
+        self.assertEqualArrayAny(expected, got)
+
     @hide_stdout()
     def test_automatic_speech_recognition_float32(self):
         mid = "openai/whisper-tiny"

onnx_diagnostic/tasks/text_generation.py

Lines changed: 15 additions & 0 deletions
@@ -284,6 +284,21 @@ def get_inputs(
             add_second_input=0,
             **kwargs,
         )["inputs"]
+        res["inputs_batch1"] = get_inputs(
+            model=model,
+            config=config,
+            dummy_max_token_id=dummy_max_token_id,
+            num_hidden_layers=num_hidden_layers,
+            batch_size=1,
+            sequence_length=sequence_length,
+            sequence_length2=sequence_length2,
+            dynamic_rope=dynamic_rope,
+            num_key_value_heads=num_key_value_heads,
+            head_dim=head_dim,
+            cls_cache=cls_cache,
+            add_second_input=0,
+            **kwargs,
+        )["inputs"]
     return res
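As a side note, a batch-1 variant could in principle be derived by slicing the existing dummy inputs along the batch axis instead of calling get_inputs a second time. The sketch below is a hypothetical illustration, not part of this commit: it only handles a flat dict of tensors and ignores nested structures such as the past_key_values cache, which is one reason regenerating the inputs with batch_size=1, as done above, is the more robust approach.

import torch


def slice_batch1(inputs: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]:
    """Keep only the first element of the batch dimension of every tensor.

    Hypothetical helper, not part of the commit: real text-generation inputs
    also contain a cache object (past_key_values) that this flat version
    does not handle.
    """
    return {name: t[:1] for name, t in inputs.items()}


# Dummy tensors shaped roughly like the text-generation inputs (batch size 2).
dummy = {
    "input_ids": torch.randint(0, 100, (2, 3)),
    "attention_mask": torch.ones((2, 33), dtype=torch.int64),
    "position_ids": torch.arange(3).unsqueeze(0).expand(2, -1),
}
batch1 = slice_batch1(dummy)
assert all(t.shape[0] == 1 for t in batch1.values())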

0 commit comments