enables export with fake tensors

xadupre · xadupre · commit db6bb79bbb52 · 2025-10-24T12:52:24.000+02:00
diff --git a/_unittests/ut_tasks/test_tasks_text_generation.py b/_unittests/ut_tasks/test_tasks_text_generation.py
@@ -10,13 +10,14 @@
 from onnx_diagnostic.torch_models.hghub.model_inputs import get_untrained_model_with_inputs
 from onnx_diagnostic.torch_export_patches import torch_export_patches
 from onnx_diagnostic.torch_export_patches.patch_inputs import use_dyn_not_str
+from onnx_diagnostic.export.shape_helper import make_fake_with_dynamic_dimensions
 
 
 class TestTasksTextGeneration(ExtTestCase):
     @hide_stdout()
     @requires_transformers("4.53")
     @requires_torch("2.7.99")
-    def test_tet_generation_gemma3_for_causallm(self):
+    def test_text_generation_gemma3_for_causallm(self):
         mid = "hf-internal-testing/tiny-random-Gemma3ForCausalLM"
         data = get_untrained_model_with_inputs(mid, verbose=1, add_second_input=True)
         self.assertEqual(data["task"], "text-generation")
@@ -31,20 +32,38 @@ def test_tet_generation_gemma3_for_causallm(self):
     @hide_stdout()
     @requires_transformers("4.53")
     @requires_torch("2.7.99")
-    def test_itext_generation_phi_3_mini_128k_instruct(self):
+    def test_text_generation_phi_3_mini_128k_instruct(self):
         mid = "microsoft/Phi-3-mini-128k-instruct"
         data = get_untrained_model_with_inputs(mid, verbose=1, add_second_input=True)
         self.assertEqual(data["task"], "text-generation")
         model, inputs, ds = data["model"], data["inputs"], data["dynamic_shapes"]
-        print("--", self.string_type(inputs, with_shape=True))
-        print("--", self.string_type(ds))
         model(**torch_deepcopy(inputs))
         model(**data["inputs2"])
         with torch_export_patches(patch_transformers=True, verbose=10, patch_torch=False):
             torch.export.export(
                 model, (), kwargs=inputs, dynamic_shapes=use_dyn_not_str(ds), strict=False
             )
 
+    @hide_stdout()
+    @requires_transformers("4.53")
+    @requires_torch("2.7.99")
+    def test_text_generation_tiny_llm(self):
+        mid = "arnir0/Tiny-LLM"
+        data = get_untrained_model_with_inputs(mid, verbose=1, add_second_input=True)
+        self.assertEqual(data["task"], "text-generation")
+        model, inputs, ds = data["model"], data["inputs"], data["dynamic_shapes"]
+        expected = model(**torch_deepcopy(inputs))
+        model(**data["inputs2"])
+        fake = make_fake_with_dynamic_dimensions(inputs, dynamic_shapes=ds)[0]
+        with torch_export_patches(patch_transformers=True, verbose=10, patch_torch=False):
+            ep = torch.export.export(
+                model, (), kwargs=fake, dynamic_shapes=use_dyn_not_str(ds), strict=False
+            )
+            # Export traces `fake`; the original `inputs` are replayed below.
+        got = ep.module()(**inputs)
+        self.assertEqualAny(expected.past_key_values, got.past_key_values)
+        self.assertEqualArray(expected.logits, got.logits)
+
 
 if __name__ == "__main__":
     unittest.main(verbosity=2)
diff --git a/onnx_diagnostic/helpers/cache_helper.py b/onnx_diagnostic/helpers/cache_helper.py
@@ -169,6 +169,21 @@ def make_dynamic_cache(
             )
             print(string_type(past_key_values, with_shape=True))
         """
+        if key_value_pairs and isinstance(
+            key_value_pairs[0][0], torch._subclasses.fake_tensor.FakeTensor
+        ):
+            cache = transformers.cache_utils.DynamicCache()
+            cache.layers.extend(
+                [transformers.cache_utils.DynamicLayer() for _ in key_value_pairs]
+            )
+            for i, layer in enumerate(cache.layers):
+                k, v = key_value_pairs[i][0], key_value_pairs[i][1]
+                layer.dtype = k.dtype
+                layer.device = k.device
+                layer.keys = k
+                layer.values = v
+            return finalize_cache(cache)
+
         cache = transformers.cache_utils.DynamicCache(key_value_pairs)
         if hasattr(cache, "layers") and len(key_value_pairs) < len(cache.layers):
             # The cache constructor contains the two following lines
diff --git a/onnx_diagnostic/helpers/helper.py b/onnx_diagnostic/helpers/helper.py
@@ -463,6 +463,7 @@ def string_type(
         if verbose:
             print(f"[string_type] F2:{type(obj)}")
         return f"{prefix}F{i}s{'x'.join(map(str, obj.shape))}"
+
     if isinstance(obj, torch.Tensor):
         from .torch_helper import torch_dtype_to_onnx_dtype
 
@@ -783,6 +784,8 @@ def string_type(
             obj, ultralytics.engine.results.Results
         ), f"Unexpected type={type(obj)}"
         return f"ultralytics.{obj.__class__.__name__}(...)"
+    if obj.__class__.__name__ == "FakeTensorMode":
+        return f"{obj}"
 
     if verbose:
         print(f"[string_type] END:{type(obj)}")