
Commit 209be4b

Reduce dependency on transformers (#259)
* Reduce dependency on transformers
* disable patch on idefics
* fix example
* spell
* disable patch
1 parent fde0173 commit 209be4b

File tree

6 files changed: +9 -9 lines changed

_doc/examples/plot_export_hub_codellama.py

Lines changed: 1 addition & 1 deletion
@@ -98,7 +98,7 @@
 # It still requires patches to be exportable (control flow).
 # See :func:`onnx_diagnostic.torch_export_patches.torch_export_patches`

-with torch_export_patches(patch_transformers=True) as f:
+with torch_export_patches(patch_torch=False, patch_transformers=True) as f:
     ep = torch.export.export(
         model,
         (),

_unittests/ut_tasks/test_tasks_image_text_to_text.py

Lines changed: 1 addition & 2 deletions
@@ -22,9 +22,8 @@ def test_image_text_to_text_idefics(self):
         self.assertEqual(data["task"], "image-text-to-text")
         model, inputs, ds = data["model"], data["inputs"], data["dynamic_shapes"]
         model(**torch_deepcopy(inputs))
-        print("***", self.string_type(data["inputs2"], with_shape=True))
         model(**data["inputs2"])
-        with torch_export_patches(patch_transformers=True, verbose=10):
+        with torch_export_patches(patch_transformers=True, verbose=10, patch_torch=False):
             torch.export.export(
                 model, (), kwargs=inputs, dynamic_shapes=use_dyn_not_str(ds), strict=False
             )

_unittests/ut_tasks/test_tasks_mask_generation.py

Lines changed: 1 addition & 1 deletion
@@ -23,7 +23,7 @@ def test_mask_generation(self):
         model, inputs, ds = data["model"], data["inputs"], data["dynamic_shapes"]
         model(**torch_deepcopy(inputs))
         model(**data["inputs2"])
-        with torch_export_patches(patch_transformers=True, verbose=1):
+        with torch_export_patches(patch_torch=False, patch_transformers=True, verbose=1):
             torch.export.export(
                 model, (), kwargs=inputs, dynamic_shapes=use_dyn_not_str(ds), strict=False
             )
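
The same change appears in the example above and in both tests: torch_export_patches is now called with patch_torch=False so that only the transformers-level patches are applied while torch.export.export runs. A minimal sketch of that pattern, assuming onnx_diagnostic and transformers are installed; the tiny model and its inputs below are placeholders, not taken from the commit:

    import torch
    from onnx_diagnostic.torch_export_patches import torch_export_patches


    class TinyModel(torch.nn.Module):
        def forward(self, x):
            return x * 2


    model, inputs = TinyModel(), {"x": torch.randn(2, 3)}

    # Only transformers-level patches are enabled; torch-level patches stay off.
    with torch_export_patches(patch_torch=False, patch_transformers=True):
        ep = torch.export.export(model, (), kwargs=inputs, strict=False)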

onnx_diagnostic/helpers/cache_helper.py

Lines changed: 1 addition & 1 deletion
@@ -108,7 +108,7 @@ def flatten_unflatten_for_dynamic_shapes(

 def is_cache_dynamic_registered(fast: bool = False) -> bool:
     """
-    Tells class :class:`transformers.cache_utils.DynamicCache` can be
+    Tells if class :class:`transformers.cache_utils.DynamicCache` can be
     serialized and deserialized. Only then, :func:`torch.export.export`
     can export a model.
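
The docstring fix is wording only; the helper still reports whether transformers' DynamicCache is registered for serialization so that torch.export.export can handle it. A small usage sketch; the guard below is illustrative and not part of the commit:

    from onnx_diagnostic.helpers.cache_helper import is_cache_dynamic_registered

    # Illustrative check before attempting an export that carries a DynamicCache.
    if is_cache_dynamic_registered():
        print("DynamicCache can be serialized, export may proceed")
    else:
        print("DynamicCache is not registered for serialization")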

onnx_diagnostic/helpers/rt_helper.py

Lines changed: 3 additions & 3 deletions
@@ -3,8 +3,6 @@
 import onnx
 import torch
 from .helper import string_type, flatten_object
-from .torch_helper import to_numpy
-from .cache_helper import is_cache_dynamic_registered


 def name_type_to_onnx_dtype(name: str) -> int:
@@ -49,14 +47,16 @@ def make_feeds(
     assert (
         not check_flatten
         or not all(isinstance(obj, torch.Tensor) for obj in flat)
-        or not is_cache_dynamic_registered(fast=True)
+        # or not is_cache_dynamic_registered(fast=True)
         or len(flat) == len(torch.utils._pytree.tree_flatten(inputs)[0])
     ), (
         f"Unexpected number of flattened objects, "
         f"{string_type(flat, with_shape=True)} != "
         f"{string_type(torch.utils._pytree.tree_flatten(inputs)[0], with_shape=True)}"
     )
     if use_numpy:
+        from .torch_helper import to_numpy
+
         flat = [to_numpy(t) if isinstance(t, torch.Tensor) else t for t in flat]
     names = (
         [i.name for i in proto.graph.input]
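
Dropping the module-level imports of to_numpy and is_cache_dynamic_registered means rt_helper no longer loads those helpers at import time; to_numpy is now imported only inside the use_numpy branch of make_feeds, which is presumably part of reducing the import-time dependency on transformers named in the commit title. A generic sketch of that deferred-import pattern, with hypothetical names that are not from the commit:

    def to_feeds(values, use_numpy=False):
        # Deferred import: the optional dependency is loaded only when this
        # branch actually runs, so importing the module stays cheap.
        if use_numpy:
            import numpy as np

            return [np.asarray(v) for v in values]
        return list(values)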

onnx_diagnostic/torch_export_patches/patches/patch_transformers.py

Lines changed: 2 additions & 1 deletion
@@ -1380,7 +1380,8 @@ def forward(self, x, seq_len=None):

 def _set_cos_sin_cache_then(x, inv_freq, seq_len, _cos_cached, _sin_cached):
     t = torch.arange(seq_len, device=x.device, dtype=torch.int64).type_as(inv_freq)
-    freqs = torch.einsum("i,j->ij", t, inv_freq)
+    # freqs = torch.einsum("i,j->ij", t, inv_freq)
+    freqs = t.reshape((-1, 1)) * inv_freq.reshape((1, -1))
     emb = torch.cat((freqs, freqs), dim=-1)
     return emb.cos().to(x.dtype), emb.sin().to(x.dtype)
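
The patched rotary-embedding helper now avoids torch.einsum: torch.einsum("i,j->ij", t, inv_freq) is the outer product of two 1-D tensors, and the reshape-and-broadcast form computes the same matrix. A quick standalone check of that equivalence:

    import torch

    t = torch.arange(6, dtype=torch.float32)
    inv_freq = torch.rand(4)

    by_einsum = torch.einsum("i,j->ij", t, inv_freq)
    by_broadcast = t.reshape((-1, 1)) * inv_freq.reshape((1, -1))

    # Both are the (len(t), len(inv_freq)) outer product t[i] * inv_freq[j].
    assert torch.allclose(by_einsum, by_broadcast)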

0 commit comments
