|
| 1 | +""" |
| 2 | +.. _l-plot-tiny-llm-export-patched: |
| 3 | +
|
| 4 | +Export Tiny-LLM with patches |
| 5 | +============================ |
| 6 | +
|
| 7 | +Many models from :epkg:`transformers` cannot be converted because |
| 8 | +the implementation uses cache classes. Let's see how to get around that. |
| 9 | +We focus on the model |
| 10 | +`Tiny-LLM <https://huggingface.co/arnir0/Tiny-LLM>`_. |
| 11 | +To avoid downloading any weights, we write a function creating a |
| 12 | +random model based on the same architecture. |
| 13 | +This continues example :ref:`l-plot-tiny-llm-export`. |
| 14 | +
|
| 15 | +Errors |
| 16 | +++++++ |
| 17 | +
|
| 18 | +They depend on transformers version. |
| 19 | +
|
| 20 | +``transformers>=4.40,<4.50`` cannot serialize DynamicCache and cannot |
| 21 | +map dynamic shapes to instances of DynamicCache. The following errors |
| 22 | +would appear: |
| 23 | +
|
| 24 | +:: |
| 25 | +
|
| 26 | + torch._dynamo.exc.UserError: Cannot associate shape |
| 27 | + [[{0: <class '....batch'>, 2: <class '....cache_length'>}], |
| 28 | + [{0: <class '....batch'>, 2: <class '....cache_length'>}]] |
| 29 | + specified at `dynamic_shapes['past_key_values']` |
| 30 | + to non-tensor type <class 'transformers.cache_utils.DynamicCache'> |
| 31 | + at `inputs['past_key_values']` (expected None) |
| 32 | + For more information about this error, see: https://pytorch.org/docs/main/generated/exportdb/index.html#dynamic-shapes-validation |
| 33 | +
|
| 34 | +With ``transformers==4.50``, it shows the following: |
| 35 | +
|
| 36 | +:: |
| 37 | +
|
| 38 | + torch._dynamo.exc.UserError: Constraints violated (batch)! |
| 39 | + For more information, run with TORCH_LOGS="+dynamic". |
| 40 | + - Not all values of batch = L['args'][1]['input_ids'].size()[0] |
| 41 | + in the specified range batch <= 1024 are valid |
| 42 | + because batch was inferred to be a constant (2). |
| 43 | + - Not all values of batch = L['args'][1]['attention_mask'].size()[0] |
| 44 | + in the specified range batch <= 1024 are valid |
| 45 | + because batch was inferred to be a constant (2). |
| 46 | + - Not all values of batch = L['args'][1]['past_key_values']['key_cache'][0].size()[0] |
| 47 | + in the specified range batch <= 1024 are valid |
| 48 | + because batch was inferred to be a constant (2). |
| 49 | + - Not all values of batch = L['args'][1]['past_key_values']['value_cache'][0].size()[0] |
| 50 | + in the specified range batch <= 1024 are valid |
| 51 | + because batch was inferred to be a constant (2). |
| 52 | + Suggested fixes: |
| 53 | + batch = 2 |
| 54 | +
|
| 55 | +However, this package implements a patch mechanism |
| 56 | +which replaces the part causing these issues. |
| 57 | +
|
| 58 | +.. note:: restart after an export failure |
| 59 | +
|
| 60 | + If the export fails, it is better to start executing again, |
| 61 | + or restart the kernel if you are in the notebook. |
| 62 | + The export may leave :epkg:`torch` in an unstable state. |
| 63 | +""" |
| 64 | + |
| 65 | +import copy |
| 66 | +import torch |
| 67 | +import transformers |
| 68 | +from onnx_diagnostic.torch_export_patches.onnx_export_errors import bypass_export_some_errors |
| 69 | +from onnx_diagnostic.torch_models.llms import get_tiny_llm |
| 70 | + |
| 71 | + |
# Build a randomly initialized Tiny-LLM clone together with example
# inputs and the matching dynamic-shape specification — no weights are
# downloaded for this first export.
experiment = get_tiny_llm()
untrained_model = experiment["model"]
inputs = experiment["inputs"]
dynamic_shapes = experiment["dynamic_shapes"]

# Work on a deep copy so the original example inputs stay pristine for
# the second export below.
cloned_inputs = copy.deepcopy(inputs)
| 80 | + |
| 81 | + |
# Run the export inside the patch context: transformers' cache-related
# code is temporarily replaced so torch.export can trace it, and
# ``modificator`` rewrites the inputs accordingly.
with bypass_export_some_errors(patch_transformers=True) as modificator:
    patched_kwargs = modificator(cloned_inputs)
    ep = torch.export.export(
        untrained_model,
        (),
        kwargs=patched_kwargs,
        dynamic_shapes=dynamic_shapes,
    )
    print("It worked:")
    print(ep)
| 91 | + |
# %%
# With the original model
# +++++++++++++++++++++++

# Download the real checkpoint and repeat the same export. The tokenizer
# is loaded for completeness; only the model itself is exported.
MODEL_NAME = "arnir0/Tiny-LLM"
tokenizer = transformers.AutoTokenizer.from_pretrained(MODEL_NAME)
model = transformers.AutoModelForCausalLM.from_pretrained(MODEL_NAME)

# Fresh deep copy again: the previous export may have consumed or
# mutated its argument dictionary.
pretrained_inputs = copy.deepcopy(inputs)

with bypass_export_some_errors(patch_transformers=True) as modificator:
    ep = torch.export.export(
        model,
        (),
        kwargs=modificator(pretrained_inputs),
        dynamic_shapes=dynamic_shapes,
    )
    print("It worked:")
    print(ep)
0 commit comments