We use the dummy example from the model page.
"""

- from typing import Any, Dict
+ import copy
import torch
import transformers
from onnx_diagnostic.helpers import string_type
- from onnx_diagnostic.cache_helpers import make_dynamic_cache
+ from onnx_diagnostic.torch_models.llms import get_tiny_llm


MODEL_NAME = "arnir0/Tiny-LLM"
# We rewrite the forward method to print the cache dimension.


- def string_inputs(args, kwargs):
-     def _cache(a):
-         if len(a.key_cache):
-             return f"n_caches={len(a.key_cache)}, shape={a.key_cache[0].shape}"
-         return f"n_caches={len(a.key_cache)}"
-
-     for a in args:
-         if isinstance(a, transformers.cache_utils.DynamicCache):
-             return _cache(a)
-     for k, a in kwargs.items():
-         if isinstance(a, transformers.cache_utils.DynamicCache):
-             return f"{k}={_cache(a)}"
-     return "no_cache"
-
-
def _forward_(*args, _f=None, **kwargs):
    assert _f is not None
    if not torch.compiler.is_exporting():
@@ -83,100 +68,6 @@ def _forward_(*args, _f=None, **kwargs):
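The rest of ``_forward_`` and the code that attaches it to the model fall in the lines this hunk skips. As a purely illustrative sketch (the actual file may do it differently), such a wrapper is usually bound by monkey-patching ``forward`` so that the original method ends up in the ``_f`` keyword:

# Hypothetical sketch, not taken from the diff: route every call to the model
# through _forward_ while keeping the original forward in the _f default.
def patch_forward(model):
    model.forward = lambda *args, _f=model.forward, **kwargs: _forward_(*args, _f=_f, **kwargs)
    return model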
# Let's create an untrained model.


- def get_tiny_llm(
-     batch_size: int = 2,
-     input_cache: bool = True,
-     common_dynamic_shapes: bool = True,
-     dynamic_rope: bool = False,
-     **kwargs,
- ) -> Dict[str, Any]:
-     """
-     Gets a non initialized model.
-
-     :param batch_size: batch size
-     :param input_cache: generate data for this iteration with or without cache
-     :param kwargs: to overwrite the configuration, example ``num_hidden_layers=1``
-     :param common_dynamic_shapes: if True returns dynamic shapes as well
-     :param dynamic_rope: use dynamic rope (see :class:`transformers.LlamaConfig`)
-     :return: dictionary
-     """
-     import transformers
-
-     config = {
-         "architectures": ["LlamaForCausalLM"],
-         "bos_token_id": 1,
-         "eos_token_id": 2,
-         "hidden_act": "silu",
-         "hidden_size": 192,
-         "initializer_range": 0.02,
-         "intermediate_size": 1024,
-         "max_position_embeddings": 1024,
-         "model_type": "llama",
-         "num_attention_heads": 2,
-         "num_hidden_layers": 1,
-         "num_key_value_heads": 1,
-         "pretraining_tp": 1,
-         "rms_norm_eps": 1e-05,
-         "rope_scaling": {"rope_type": "dynamic", "factor": 10.0} if dynamic_rope else None,
-         "tie_word_embeddings": False,
-         "torch_dtype": "float32",
-         "transformers_version": "4.31.0.dev0",
-         "use_cache": True,
-         "vocab_size": 32000,
-     }
-
-     config.update(**kwargs)
-     conf = transformers.LlamaConfig(**config)
-     model = transformers.LlamaForCausalLM(conf)
-     model.eval()
-
-     # now the inputs
-     cache_last_dim = 96
-     sequence_length = 30
-     sequence_length2 = 3
-     num_key_value_heads = 1
-     max_token_id = config["vocab_size"] - 1
-     n_layers = config["num_hidden_layers"]
-
-     batch = torch.export.Dim("batch", min=1, max=1024)
-     seq_length = torch.export.Dim("seq_length", min=1, max=4096)
-     cache_length = torch.export.Dim("cache_length", min=1, max=4096)
-
-     shapes = {
-         "input_ids": {0: batch, 1: seq_length},
-         "attention_mask": {
-             0: batch,
-             1: torch.export.Dim.DYNAMIC,  # cache_length + seq_length
-         },
-         "past_key_values": [
-             [{0: batch, 2: cache_length} for _ in range(n_layers)],
-             [{0: batch, 2: cache_length} for _ in range(n_layers)],
-         ],
-     }
-     inputs = dict(
-         input_ids=torch.randint(0, max_token_id, (batch_size, sequence_length2)).to(
-             torch.int64
-         ),
-         attention_mask=torch.ones((batch_size, sequence_length + sequence_length2)).to(
-             torch.int64
-         ),
-         past_key_values=make_dynamic_cache(
-             [
-                 (
-                     torch.randn(
-                         batch_size, num_key_value_heads, sequence_length, cache_last_dim
-                     ),
-                     torch.randn(
-                         batch_size, num_key_value_heads, sequence_length, cache_last_dim
-                     ),
-                 )
-                 for i in range(n_layers)
-             ]
-         ),
-     )
-     return dict(inputs=inputs, model=model, dynamic_shapes=shapes)
-
-
# %%
# Let's get the model, inputs and dynamic shapes.

@@ -187,9 +78,25 @@ def get_tiny_llm(
    experiment["dynamic_shapes"],
)

+ # %%
+ # Before we run it, we make a copy of the inputs because the cache
+ # gets modified by the execution; after that it no longer matches
+ # the previous input_ids and mask.
+ cloned_inputs = copy.deepcopy(inputs)
+
+
# %% Let's run it.
- expected_output = model(**inputs)
- print("result type", type(expected_output))
+ print("input type", string_type(inputs, with_shape=True))
+
+ expected_output = untrained_model(**inputs)
+
+
+ print("input after the execution", string_type(inputs, with_shape=True))
+ print("result type", string_type(expected_output, with_shape=True))
+
+ ep = torch.export.export(
+     untrained_model, (), kwargs=cloned_inputs, dynamic_shapes=dynamic_shapes
+ )

# %%
# It works.
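A quick way to sanity-check the result, not shown in the diff: ``torch.export.export`` returns an ``ExportedProgram`` whose ``.module()`` gives back a runnable module, so the exported graph can be called on a fresh copy of the inputs and its output inspected with ``string_type``, just like the eager output above. A minimal sketch, assuming the names defined earlier in the script:

# Sketch only (not part of the diff): run the exported program on a fresh copy
# of the inputs and print the structure of what it returns.
got = ep.module()(**copy.deepcopy(cloned_inputs))
print("exported output", string_type(got, with_shape=True))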
@@ -199,7 +106,7 @@ def get_tiny_llm(

try:
    ep = torch.export.export(
-         untrained_model, (), inputs, dynamic_shapes=dynamic_shapes, strict=False
+         untrained_model, (), kwargs=cloned_inputs, dynamic_shapes=dynamic_shapes
    )
    print("It worked:")
    print(ep)
@@ -217,7 +124,7 @@ def get_tiny_llm(
# Let's use the same dummy inputs but we use the downloaded model.

try:
-     ep = torch.export.export(model, (), inputs, dynamic_shapes=dynamic_shapes, strict=False)
+     ep = torch.export.export(model, (), kwargs=cloned_inputs, dynamic_shapes=dynamic_shapes)
    print("It worked:")
    print(ep)
except Exception as e: