
Commit eabc4ef

fix position ids

1 parent 727dcac

File tree: 9 files changed (+224, -21 lines)

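In short, this commit makes the untrained LLM helpers pass position_ids explicitly (next to input_ids, attention_mask and past_key_values) and declares both of its dimensions as dynamic instead of a single untied one. A minimal sketch of the relationship the new example inputs encode, using the get_phi2 defaults (30 cached tokens, 3 new tokens) as assumed sizes:

import torch

batch_size, past_len, new_len = 1, 30, 3  # defaults used by get_phi2 in this commit

# Tokens decoded in this step.
input_ids = torch.randint(0, 51200, (batch_size, new_len), dtype=torch.int64)
# The mask covers the cached tokens plus the new ones.
attention_mask = torch.ones((batch_size, past_len + new_len), dtype=torch.int64)
# Positions continue where the cache stops: past_len .. past_len + new_len - 1,
# one row per batch element, hence the second dynamic dimension.
position_ids = torch.arange(past_len, past_len + new_len).expand(batch_size, -1)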

_doc/api/torch_models/llms.rst

Lines changed: 1 addition & 1 deletion
@@ -3,4 +3,4 @@ onnx_diagnostic.torch_models.llms
 =================================
 
 .. automodule:: onnx_diagnostic.torch_models.llms
-    :members: get_tiny_llm
+    :members: get_phi2, get_tiny_llm
Lines changed: 45 additions & 0 deletions
@@ -0,0 +1,45 @@
+import unittest
+import torch
+from onnx_diagnostic.ext_test_case import ExtTestCase, ignore_warnings, requires_transformers
+from onnx_diagnostic.torch_models.llms import get_phi2
+from onnx_diagnostic.helpers import string_type
+from onnx_diagnostic.torch_export_patches import bypass_export_some_errors
+
+
+class TestLlmPhi(ExtTestCase):
+    def test_get_phi2(self):
+        data = get_phi2(num_hidden_layers=2)
+        model, inputs = data["model"], data["inputs"]
+        self.assertIn("DynamicCache", string_type(inputs))
+        model(**inputs)
+
+    @ignore_warnings(UserWarning)
+    @requires_transformers("4.52")
+    def test_export_phi2_1(self):
+        data = get_phi2(num_hidden_layers=2)
+        model, inputs = data["model"], data["inputs"]
+        self.assertEqual(
+            {"attention_mask", "past_key_values", "input_ids", "position_ids"}, set(inputs)
+        )
+        ep = torch.export.export(
+            model, (), kwargs=inputs, dynamic_shapes=data["dynamic_shapes"]
+        )
+        assert ep
+
+    @ignore_warnings(UserWarning)
+    def test_export_phi2_2_bypassed(self):
+        data = get_phi2(num_hidden_layers=2)
+        model, inputs = data["model"], data["inputs"]
+        self.assertEqual(
+            {"attention_mask", "past_key_values", "input_ids", "position_ids"}, set(inputs)
+        )
+        with bypass_export_some_errors(patch_transformers=True) as modificator:
+            inputs = modificator(inputs)
+            ep = torch.export.export(
+                model, (), kwargs=inputs, dynamic_shapes=data["dynamic_shapes"], strict=False
+            )
+            assert ep
+
+
+if __name__ == "__main__":
+    unittest.main(verbosity=2)
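
For readers who want to reproduce the new tests outside the unittest harness, a minimal sketch mirroring test_export_phi2_2_bypassed above (assuming onnx_diagnostic 0.3.0 and a transformers build that exposes PhiConfig):

import torch
from onnx_diagnostic.torch_models.llms import get_phi2
from onnx_diagnostic.torch_export_patches import bypass_export_some_errors

# Build a small, untrained phi-2-like model together with example inputs
# (including the new position_ids) and matching dynamic shapes.
data = get_phi2(num_hidden_layers=2)
model, inputs, dshapes = data["model"], data["inputs"], data["dynamic_shapes"]

# Patch transformers so the DynamicCache inputs survive torch.export, then export.
with bypass_export_some_errors(patch_transformers=True) as modificator:
    inputs = modificator(inputs)
    ep = torch.export.export(model, (), kwargs=inputs, dynamic_shapes=dshapes, strict=False)
print(ep)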

_unittests/ut_torch_models/test_tiny_llms.py

Lines changed: 30 additions & 5 deletions
@@ -6,7 +6,7 @@
 from onnx_diagnostic.torch_export_patches import bypass_export_some_errors
 
 
-class TestLlms(ExtTestCase):
+class TestTinyLlm(ExtTestCase):
     def test_get_tiny_llm(self):
         data = get_tiny_llm()
         model, inputs = data["model"], data["inputs"]
@@ -18,7 +18,9 @@ def test_get_tiny_llm(self):
     def test_export_tiny_llm_1(self):
         data = get_tiny_llm()
         model, inputs = data["model"], data["inputs"]
-        self.assertEqual({"attention_mask", "past_key_values", "input_ids"}, set(inputs))
+        self.assertEqual(
+            {"attention_mask", "past_key_values", "input_ids", "position_ids"}, set(inputs)
+        )
         ep = torch.export.export(
             model, (), kwargs=inputs, dynamic_shapes=data["dynamic_shapes"]
         )
@@ -28,11 +30,34 @@ def test_export_tiny_llm_1(self):
     def test_export_tiny_llm_2_bypassed(self):
         data = get_tiny_llm()
         model, inputs = data["model"], data["inputs"]
-        self.assertEqual({"attention_mask", "past_key_values", "input_ids"}, set(inputs))
-        with bypass_export_some_errors(patch_transformers=True) as modificator:
+        self.assertEqual(
+            {"attention_mask", "past_key_values", "input_ids", "position_ids"}, set(inputs)
+        )
+
+        with bypass_export_some_errors(
+            patch_torch=False, patch_transformers=True, catch_constraints=False
+        ) as modificator:
             inputs = modificator(inputs)
+
+            def debug():
+                print("***", string_type(inputs, with_shape=True))
+                print("***", data["dynamic_shapes"])
+                import torch.export._draft_export
+
+                ep, report = torch.export._draft_export.draft_export(
+                    model,
+                    (),
+                    kwargs=inputs,
+                    dynamic_shapes=data["dynamic_shapes"],
+                    strict=False,
+                )
+                print(report)
+
+            if self._debug():
+                debug()
+
             ep = torch.export.export(
-                model, (), kwargs=inputs, dynamic_shapes=data["dynamic_shapes"]
+                model, (), kwargs=inputs, dynamic_shapes=data["dynamic_shapes"], strict=False
             )
             assert ep
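The debug() branch above relies on torch.export._draft_export.draft_export, a private PyTorch helper that performs a relaxed export and returns a report of the issues it encountered (import path and signature are private and may change between releases); a standalone sketch of the same pattern:

import torch
import torch.export._draft_export  # private API, subject to change
from onnx_diagnostic.torch_models.llms import get_tiny_llm
from onnx_diagnostic.torch_export_patches import bypass_export_some_errors

data = get_tiny_llm()
model, inputs, dshapes = data["model"], data["inputs"], data["dynamic_shapes"]
with bypass_export_some_errors(patch_transformers=True) as modificator:
    inputs = modificator(inputs)
    # draft_export returns the exported program together with a report describing
    # the shape guards and data-dependent issues it had to work around.
    ep, report = torch.export._draft_export.draft_export(
        model, (), kwargs=inputs, dynamic_shapes=dshapes, strict=False
    )
print(report)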
_unittests/ut_torch_models/test_tiny_llms_onnx.py

Lines changed: 20 additions & 7 deletions
@@ -11,19 +11,21 @@
 from onnx_diagnostic.torch_export_patches import bypass_export_some_errors
 
 try:
-    from experimental_experiment.torch_interpreter import to_onnx
+    from experimental_experiment.torch_interpreter import to_onnx, ExportOptions
 except ImportError:
     to_onnx = None
 
 
-class TestLlmsOnnx(ExtTestCase):
+class TestTinyLlmOnnx(ExtTestCase):
     @ignore_warnings((UserWarning, DeprecationWarning, FutureWarning))
     @requires_transformers("4.50.9999")
     @hide_stdout()
     def test_onnx_export_tiny_llm_official(self):
         data = get_tiny_llm()
         model, inputs = data["model"], data["inputs"]
-        self.assertEqual({"attention_mask", "past_key_values", "input_ids"}, set(inputs))
+        self.assertEqual(
+            {"attention_mask", "past_key_values", "input_ids", "position_ids"}, set(inputs)
+        )
         ep = torch.onnx.export(
             model,
             (),
@@ -43,7 +45,9 @@ def test_onnx_export_tiny_llm_official(self):
     def test_onnx_export_tiny_llm_xdbg(self):
         data = get_tiny_llm()
         model, inputs = data["model"], data["inputs"]
-        self.assertEqual({"attention_mask", "past_key_values", "input_ids"}, set(inputs))
+        self.assertEqual(
+            {"attention_mask", "past_key_values", "input_ids", "position_ids"}, set(inputs)
+        )
         onx = to_onnx(
             model, (), kwargs=inputs, dynamic_shapes=data["dynamic_shapes"], verbose=1
         )
@@ -56,7 +60,9 @@ def test_onnx_export_tiny_llm_xdbg(self):
     def test_bypass_onnx_export_tiny_llm_official(self):
         data = get_tiny_llm()
         model, inputs = data["model"], data["inputs"]
-        self.assertEqual({"attention_mask", "past_key_values", "input_ids"}, set(inputs))
+        self.assertEqual(
+            {"attention_mask", "past_key_values", "input_ids", "position_ids"}, set(inputs)
+        )
         with bypass_export_some_errors(patch_transformers=True, verbose=1) as modificator:
             new_inputs = modificator(inputs)
             ep = torch.onnx.export(
@@ -77,11 +83,18 @@ def test_bypass_onnx_export_tiny_llm_official(self):
     def test_bypass_onnx_export_tiny_llm_xdbg(self):
         data = get_tiny_llm()
         model, inputs = data["model"], data["inputs"]
-        self.assertEqual({"attention_mask", "past_key_values", "input_ids"}, set(inputs))
+        self.assertEqual(
+            {"attention_mask", "past_key_values", "input_ids", "position_ids"}, set(inputs)
+        )
         with bypass_export_some_errors(patch_transformers=True, verbose=1) as modificator:
             new_inputs = modificator(inputs)
             onx = to_onnx(
-                model, (), kwargs=new_inputs, dynamic_shapes=data["dynamic_shapes"], verbose=1
+                model,
+                (),
+                kwargs=new_inputs,
+                dynamic_shapes=data["dynamic_shapes"],
+                verbose=1,
+                export_options=ExportOptions(strict=False),
             )
             self.assert_onnx_disc(
                 inspect.currentframe().f_code.co_name, onx, model, inputs, verbose=1
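
The same experimental export path, pulled out of the test for reference; a sketch that assumes the optional experimental_experiment package is installed (the tests above skip when its import fails and to_onnx stays None):

from experimental_experiment.torch_interpreter import to_onnx, ExportOptions
from onnx_diagnostic.torch_models.llms import get_tiny_llm
from onnx_diagnostic.torch_export_patches import bypass_export_some_errors

data = get_tiny_llm()
model, inputs, dshapes = data["model"], data["inputs"], data["dynamic_shapes"]

with bypass_export_some_errors(patch_transformers=True, verbose=1) as modificator:
    new_inputs = modificator(inputs)
    onx = to_onnx(
        model,
        (),
        kwargs=new_inputs,
        dynamic_shapes=dshapes,
        export_options=ExportOptions(strict=False),  # mirrors the non-strict export used above
    )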

onnx_diagnostic/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -3,5 +3,5 @@
 Functions, classes to dig into a model when this one is right, slow, wrong...
 """
 
-__version__ = "0.2.1"
+__version__ = "0.3.0"
 __author__ = "Xavier Dupré"

onnx_diagnostic/ext_test_case.py

Lines changed: 4 additions & 0 deletions
@@ -1090,3 +1090,7 @@ def assert_onnx_disc(
             and not numpy.isnan(diff["rel"])
             and diff["rel"] <= rtol
         ), f"discrepancies in {test_name!r}, diff={string_diff(diff)}"
+
+    def _debug(self):
+        "Tells if DEBUG=1 is set up."
+        return os.environ.get("DEBUG") in BOOLEAN_VALUES
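
This new hook is what gates the debug() branch in test_export_tiny_llm_2_bypassed above; a small sketch of the intended workflow (BOOLEAN_VALUES is assumed to accept the usual truthy strings such as "1"):

import os

# Setting DEBUG=1 before running the tests makes ExtTestCase._debug() return True,
# which switches on the extra draft-export report in the bypassed export test above.
os.environ["DEBUG"] = "1"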
Lines changed: 2 additions & 1 deletion
@@ -1 +1,2 @@
-from .untrained.tiny_llm import get_tiny_llm
+from .untrained.llm_phi2 import get_phi2
+from .untrained.llm_tiny_llm import get_tiny_llm
Lines changed: 109 additions & 0 deletions
@@ -0,0 +1,109 @@
+from typing import Any, Dict
+import torch
+import transformers
+from ...cache_helpers import make_dynamic_cache
+
+
+def get_phi2(
+    batch_size: int = 1,
+    sequence_length: int = 30,
+    sequence_length2: int = 3,
+    dynamic_rope: bool = False,
+    **kwargs,
+) -> Dict[str, Any]:
+    """
+    Gets a non initialized model
+    similar to `microsoft/phi-2 <https://huggingface.co/microsoft/phi-2>`_
+
+    :param batch_size: batch size
+    :param sequence_length: sequence length
+    :param sequence_length2: new sequence length
+    :param dynamic_rope: use dynamic rope (see :class:`transformers.LlamaConfig`)
+    :param kwargs: to overwrite the configuration, example ``num_hidden_layers=1``
+    :return: dictionary
+
+    See :ref:`l-plot-tiny-llm-export-patched` for an example with a similar model.
+    """
+    config = {
+        "_name_or_path": "microsoft/phi-2",
+        "architectures": ["PhiForCausalLM"],
+        "attention_dropout": 0.0,
+        "bos_token_id": 50256,
+        "embd_pdrop": 0.0,
+        "eos_token_id": 50256,
+        "hidden_act": "gelu_new",
+        "hidden_size": 2560,
+        "initializer_range": 0.02,
+        "intermediate_size": 10240,
+        "layer_norm_eps": 1e-05,
+        "max_position_embeddings": 2048,
+        "model_type": "phi",
+        "num_attention_heads": 32,
+        "num_hidden_layers": 32,
+        "num_key_value_heads": 32,
+        "partial_rotary_factor": 0.4,
+        "qk_layernorm": False,
+        "resid_pdrop": 0.1,
+        "rope_scaling": {"rope_type": "dynamic", "factor": 10.0} if dynamic_rope else None,
+        "rope_theta": 10000.0,
+        "tie_word_embeddings": False,
+        "torch_dtype": "float16",
+        "transformers_version": "4.37.0",
+        "use_cache": True,
+        "vocab_size": 51200,
+    }
+    config.update(**kwargs)
+    conf = transformers.PhiConfig(**config)
+    model = transformers.PhiForCausalLM(conf)
+    model.eval()
+
+    # now the inputs
+    cache_last_dim = 80
+    max_token_id = config["vocab_size"] - 1
+    n_layers = config["num_hidden_layers"]
+    num_key_value_heads = config["num_key_value_heads"]
+
+    batch = torch.export.Dim("batch", min=1, max=1024)
+    seq_length = torch.export.Dim("seq_length", min=1, max=4096)
+    cache_length = torch.export.Dim("cache_length", min=1, max=4096)
+
+    shapes = {
+        "input_ids": {0: batch, 1: seq_length},
+        "position_ids": {
+            0: batch,
+            1: torch.export.Dim.DYNAMIC,  # cache_length + seq_length
+        },
+        "attention_mask": {
+            0: batch,
+            1: torch.export.Dim.DYNAMIC,  # cache_length + seq_length
+        },
+        "past_key_values": [
+            [{0: batch, 2: cache_length} for _ in range(n_layers)],
+            [{0: batch, 2: cache_length} for _ in range(n_layers)],
+        ],
+    }
+    inputs = dict(
+        input_ids=torch.randint(0, max_token_id, (batch_size, sequence_length2)).to(
+            torch.int64
+        ),
+        attention_mask=torch.ones((batch_size, sequence_length + sequence_length2)).to(
+            torch.int64
+        ),
+        position_ids=torch.arange(sequence_length, sequence_length + sequence_length2)
+        .to(torch.int64)
+        .expand((batch_size, -1)),
+        past_key_values=make_dynamic_cache(
+            [
+                (
+                    torch.randn(
+                        batch_size, num_key_value_heads, sequence_length, cache_last_dim
+                    ),
+                    torch.randn(
+                        batch_size, num_key_value_heads, sequence_length, cache_last_dim
+                    ),
+                )
+                for i in range(n_layers)
+            ]
+        ),
+    )
+    return dict(inputs=inputs, model=model, dynamic_shapes=shapes)
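
To see how these concrete inputs line up with the declared dynamic shapes, a short sketch using the string_type helper the new test already imports (the exact output format is indicative only):

from onnx_diagnostic.helpers import string_type
from onnx_diagnostic.torch_models.llms import get_phi2

data = get_phi2(num_hidden_layers=2)
# Expected pattern: input_ids is (batch_size, sequence_length2), attention_mask and
# position_ids are (batch_size, sequence_length + sequence_length2), and each cache
# tensor is (batch_size, num_key_value_heads, sequence_length, 80).
print(string_type(data["inputs"], with_shape=True))
print(data["dynamic_shapes"])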

onnx_diagnostic/torch_models/untrained/tiny_llm.py renamed to onnx_diagnostic/torch_models/untrained/llm_tiny_llm.py

Lines changed: 12 additions & 6 deletions
@@ -6,18 +6,23 @@
 
 def get_tiny_llm(
     batch_size: int = 2,
+    sequence_length: int = 30,
+    sequence_length2: int = 3,
     dynamic_rope: bool = False,
     **kwargs,
 ) -> Dict[str, Any]:
     """
-    Gets a non initialized model.
+    Gets a non initialized model
+    similar to `arnir0/Tiny-LLM <https://huggingface.co/arnir0/Tiny-LLM>`_
 
     :param batch_size: batch size
+    :param sequence_length: sequence length
+    :param sequence_length2: new sequence length
     :param dynamic_rope: use dynamic rope (see :class:`transformers.LlamaConfig`)
     :param kwargs: to overwrite the configuration, example ``num_hidden_layers=1``
     :return: dictionary
 
-    See :ref:`l-plot-tiny-llm-export` for an example.
+    See :ref:`l-plot-tiny-llm-export` or :ref:`l-plot-tiny-llm-export-patched` for examples.
     """
     config = {
         "architectures": ["LlamaForCausalLM"],
@@ -49,19 +54,20 @@ def get_tiny_llm(
 
     # now the inputs
     cache_last_dim = 96
-    sequence_length = 30
-    sequence_length2 = 3
-    num_key_value_heads = 1
     max_token_id = config["vocab_size"] - 1
     n_layers = config["num_hidden_layers"]
+    num_key_value_heads = config["num_key_value_heads"]
 
     batch = torch.export.Dim("batch", min=1, max=1024)
     seq_length = torch.export.Dim("seq_length", min=1, max=4096)
     cache_length = torch.export.Dim("cache_length", min=1, max=4096)
 
     shapes = {
         "input_ids": {0: batch, 1: seq_length},
-        "position_ids": {0: torch.export.Dim.DYNAMIC},
+        "position_ids": {
+            0: batch,
+            1: torch.export.Dim.DYNAMIC,  # cache_length + seq_length
+        },
         "attention_mask": {
             0: batch,
             1: torch.export.Dim.DYNAMIC,  # cache_length + seq_length