payvhrd'

xadupre · xadupre · commit 0478a591fe7b · 2025-03-26T00:36:14.000+01:00
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -17,6 +17,7 @@ jobs:
         os: [ubuntu-latest]
         python: ['3.11', '3.12']
         transformers: ['4.48', '4.50', 'main']
+        torch: ['2.6', 'main']
 
     steps:
       - uses: actions/checkout@v3
@@ -26,7 +27,13 @@ jobs:
           python-version: ${{ matrix.python }}
 
       - name: Install pytorch
-        run: python -m pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cpu
+        run: |
+          if [[ "${{ matrix.torch }}" == "main" ]]; then
+            python -m pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cpu
+          else
+            echo "install torch==${{ matrix.torch }}"
+            python -m pip install torch==${{ matrix.torch }}
+          fi
 
       - name: Install transformers ${{ matrix.transformers }}
         run: |
diff --git a/_unittests/ut_torch_models/test_llm_phi2.py b/_unittests/ut_torch_models/test_llm_phi2.py
@@ -17,27 +17,23 @@ def test_get_phi2(self):
     @requires_transformers("4.52")
     def test_export_phi2_1(self):
         data = get_phi2(num_hidden_layers=2)
-        model, inputs = data["model"], data["inputs"]
+        model, inputs, ds = data["model"], data["inputs"], data["dynamic_shapes"]
         self.assertEqual(
             {"attention_mask", "past_key_values", "input_ids", "position_ids"}, set(inputs)
         )
-        ep = torch.export.export(
-            model, (), kwargs=inputs, dynamic_shapes=data["dynamic_shapes"]
-        )
+        ep = torch.export.export(model, (), kwargs=inputs, dynamic_shapes=ds)
         assert ep
 
     @ignore_warnings(UserWarning)
     def test_export_phi2_2_bypassed(self):
         data = get_phi2(num_hidden_layers=2)
-        model, inputs = data["model"], data["inputs"]
+        model, inputs, ds = data["model"], data["inputs"], data["dynamic_shapes"]
         self.assertEqual(
             {"attention_mask", "past_key_values", "input_ids", "position_ids"}, set(inputs)
         )
         with bypass_export_some_errors(patch_transformers=True) as modificator:
             inputs = modificator(inputs)
-            ep = torch.export.export(
-                model, (), kwargs=inputs, dynamic_shapes=data["dynamic_shapes"], strict=False
-            )
+            ep = torch.export.export(model, (), kwargs=inputs, dynamic_shapes=ds, strict=False)
             assert ep
 
 
diff --git a/_unittests/ut_torch_models/test_tiny_llms_onnx.py b/_unittests/ut_torch_models/test_tiny_llms_onnx.py
@@ -1,3 +1,4 @@
+import copy
 import inspect
 import unittest
 import torch
@@ -57,19 +58,41 @@ def test_onnx_export_tiny_llm_xdbg(self):
 
     @ignore_warnings((UserWarning, DeprecationWarning, FutureWarning))
     @hide_stdout()
-    def test_bypass_onnx_export_tiny_llm_official(self):
+    def test_bypass_onnx_export_tiny_llm_official_nopositionids(self):
         data = get_tiny_llm()
-        model, inputs = data["model"], data["inputs"]
+        model, inputs, ds = data["model"], data["inputs"], data["dynamic_shapes"]
+        del inputs["position_ids"]
+        del ds["position_ids"]
+        self.assertEqual({"attention_mask", "past_key_values", "input_ids"}, set(inputs))
+        with bypass_export_some_errors(patch_transformers=True, verbose=1) as modificator:
+            new_inputs = modificator(copy.deepcopy(inputs))
+            ep = torch.onnx.export(
+                model,
+                (),
+                kwargs=new_inputs,
+                dynamic_shapes=ds,
+                dynamo=True,
+                optimize=True,
+            )
+        self.assert_onnx_disc(
+            inspect.currentframe().f_code.co_name, ep.model_proto, model, inputs, verbose=1
+        )
+
+    @ignore_warnings((UserWarning, DeprecationWarning, FutureWarning))
+    @hide_stdout()
+    def test_bypass_onnx_export_tiny_llm_official_full(self):
+        data = get_tiny_llm()
+        model, inputs, ds = data["model"], data["inputs"], data["dynamic_shapes"]
         self.assertEqual(
             {"attention_mask", "past_key_values", "input_ids", "position_ids"}, set(inputs)
         )
         with bypass_export_some_errors(patch_transformers=True, verbose=1) as modificator:
-            new_inputs = modificator(inputs)
+            new_inputs = modificator(copy.deepcopy(inputs))
             ep = torch.onnx.export(
                 model,
                 (),
                 kwargs=new_inputs,
-                dynamic_shapes=data["dynamic_shapes"],
+                dynamic_shapes=ds,
                 dynamo=True,
                 optimize=True,
             )
@@ -82,7 +105,7 @@ def test_bypass_onnx_export_tiny_llm_official(self):
     @hide_stdout()
     def test_bypass_onnx_export_tiny_llm_xdbg(self):
         data = get_tiny_llm()
-        model, inputs = data["model"], data["inputs"]
+        model, inputs, ds = data["model"], data["inputs"], data["dynamic_shapes"]
         self.assertEqual(
             {"attention_mask", "past_key_values", "input_ids", "position_ids"}, set(inputs)
         )
@@ -92,7 +115,7 @@ def test_bypass_onnx_export_tiny_llm_xdbg(self):
                 model,
                 (),
                 kwargs=new_inputs,
-                dynamic_shapes=data["dynamic_shapes"],
+                dynamic_shapes=ds,
                 verbose=1,
                 export_options=ExportOptions(strict=False),
             )
diff --git a/onnx_diagnostic/torch_export_patches/patches/patch_torch.py b/onnx_diagnostic/torch_export_patches/patches/patch_torch.py
@@ -146,3 +146,183 @@ def patched__broadcast_shapes(*_shapes):
                 common_shape[idx] = torch.sym_max(common_shape[idx], shape[idx])
 
     return common_shape
+
+
+class patched_ShapeEnv:
+    # NOTE(review): presumably swapped in for torch's ShapeEnv._set_replacement; confirm.
+    def _set_replacement(
+        self, a: "sympy.Symbol", tgt: "sympy.Expr", msg: str  # noqa: F821
+    ) -> None:
+        """
+        Adds or updates a replacement for a symbol.
+        Use this instead of `self.replacements[a] = tgt`.
+        """
+        if tgt == self.replacements.get(a, None):
+            return
+        # Skip as well when tgt itself mentions a.
+        if a in tgt.free_symbols:
+            return
+
+        import sympy
+        from torch._logging import structured
+        from torch.utils._traceback import CapturedTraceback
+        from torch._logging import trace_structured
+        from torch._guards import TracingContext
+        from torch.utils._sympy.functions import FloorToInt, CeilToInt
+        from torch.utils._sympy.solve import try_solve
+        from torch.fx.experimental.symbolic_shapes import (
+            _is_supported_equivalence,
+            ValueRanges,
+        )
+
+        # Precondition: a == tgt
+        assert isinstance(a, sympy.Symbol)
+
+        if self.allow_complex_guards_as_runtime_asserts and not _is_supported_equivalence(tgt):
+            # continuing leads to placeholder shapes
+            # having complex expressions that we can't resolve
+            return
+
+        # Handles nested tensor symbolic variables which don't have
+        # var_to_range bounds
+        tgt_bound = None
+        if a in self.var_to_range:
+            src_bound = self.var_to_range[a]
+
+            # First, refine the value range of a based on the computed value range
+            # of tgt.  This is always OK to do, even if we decide not to do the
+            # substitution in the end.  This might be a no-op, if a already has
+            # a tighter bound
+            tgt_bound = self.bound_sympy(tgt)
+            self._update_var_to_range(a, tgt_bound)
+
+            # Next, check if we can update the range of free symbols in tgt
+            # based on the range in a. But only do it if:
+            #  - the source bound non-trivially improves over what we get out of
+            #    the existing bounds.
+            #  - the replacement is univariate and we can invert the tgt expression
+            if not tgt_bound.issubset(src_bound) and len(tgt.free_symbols) == 1:
+                b = next(iter(tgt.free_symbols))
+                # Try to invert the equality
+                r = try_solve(sympy.Eq(a, tgt), b, floordiv_inequality=False)
+                if r is not None:
+                    self.log.debug(
+                        "set_replacement: solve for %s in %s == %s gives %s",
+                        b,
+                        a,
+                        tgt,
+                        r,
+                    )
+                    # The solution here can be non-integral, for example, if
+                    # we have s0 = 2*s1, then s1 = s0/2.  What we would like
+                    # to do is calculate the bounds in arbitrary precision,
+                    # and then requantize the bound to integers when we are
+                    # done.
+                    rat_b_bound = self.bound_sympy(r[1])
+                    b_bound = ValueRanges(
+                        CeilToInt(rat_b_bound.lower), FloorToInt(rat_b_bound.upper)
+                    )
+                    self._update_var_to_range(b, b_bound, self.var_to_range_sloc[a])
+                    tgt_bound = self.bound_sympy(tgt)
+                    assert tgt_bound.issubset(
+                        src_bound
+                    ), f"{tgt_bound=} not a subset of {src_bound=}"
+
+            # TODO: Should we propagate size-like-ness?
+            #
+            # Pros: if u0 is size-like, intuitively u0 == u1 should cause u1
+            # to become size-like.
+            #
+            # Cons: if u0 is size-like, what about u0 - 1 == u1?  You CAN'T
+            # propagate in this case, because what if u0 == 0, then u1 is negative
+            # and clearly isn't a size.  So, at minimum, any f(x) whose value
+            # range isn't [0, inf] given x in [0, inf] cannot propagate
+            # size-like-ness.  But there are many situations where you could
+            # imagine u1 is going to be size-like and actually you just didn't
+            # have a refined enough value range on u0.  Since even innocuous
+            # looking arithmetic operations can destroy size-like-ness, it's
+            # best to not propagate it at all and force the user to annotate it
+            # as necessary.
+            #
+            # Compromise: we preserve size-like-ness only for exact equality
+            # and nothing else.
+            if a in self.size_like and isinstance(tgt, sympy.Symbol):
+                self.size_like.add(tgt)
+            elif isinstance(tgt, sympy.Symbol) and tgt in self.size_like:
+                self.size_like.add(a)
+
+            # Now, decide if we will do the substitution.
+            #
+            #  - If the source has a non-trivial range, only substitute if
+            #    we preserve this range.  Note that we may have propagated
+            #    the src_range to free variables in tgt when tgt is univariate
+            #    and we could find an inverse, which helps us achieve this.
+            #    This ensures we never "forget" about user defined ranges,
+            #    even if they end up being defined on composite formulas
+            #    like s0 + s1.
+            #
+            #  - If the variable is unbacked, only substitute if the substitution
+            #    would preserve the bounds also under size-like-ness conditions.
+
+            if not tgt_bound.issubset(src_bound):
+                self.log.debug(
+                    "skipped set_replacement %s = %s (%s) [%s not subset of %s]",
+                    a,
+                    tgt,
+                    msg,
+                    tgt_bound,
+                    src_bound,
+                )
+                return
+            elif a in self.size_like:
+                tgt_bound_so = self.bound_sympy(tgt, size_oblivious=True)
+                src_bound_so = self.bound_sympy(a, size_oblivious=True)
+                if not tgt_bound_so.issubset(src_bound_so):
+                    self.log.debug(
+                        "skipped set_replacement %s = %s (%s) "
+                        "[%s not subset of %s (size-oblivious conditions)]",
+                        a,
+                        tgt,
+                        msg,
+                        tgt_bound_so,
+                        src_bound_so,
+                    )
+                    return
+
+        if isinstance(tgt, (sympy.Integer, sympy.Float)):
+            # specializing to a constant, which is likely unexpected (unless
+            # you specified dynamic=True)
+
+            user_tb = TracingContext.extract_stack()
+            trace_structured(
+                "symbolic_shape_specialization",
+                metadata_fn=lambda: {
+                    "symbol": repr(a),
+                    "sources": [s.name() for s in self.var_to_sources.get(a, [])],
+                    "value": repr(tgt),
+                    "reason": msg,
+                    "stack": structured.from_traceback(
+                        CapturedTraceback.extract(skip=1).summary()
+                    ),
+                    "user_stack": (structured.from_traceback(user_tb) if user_tb else None),
+                },
+            )
+
+            # if config.print_specializations:
+            #    self.log.warning(
+            #         "Specializing %s to %s", self.var_to_sources[a][0].name(), tgt
+            #     )
+            #     self.log.debug("SPECIALIZATION", stack_info=True)
+        assert msg != "range_refined_to_singleton", f"{[a, tgt, msg, tgt_bound]}"
+        # log.info("set_replacement %s = %s (%s) %s", a, tgt, msg, tgt_bound)
+        self.replacements[a] = tgt
+        # NB: the replacement may get refined, but the user will find the
+        # FIRST one most useful (TODO: Maybe we could consider tracking all of
+        # them)
+        if a not in self.replacements_slocs:
+            self.replacements_slocs[a] = self._get_sloc()
+        self._update_version_counter()
+
+        # When specializing 'a == tgt', the equality should be also conveyed to
+        # Z3, in case an expression uses 'a'.
+        self._add_target_expr(sympy.Eq(a, tgt, evaluate=False))
diff --git a/onnx_diagnostic/torch_models/untrained/llm_tiny_llm.py b/onnx_diagnostic/torch_models/untrained/llm_tiny_llm.py
@@ -64,11 +64,11 @@ def get_tiny_llm(
 
     shapes = {
         "input_ids": {0: batch, 1: seq_length},
-        "position_ids": {
+        "attention_mask": {
             0: batch,
             1: torch.export.Dim.DYNAMIC,  # cache_length + seq_length
         },
-        "attention_mask": {
+        "position_ids": {
             0: batch,
             1: torch.export.Dim.DYNAMIC,  # cache_length + seq_length
         },