@@ -540,7 +540,7 @@ def patched__compute_dynamic_ntk_parameters(
     seq_len: Optional[int] = None,
     **rope_kwargs,
 ) -> Tuple["torch.Tensor", float]:
-    """
+    """manual patch:
     ``[patch:transformers.modeling_rope_utils._compute_dynamic_ntk_parameters]``
 
     Computes the inverse frequencies with NTK scaling.
@@ -594,8 +594,9 @@ def patched__compute_dynamic_ntk_parameters(
         seq_len = max_position_embeddings
     else:
         torch._check(isinstance(seq_len, torch.Tensor))
-        seq_len = torch.max(
-            seq_len, torch.Tensor(max_position_embeddings, dtype=seq_len.dtype)
+        seq_len = torch.maximum(
+            seq_len,
+            torch.tensor(max_position_embeddings, dtype=seq_len.dtype, device=seq_len.device),
         )
 
     # Compute the inverse frequencies
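# Illustrative sketch (not part of the diff above): why the patch switches from
# torch.Tensor/torch.max to torch.tensor/torch.maximum. The values below are
# made up; only the torch calls matter.
import torch

max_position_embeddings = 4096                   # hypothetical config value
seq_len = torch.tensor(2048, dtype=torch.int64)  # seq_len arrives as a tensor at export time

# The old line could not run: the legacy torch.Tensor constructor does not
# accept a dtype keyword, and torch.Tensor(n) would allocate an uninitialized
# tensor of shape (n,) rather than wrap the value n.
#   torch.Tensor(max_position_embeddings, dtype=seq_len.dtype)  # raises TypeError

# torch.tensor(n) wraps the scalar into a 0-d tensor on the right device, and
# torch.maximum takes the element-wise maximum, which clamps seq_len from below.
seq_len = torch.maximum(
    seq_len,
    torch.tensor(max_position_embeddings, dtype=seq_len.dtype, device=seq_len.device),
)
print(seq_len)  # tensor(4096)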
@@ -676,13 +677,23 @@ def wrapper(self, x, position_ids):
     """
 
     def longrope_frequency_update(self, position_ids, device):
+        # Patching the function after the model is created has no effect:
+        # rope_init_fn is an attribute bound to the original function when
+        # the model is created, before any patch is applied.
+        # So we select the patched version here.
+        rope_init_fn = (
+            patched__compute_dynamic_ntk_parameters
+            if self.rope_init_fn
+            is transformers.modeling_rope_utils._compute_dynamic_ntk_parameters
+            else self.rope_init_fn
+        )
        seq_len = torch.max(position_ids) + 1
        if hasattr(self.config, "original_max_position_embeddings"):
            original_max_position_embeddings = self.config.original_max_position_embeddings
        else:
            original_max_position_embeddings = self.config.max_position_embeddings
        # At export time, seq_len is unknown.
-        long_inv_freq, _ = self.rope_init_fn(
+        long_inv_freq, _ = rope_init_fn(
            self.config, device, seq_len=original_max_position_embeddings + 1
        )
        original_inv_freq = self.original_inv_freq.to(device)
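# Illustrative sketch (not part of the diff): why rope_init_fn is re-selected
# inside the update functions instead of patching the module attribute later.
# _ToyRope is a hypothetical stand-in for the rotary-embedding module;
# patched__compute_dynamic_ntk_parameters is assumed importable from the
# patched module shown above.
import transformers


class _ToyRope:
    def __init__(self):
        # bound once at construction time, before any patch could be applied
        self.rope_init_fn = (
            transformers.modeling_rope_utils._compute_dynamic_ntk_parameters
        )


rope = _ToyRope()

# Re-assigning transformers.modeling_rope_utils._compute_dynamic_ntk_parameters
# afterwards would not rebind rope.rope_init_fn, so the substitution happens at
# call time: swap in the patched function only when the stored attribute is
# still the stock implementation, and leave any custom rope_init_fn untouched.
rope_init_fn = (
    patched__compute_dynamic_ntk_parameters
    if rope.rope_init_fn
    is transformers.modeling_rope_utils._compute_dynamic_ntk_parameters
    else rope.rope_init_fn
)
assert rope_init_fn is patched__compute_dynamic_ntk_parameters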
@@ -706,6 +717,17 @@ def dynamic_frequency_update(self, position_ids, device):
        # - self.original_max_seq_len = config.max_position_embeddings
        # - inv_freq, self.attention_scaling = self.rope_init_fn(self.config, device)
 
+        # Patching the function after the model is created has no effect:
+        # rope_init_fn is an attribute bound to the original function when
+        # the model is created, before any patch is applied.
+        # So we select the patched version here.
+        rope_init_fn = (
+            patched__compute_dynamic_ntk_parameters
+            if self.rope_init_fn
+            is transformers.modeling_rope_utils._compute_dynamic_ntk_parameters
+            else self.rope_init_fn
+        )
+
        # This behaviour is difficult to translate.
        # The sequence always grows.
        # The test should always be True.
@@ -729,7 +751,7 @@ def dynamic_frequency_update(self, position_ids, device):
        # )
 
        seq_len = torch.max(position_ids) + 1
-        long_inv_freq, self.attention_scaling = self.rope_init_fn(
+        long_inv_freq, self.attention_scaling = rope_init_fn(
            self.config, device, seq_len=seq_len
        )
 