
Commit d817f19

fix draft
1 parent df3ad9b commit d817f19

File tree

2 files changed: +3 -9 lines


onnx_diagnostic/torch_export_patches/patches/patch_transformers.py

Lines changed: 0 additions & 6 deletions
@@ -4,7 +4,6 @@
 from functools import wraps
 from typing import Callable, List, Optional, Tuple
 import packaging.version as pv
-from sklearn import logger
 import torch
 import transformers
 from transformers.modeling_attn_mask_utils import AttentionMaskConverter
@@ -1658,11 +1657,6 @@ def patched_sdpa_attention_forward(
     **kwargs,
 ) -> tuple[torch.Tensor, None]:
     """manual patch for function ```transformers.integrations.sdpa_attention.sdpa_attention_forward```."""  # noqa: E501
-    if kwargs.get("output_attentions", False) or kwargs.get("head_mask") is not None:
-        logger.warning_once(
-            "`sdpa` attention does not support `output_attentions=True` or `head_mask`."
-            " Please set your attention to `eager` if you want any of these features."
-        )
     sdpa_kwargs = {}
     if hasattr(module, "num_key_value_groups"):
         if not use_gqa_in_sdpa(attention_mask, key):
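The dropped `from sklearn import logger` line was an accidental import, so the `logger.warning_once` block that depended on it is removed as well. If that warning were still wanted, a minimal sketch, assuming the standard `transformers.utils.logging` helper (not part of this commit), could look like this:

# Hypothetical sketch, not part of this commit: the logger transformers
# itself provides would replace the accidental sklearn import removed above.
from transformers.utils import logging

logger = logging.get_logger(__name__)


def warn_if_sdpa_unsupported(kwargs: dict) -> None:
    """Warn once when options unsupported by SDPA attention are requested."""
    if kwargs.get("output_attentions", False) or kwargs.get("head_mask") is not None:
        logger.warning_once(
            "`sdpa` attention does not support `output_attentions=True` or `head_mask`."
            " Please set your attention to `eager` if you want any of these features."
        )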

onnx_diagnostic/torch_models/untrained/llm_tiny_llm.py

Lines changed: 3 additions & 3 deletions
@@ -5,7 +5,7 @@
 def get_tiny_llm(
     batch_size: int = 2,
     sequence_length: int = 30,
-    sequence_length2: int = 3,
+    past_sequence_length: int = 3,
     dynamic_rope: bool = False,
     use_static_cache: bool = False,
     **kwargs,
@@ -15,7 +15,7 @@ def get_tiny_llm(
 
     :param batch_size: batch size
     :param sequence_length: sequence length
-    :param sequence_length2: new sequence length
+    :param past_sequence_length: past sequence length
     :param dynamic_rope: use dynamic rope (see :class:`transformers.LlamaConfig`)
     :param use_static_cache: use StaticCache instead of DynamicCache
     :param kwargs: to overwrite the configuration, example ``num_hidden_layers=1``
@@ -62,7 +62,7 @@ def get_tiny_llm(
         num_hidden_layers=config["num_hidden_layers"],  # type: ignore[arg-type]
         batch_size=batch_size,
         sequence_length=sequence_length,
-        sequence_length2=sequence_length2,
+        past_sequence_length=past_sequence_length,
         dynamic_rope=dynamic_rope,
         num_key_value_heads=config["num_key_value_heads"],  # type: ignore[arg-type]
         cls_cache="StaticCache" if use_static_cache else "DynamicCache",
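With the parameter renamed from `sequence_length2` to `past_sequence_length`, callers pass the cache length under its new name. A hypothetical usage sketch; the structure of the returned value is an assumption, not taken from this commit:

# Hypothetical usage sketch: only the keyword rename matters here; the
# keys of the returned dictionary are assumed for illustration.
from onnx_diagnostic.torch_models.untrained.llm_tiny_llm import get_tiny_llm

data = get_tiny_llm(
    batch_size=2,
    sequence_length=30,
    past_sequence_length=3,  # previously sequence_length2
    use_static_cache=False,
)
model, inputs = data["model"], data["inputs"]  # assumed keys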
