disable modeling_utils rewrite

titaiwangms · titaiwangms · commit 1f4ca3a1196c · 2025-10-06T20:05:51.000Z
diff --git a/onnx_diagnostic/torch_export_patches/patches/patch_transformers.py b/onnx_diagnostic/torch_export_patches/patches/patch_transformers.py
@@ -1901,7 +1901,10 @@ def get_placeholder_mask(
 try:
     import transformers.modeling_utils
 
-    patch_modeling_utils = True
+    # TODO(titaiwang): The modeling_utils patch is not ready yet, so it is disabled.
+    # When exporting with a multi-turn conversation, we don't need to rewrite the
+    # attention because sequence_length is not restricted to 1.
+    patch_modeling_utils = False
 
     from transformers.integrations.sdpa_attention import use_gqa_in_sdpa, repeat_kv