
Commit 5307989

Update default attn_implementation to None for Biencoder
Signed-off-by: Oliver Holworthy <1216955+oliverholworthy@users.noreply.github.com>
1 parent ab1fa49 commit 5307989

File tree

1 file changed: +2, -2 lines changed


nemo_automodel/_transformers/auto_model.py

Lines changed: 2 additions & 2 deletions
@@ -734,7 +734,7 @@ def from_pretrained(
     share_encoder: bool = True,
     pooling: str = "avg",
     l2_normalize: bool = True,
-    attn_implementation: str = "flash_attention_2",
+    attn_implementation: Optional[str] = None,
     use_liger_kernel: bool = True,
     use_sdpa_patching: bool = True,
     sdpa_method: Optional[List[SDPBackend]] = None,
@@ -762,7 +762,7 @@ def from_pretrained(
     l2_normalize: Whether to L2 normalize embeddings.
     attn_implementation: Attention implementation to use (e.g.,
         ``"flash_attention_2"``, ``"sdpa"``, ``"eager"``).
-        Defaults to ``"flash_attention_2"``.
+        Defaults to ``None`` (uses the model/transformers default, typically sdpa).
     use_liger_kernel: Whether to apply Liger kernel optimizations.
     use_sdpa_patching: Whether to apply SDPA patching.
     sdpa_method: SDPA backend methods to use.
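The effect of this change can be sketched in a few lines. The helper below is a hypothetical illustration, not code from nemo_automodel: with a `None` default, the caller's choice is passed through when given, and otherwise the downstream library (transformers) falls back to its own default attention backend, typically SDPA. A hard-coded `"flash_attention_2"` default would instead force that backend even on hardware or models where it is unavailable.

```python
# Hypothetical sketch of None-as-default resolution; the function name and
# the "sdpa" fallback are assumptions for illustration, not the actual
# nemo_automodel implementation.
from typing import Optional


def resolve_attn_implementation(attn_implementation: Optional[str] = None) -> str:
    """Return the attention backend to use.

    None defers to the library/model default (stubbed here as "sdpa");
    any explicit string is passed through unchanged.
    """
    if attn_implementation is None:
        return "sdpa"  # stand-in for the transformers default
    return attn_implementation


print(resolve_attn_implementation())                     # -> sdpa
print(resolve_attn_implementation("flash_attention_2"))  # -> flash_attention_2
```

In the real API, a user who still wants FlashAttention simply passes `attn_implementation="flash_attention_2"` explicitly to `from_pretrained`.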
