Add attn_implementation option to mining recipe

oliverholworthy · oliverholworthy · commit ab1fa49cc550 · 2026-02-25T09:10:00.000Z
Signed-off-by: Oliver Holworthy <1216955+oliverholworthy@users.noreply.github.com>
diff --git a/nemo_automodel/recipes/biencoder/mine_hard_negatives.py b/nemo_automodel/recipes/biencoder/mine_hard_negatives.py
@@ -69,6 +69,8 @@
     # Model loading parameters (loaded directly, not from config)
     "model_name_or_path": None,  # Required: path to model checkpoint
     "tokenizer_name_or_path": None,  # Optional: defaults to model_name_or_path
+    # Attention implementation for model loading
+    "attn_implementation": None,  # None = use model default; "sdpa", "flash_attention_2", "eager"
 }
 
 
@@ -175,6 +177,7 @@ def __init__(self, cfg):
         self.tokenizer_name_or_path = None
         self.add_bos_token = None
         self.add_eos_token = None
+        self.attn_implementation = None
 
         # Model and tokenizer (populated in setup)
         self.model = None
@@ -234,11 +237,15 @@ def setup(self):
         # Load model directly from checkpoint path
         # This loads the saved model without requiring architecture config
         logger.info(f"Loading biencoder model from {self.model_name_or_path}...")
+        model_kwargs = {
+            "use_liger_kernel": False,  # Not needed for inference
+            "use_sdpa_patching": True,
+        }
+        if self.attn_implementation is not None:
+            model_kwargs["attn_implementation"] = self.attn_implementation
         self.model = NeMoAutoModelBiencoder.from_pretrained(
             self.model_name_or_path,
-            # Use inference-appropriate settings
-            use_liger_kernel=False,  # Not needed for inference
-            use_sdpa_patching=True,
+            **model_kwargs,
         )
         self.model = self.model.to(self.dist_env.device)
         self.model.eval()
@@ -297,6 +304,9 @@ def _extract_mining_params(self):
         self.add_bos_token = self._get_mining_param("add_bos_token")
         self.add_eos_token = self._get_mining_param("add_eos_token")
 
+        # Attention implementation for model loading
+        self.attn_implementation = self._get_mining_param("attn_implementation")
+
         # Prefix and length parameters for embedding generation
         self.query_prefix = self._get_mining_param("query_prefix")
         self.passage_prefix = self._get_mining_param("passage_prefix")