Skip to content

Commit a7d2040

Browse files
committed
Add kwargs for plm inference functions
1 parent 77d2bbf commit a7d2040

File tree

2 files changed: +38 additions, −40 deletions

pypef/plm/inference.py

Lines changed: 26 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -39,43 +39,31 @@ def unmasked_wt_score(
3939
train: bool = False,
4040
cut_special_tokens: bool = True, # assumption: cut first and last token
4141
device=None,
42-
**kwargs
42+
verbose: bool = False,
43+
**model_kwargs
4344
):
4445
if device is None:
4546
device = get_device()
4647
if wt_input_ids.dim() == 1:
4748
wt_input_ids = wt_input_ids.unsqueeze(0)
48-
structure_input_ids = kwargs.get("structure_input_ids", None)
49+
#structure_input_ids = model_kwargs.get("structure_input_ids", None)
4950

5051
attention_masks = torch.Tensor(np.full(
5152
shape=np.shape(wt_input_ids), fill_value=attention_mask)).to(torch.int64)
5253
if train:
53-
if structure_input_ids is not None:
54+
outputs = model(
55+
input_ids=wt_input_ids.to(device),
56+
attention_mask=attention_masks.to(device),
57+
**model_kwargs
58+
)
59+
60+
else:
61+
with torch.no_grad():
5462
outputs = model(
5563
input_ids=wt_input_ids.to(device),
5664
attention_mask=attention_masks.to(device),
57-
ss_input_ids=structure_input_ids.to(device)
58-
)
59-
else:
60-
outputs = model(
61-
wt_input_ids.to(device),
62-
attention_masks.to(device),
63-
output_hidden_states=False
65+
**model_kwargs
6466
)
65-
else:
66-
with torch.no_grad():
67-
if structure_input_ids is not None:
68-
outputs = model(
69-
input_ids=wt_input_ids.to(device),
70-
attention_mask=attention_masks.to(device),
71-
ss_input_ids=structure_input_ids.to(device)
72-
)
73-
else:
74-
outputs = model(
75-
wt_input_ids.to(device),
76-
attention_masks.to(device),
77-
output_hidden_states=False,
78-
)
7967

8068
logits = outputs.logits
8169
logits = logits.squeeze(0) # remove batch dim
@@ -105,7 +93,7 @@ def unmasked_wt_score(
10593
return log_probs
10694

10795

108-
def esm_mutation_only_mutation_masked_pll(
96+
def mutation_only_mutation_masked_pll(
10997
tokenized_sequences: torch.Tensor, # (L,)
11098
wt_input_ids: torch.Tensor, # (L,)
11199
attention_mask: torch.Tensor, # (L,)
@@ -198,7 +186,7 @@ def esm_mutation_only_mutation_masked_pll(
198186
return plls
199187

200188

201-
def esm_mutation_all_pos_masked_pll(
189+
def mutation_all_pos_masked_pll(
202190
tokenized_sequences: torch.Tensor, # (L,)
203191
attention_mask: torch.Tensor, # (L,)
204192
model,
@@ -285,7 +273,7 @@ def plm_inference(
285273
wt_input_ids,
286274
attention_mask,
287275
model,
288-
mask_token_id,
276+
mask_token_id = None,
289277
inference_type='unmasked',
290278
wt_structure_input_ids=None,
291279
batch_size=5,
@@ -304,9 +292,9 @@ def plm_inference(
304292
if not isinstance(attention_mask, torch.Tensor):
305293
attention_mask = torch.tensor(attention_mask, dtype=torch.long)
306294
if inference_type == 'mutation-masking':
307-
inference_function = esm_mutation_only_mutation_masked_pll
295+
inference_function = mutation_only_mutation_masked_pll
308296
elif inference_type in ['full-masking', 'all-pos-masking']:
309-
inference_function = esm_mutation_all_pos_masked_pll
297+
inference_function = mutation_all_pos_masked_pll
310298
elif inference_type in ['unmasked', 'wt-marginals']:
311299
inference_function = unmasked_wt_score
312300
else:
@@ -317,6 +305,13 @@ def plm_inference(
317305
xs_b = get_batches(xs, dtype=int, batch_size=batch_size, keep_remaining=True, verbose=True)
318306
desc = f"Inference: {inference_type} batch (size={batch_size}) processing ({device.upper()})'"
319307

308+
kwargs = {}
309+
if mask_token_id is not None:
310+
kwargs["mask_token_id"] = mask_token_id
311+
312+
if wt_structure_input_ids is not None:
313+
kwargs["structure_input_ids"] = wt_structure_input_ids
314+
320315
pbar = tqdm(
321316
range(len(xs_b)),
322317
desc=desc,
@@ -327,13 +322,12 @@ def plm_inference(
327322
pll = inference_function(
328323
tokenized_sequences=torch.tensor(xs_b[i]),
329324
wt_input_ids=wt_input_ids,
330-
structure_input_ids=wt_structure_input_ids,
331325
attention_mask=attention_mask,
332326
model=model,
333-
mask_token_id=mask_token_id,
334327
train=train,
335328
device=device,
336-
verbose=False
329+
verbose=False,
330+
**kwargs
337331
)
338332
scores.append(pll)
339333
return torch.cat(scores)

tests/test_api_functions.py

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@
7272

7373

7474
def test_gremlin_avgfp():
75+
print("test_gremlin_avgfp()...")
7576
g = GREMLIN(
7677
alignment=msa_file_avgfp,
7778
char_alphabet="ARNDCQEGHILKMFPSTWYV-",
@@ -87,6 +88,7 @@ def test_gremlin_avgfp():
8788

8889

8990
def test_hybrid_model_dca_llm():
91+
print("test_hybrid_model_dca_llm()...")
9092
g = GREMLIN(
9193
alignment=msa_file_aneh,
9294
char_alphabet="ARNDCQEGHILKMFPSTWYV-",
@@ -225,6 +227,7 @@ def test_hybrid_model_dca_llm():
225227

226228

227229
def test_dataset_b_results():
230+
print("test_dataset_b_results()...")
228231
aaindex = "WOLR810101.txt"
229232
x_fft_train, _ = AAIndexEncoding(
230233
full_aaidx_txt_path(aaindex), train_seqs_aneh
@@ -250,6 +253,7 @@ def test_dataset_b_results():
250253

251254
@pytest.mark.requires_gpu
252255
def test_plm_corr_blat_ecolx():
256+
print("test_plm_corr_blat_ecolx()...")
253257
device = get_device()
254258
print("Device", device)
255259
blat_ecolx_wt_seq = get_wt_sequence(wt_seq_file_blat_ecolx)
@@ -283,7 +287,7 @@ def test_plm_corr_blat_ecolx():
283287
train=False,
284288
verbose=True
285289
)
286-
print(f'{x}: ESM1v (unsupervised performance): '
290+
print(f'{x}: ESM1v (unsupervised performance mutation-masking): '
287291
f'{spearmanr(y_true, y_esm.cpu())[0]}')
288292
np.testing.assert_almost_equal(spearmanr(y_true, y_esm.cpu())[0], 0.6367826285982324, decimal=6)
289293

@@ -292,13 +296,13 @@ def test_plm_corr_blat_ecolx():
292296
wt_input_ids=wt_tokens,
293297
attention_mask=esm_attention_mask,
294298
model=esm_base_model,
295-
mask_token_id=esm_tokenizer.mask_token_id,
299+
mask_token_id=None, # do not define for unmasked
296300
inference_type='unmasked',
297301
batch_size=5,
298302
train=False,
299303
verbose=True
300304
)
301-
print(f'{x}: ESM1v (unsupervised performance): '
305+
print(f'{x}: ESM1v (unsupervised performance unmasked): '
302306
f'{spearmanr(y_true, y_esm.cpu())[0]}')
303307
np.testing.assert_almost_equal(spearmanr(y_true, y_esm.cpu())[0], 0.6498987261125897, decimal=6)
304308

@@ -341,7 +345,7 @@ def test_plm_corr_blat_ecolx():
341345
train=False,
342346
verbose=True
343347
)
344-
print(f'ProSST (unsupervised performance): ' # ProSST not made/trained for this: 0.607137337377509
348+
print(f'ProSST (unsupervised performance): ' # ProSST not made/trained for MLM: 0.607137337377509
345349
f'{spearmanr(y_true, y_prosst.cpu())[0]}')
346350
np.testing.assert_almost_equal(spearmanr(y_true, y_prosst.cpu())[0], 0.607137337377509, decimal=6)
347351

@@ -351,7 +355,7 @@ def test_plm_corr_blat_ecolx():
351355
wt_input_ids=wt_input_ids,
352356
attention_mask=prosst_attention_mask,
353357
model=prosst_base_model,
354-
mask_token_id=prosst_tokenizer.mask_token_id,
358+
mask_token_id=None, # do not define for unmasked
355359
inference_type='unmasked',
356360
wt_structure_input_ids=wt_structure_input_ids,
357361
batch_size=5,
@@ -379,8 +383,8 @@ def test_plm_corr_blat_ecolx():
379383

380384

381385
if __name__ == "__main__":
382-
test_gremlin_avgfp()
383-
test_hybrid_model_dca_llm()
384-
test_dataset_b_results()
386+
#test_gremlin_avgfp()
387+
#test_hybrid_model_dca_llm()
388+
#test_dataset_b_results()
385389
test_plm_corr_blat_ecolx()
386390

0 commit comments

Comments (0)