@@ -128,13 +128,13 @@ def compute_performances(mut_data, mut_sep=':', start_i: int = 0, already_tested
128128 x_dca = gremlin .collect_encoded_sequences (sequences )
129129 x_wt = gremlin .x_wt
130130 y_pred_dca = get_delta_e_statistical_model (x_dca , x_wt )
131- print ('DCA:' , spearmanr (fitnesses , y_pred_dca ), len ( fitnesses ) )
131+ print (f 'DCA (unsupervised performance): { spearmanr (fitnesses , y_pred_dca )[ 0 ]:.3f } ' )
132132 dca_unopt_perf = spearmanr (fitnesses , y_pred_dca )[0 ]
133133
134134 try :
135135 x_esm , esm_attention_mask = esm_tokenize_sequences (sequences , esm_tokenizer , max_length = 1000 )#len(wt_seq))
136136 y_esm = esm_infer (get_batches (x_esm , dtype = float , batch_size = 1 ), esm_attention_mask , esm_base_model )
137- print ('ESM1v:' , spearmanr (fitnesses , y_esm .cpu ()))
137+ print (f 'ESM1v (unsupervised performance): { spearmanr (fitnesses , y_esm .cpu ())[ 0 ]:.3f } ' )
138138 esm_unopt_perf = spearmanr (fitnesses , y_esm .cpu ())[0 ]
139139 except RuntimeError :
140140 esm_unopt_perf = np .nan
@@ -144,7 +144,7 @@ def compute_performances(mut_data, mut_sep=':', start_i: int = 0, already_tested
144144 x_prosst = prosst_tokenize_sequences (sequences = sequences , vocab = prosst_vocab )
145145 y_prosst = get_logits_from_full_seqs (
146146 x_prosst , prosst_base_model , input_ids , prosst_attention_mask , structure_input_ids , train = False )
147- print ('ProSST:' , spearmanr (fitnesses , y_prosst .cpu ()))
147+ print (f 'ProSST (unsupervised performance): { spearmanr (fitnesses , y_prosst .cpu ())[ 0 ]:.3f } ' )
148148 prosst_unopt_perf = spearmanr (fitnesses , y_prosst .cpu ())[0 ]
149149 except RuntimeError :
150150 prosst_unopt_perf = np .nan
@@ -239,7 +239,7 @@ def compute_performances(mut_data, mut_sep=':', start_i: int = 0, already_tested
239239 np .asarray (x_llm_test_prosst )
240240 ][i_m ]
241241 )
242- print (f'Hybrid perf. : { spearmanr (y_test , y_test_pred )[0 ]} ' )
242+ print (f'Hybrid performance : { spearmanr (y_test , y_test_pred )[0 ]:.3f } ' )
243243 hybrid_perfs .append (spearmanr (y_test , y_test_pred )[0 ])
244244 except RuntimeError : # modeling_prosst.py, line 920, in forward
245245 # or UnboundLocalError in prosst_lora_tune.py, line 167
@@ -263,8 +263,6 @@ def compute_performances(mut_data, mut_sep=':', start_i: int = 0, already_tested
263263 dset_ns_y_test_i = ''
264264 for ns_y_t in ns_y_test :
265265 dset_ns_y_test_i += f'{ ns_y_t } ,'
266- print (ns_y_test )
267- print ('\n README:\n ' , dset_hybrid_perfs_i , '\n ' , dset_ns_y_test_i , '\n ' )
268266 with open (out_results_csv , 'a' ) as fh :
269267 fh .write (
270268 f'{ numbers_of_datasets [i ]} ,{ dset_key } ,{ len (variants_orig )} ,{ max_muts } ,{ dca_unopt_perf } ,'
@@ -274,7 +272,6 @@ def compute_performances(mut_data, mut_sep=':', start_i: int = 0, already_tested
274272def plot_csv_data (csv , plot_name ):
275273 train_test_size_texts = []
276274 df = pd .read_csv (csv , sep = ',' )
277- # No.,Dataset,N_Variants,N_Max_Muts,Untrained_Performance_DCA,Untrained_Performance_LLM,Hybrid_Trained_Performance_100,Hybrid_Trained_Performance_200,Hybrid_Trained_Performance_1000
278275 tested_dsets = df ['No.' ]
279276 dset_dca_perfs = df ['Untrained_Performance_DCA' ]
280277 dset_esm_perfs = df ['Untrained_Performance_ESM1v' ]
@@ -290,7 +287,6 @@ def plot_csv_data(csv, plot_name):
290287 dset_hybrid_perfs_dca_prosst_1000 = df ['Hybrid_DCA_ProSST_Trained_Performance_1000' ]
291288
292289 plt .figure (figsize = (80 , 12 ))
293- #import gc;gc.collect() # Potentially GC is needed to free some RAM (deallocated VRAM -> partly stored in RAM?) after each run
294290 plt .plot (range (len (tested_dsets )), dset_dca_perfs , 'o--' , markersize = 8 , color = 'tab:blue' , label = 'DCA (0)' )
295291 plt .plot (range (len (tested_dsets ) + 1 ), np .full (len (tested_dsets ) + 1 , np .nanmean (dset_dca_perfs )), color = 'tab:blue' , linestyle = '--' )
296292 for i , (p , n_test ) in enumerate (zip (dset_dca_perfs , df ['N_Y_test' ].astype ('Int64' ).to_list ())):
0 commit comments