Skip to content

Commit 687dfb8

Browse files
committed
reduce warnings
1 parent 852ad7b commit 687dfb8

File tree

6 files changed

+21
-12
lines changed

6 files changed

+21
-12
lines changed

pypef/dca/gremlin_inference.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,8 @@ def get_sequences_from_msa(self, msa_file: str):
161161
"""
162162
sequences = []
163163
seq_ids = []
164-
alignment = AlignIO.read(open(msa_file), "fasta")
164+
with open(msa_file, 'r') as fh:
165+
alignment = AlignIO.read(fh, "fasta")
165166
for record in alignment:
166167
sequences.append(str(record.seq))
167168
seq_ids.append(str(record.id))
@@ -326,7 +327,8 @@ def loss(self, v, w):
326327
return loss
327328

328329
def _loss(self, decimals=2):
329-
return torch.round(self.loss(self.v, self.w) * self.n_eff, decimals=decimals)
330+
return torch.round(
331+
self.loss(self.v.detach(), self.w.detach()) * self.n_eff, decimals=decimals)
330332

331333
def run_optimization(self):
332334
"""

pypef/llm/esm_lora_tune.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ def corr_loss(y_true: torch.Tensor, y_pred: torch.Tensor):
9090

9191

9292
def get_batches(a, dtype, batch_size=5, keep_numpy: bool = False, verbose: bool = False):
93-
a = np.array(a, dtype=dtype)
93+
a = np.asarray(a, dtype=dtype)
9494
orig_shape = np.shape(a)
9595
remaining = len(a) % batch_size
9696
if remaining != 0:
@@ -174,7 +174,7 @@ def esm_train(xs, attention_mask, scores, loss_fn, model, optimizer, n_epochs=3,
174174
optimizer.step()
175175
optimizer.zero_grad()
176176
pbar_batches.set_description(
177-
f"EPOCH: {epoch}. Loss: {loss.item():>1f} "
177+
f"EPOCH: {epoch}. Loss: {loss.detach():>1f} "
178178
f"[batch: {batch+1}/{len(xs)} | "
179179
f"sequence: {(batch + 1) * len(xs_b):>5d}/{len(xs) * len(xs_b)}] "
180180
)

pypef/llm/prosst_lora_tune.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ def prosst_train(
134134
optimizer.step()
135135
optimizer.zero_grad()
136136
pbar_batches.set_description(
137-
f"Epoch: {epoch}. Loss: {loss.item():>1f} "
137+
f"Epoch: {epoch}. Loss: {loss.detach():>1f} "
138138
f"[batch: {batch+1}/{len(x_sequence_batches)} | "
139139
f"sequence: {(batch + 1) * len(seqs_b):>5d}/{len(x_sequence_batches) * len(seqs_b)}] "
140140
)
@@ -162,7 +162,10 @@ def prosst_train(
162162
pbar_epochs.set_description(
163163
f'Epoch {epoch}/{n_epochs} [SpearCorr: {epoch_spearman_2:.3f}, Loss: {loss_total:.3f}] '
164164
f'(Best epoch: {best_model_epoch}: {best_model_perf:.3f})')
165-
print(f"Loading best model as {best_model}...")
165+
try:
166+
print(f"Loading best model as {best_model}...")
167+
except UnboundLocalError:
168+
raise RuntimeError
166169
load_model(model, best_model)
167170
y_preds_train = get_logits_from_full_seqs(
168171
x_sequence_batches.flatten(start_dim=0, end_dim=1),

pypef/llm/prosst_structure/quantizer.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -352,10 +352,8 @@ def predict_sturcture(model, cluster_models, dataloader, device):
352352
for cluster_model_path in cluster_models:
353353
cluster_model_name = cluster_model_path.split("/")[-1].split(".")[0]
354354
struc_label_dict[cluster_model_name] = []
355-
#kmeans_model = joblib.load(cluster_model_path)
356-
#cluster_model_dict[cluster_model_name] = joblib.load(cluster_model_path)
357-
#np.save(open('2048_kmeans_cluster_centers.npy', 'wb'), kmeans_model.cluster_centers_, allow_pickle=False)
358-
cluster_centers = np.load(open(cluster_model_path, 'rb'), allow_pickle=False)
355+
with open(cluster_model_path, 'rb') as fh:
356+
cluster_centers = np.load(fh, allow_pickle=False)
359357
kmeans_ = {
360358
'algorithm': 'lloyd', 'copy_x': True, 'init': 'k-means++', 'n_init': 'auto',
361359
'max_iter': 1, 'n_clusters': 2048, 'random_state': 0, 'tol': 0.0001, 'verbose': 0

scripts/ProteinGym_runs/results/dca_esm_and_hybrid_opt_results.csv

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,8 @@ No.,Dataset,N_Variants,N_Max_Muts,Untrained_Performance_DCA,Untrained_Performanc
2020
19,BLAT_ECOLX_Stiffler_2015,4996,1,0.6770558216510012,0.6416832584219534,0.7429704711839445,0.6761612638022167,0.685911619948229,0.7751083097115732,0.6749771506378947,0.7252465085238093,0.7968744412838603,0.6726389518173735,0.8325185048951383,0.8455668050428576,4996,4896,4796,3996,2971
2121
22,C6KNH7_9INFA_Lee_2018,10754,1,0.3791927566837183,0.3984720325442061,0.4330097365766406,0.37927109617574484,0.4205973198021492,0.47614637594052095,0.37986754538383266,0.46189131088601726,0.5281422521011082,0.3809479497535896,0.6309329101735078,0.6563769817443349,10754,10654,10554,9754,7614
2222
23,CALM1_HUMAN_Weile_2017,1813,1,0.1880116162650222,0.2459920642821527,0.19559402966288597,0.1828819344684797,0.24219842101559685,0.20088084724409896,0.18328280019998686,0.2574374437011387,0.2037934116873545,0.1735381577792808,0.2449077778119384,0.20200836495394978,1813,1713,1613,813,1291
23+
27,CASP3_HUMAN_Roychowdhury_2020,1567,1,0.6166355580179443,0.5773051726874454,0.5169199401717735,0.6115355787552461,0.594753620431413,0.6258493325764596,0.6088133777894346,0.6283183249961026,0.6263622872473888,0.5892193814676293,0.6404812341169117,0.6267734005130113,1567,1467,1367,567,1031
24+
28,CASP7_HUMAN_Roychowdhury_2020,1680,1,0.6400542956395568,0.559529351312228,0.5037626030141176,0.6377026999733968,0.6211281590622422,0.6530323665549631,0.633223327235218,0.6128019337094553,0.6560112576305517,0.6401479193237954,0.6752096010287533,0.671639924419743,1680,1580,1480,680,1109
25+
29,CBS_HUMAN_Sun_2020,7217,1,0.3457135100579966,0.3638867312864154,0.28991446876054217,0.34435451853357957,0.37215562263942265,0.34791914668201773,0.3430828956205332,0.3634484961494902,0.33132417686793286,0.3451931359108094,nan,0.36927167137582195,7217,7117,7017,6217,4040
26+
30,CCDB_ECOLI_Adkar_2012,1176,1,0.07288430258166731,0.2779434449164313,0.3973893311023567,0.07046851068006718,0.5547506333632457,0.6478121363417151,0.06712073793933808,0.685255527896479,0.7721360042546895,0.14005381798366617,0.8545615614571208,0.9063313255198726,1176,1076,976,176,517
27+
31,CCDB_ECOLI_Tripathi_2016,1663,1,0.18068892008695575,0.34763937235341463,0.4676776379729287,0.17297449665027417,nan,nan,0.17587631940074586,nan,nan,0.2421228329147073,nan,nan,1663,1563,1463,663,576

scripts/ProteinGym_runs/run_performance_tests_proteingym_hybrid_dca_llm.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ def compute_performances(mut_data, mut_sep=':', start_i: int = 0, already_tested
6060
prosst_base_model = prosst_base_model.to(device)
6161
esm_base_model, esm_lora_model, esm_tokenizer, esm_optimizer = get_esm_models()
6262
esm_base_model = esm_base_model.to(device)
63-
MAX_WT_SEQUENCE_LENGTH = 2000
63+
MAX_WT_SEQUENCE_LENGTH = 1000
6464
get_vram()
6565
plt.figure(figsize=(40, 12))
6666
numbers_of_datasets = [i + 1 for i in range(len(mut_data.keys()))]
@@ -241,7 +241,8 @@ def compute_performances(mut_data, mut_sep=':', start_i: int = 0, already_tested
241241
)
242242
print(f'Hybrid perf.: {spearmanr(y_test, y_test_pred)[0]}')
243243
hybrid_perfs.append(spearmanr(y_test, y_test_pred)[0])
244-
except RuntimeError: # modeling_prosst.py, line 920, in forward
244+
except RuntimeError: # modeling_prosst.py, line 920, in forward
245+
# or the RuntimeError raised in place of an UnboundLocalError in prosst_lora_tune.py, line 167
245246
hybrid_perfs.append(np.nan)
246247
ns_y_test.append(len(y_test_pred))
247248
except ValueError as e:

0 commit comments

Comments
 (0)