Skip to content

Commit dfa425d

Browse files
committed
Upload v. 0.4.1
1 parent f7f1f9d commit dfa425d

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

47 files changed

+6451
-954
lines changed

.github/imgs/mut_performance.png

14.9 MB
Loading
2.01 MB
Loading
207 KB
Loading

.gitignore

Lines changed: 60 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -18,20 +18,40 @@ __pycache__/
1818
scripts/ProteinGym_runs/DMS_msa_files/
1919
scripts/ProteinGym_runs/DMS_ProteinGym_substitutions/
2020
scripts/ProteinGym_runs/ProteinGym_AF2_structures/
21-
scripts/ProteinGym_runs/multi_point_dms_mut_data.json
22-
scripts/ProteinGym_runs/single_point_dms_mut_data.json
23-
scripts/ProteinGym_runs/higher_point_dms_mut_data.json
2421
scripts/ProteinGym_runs/_Description_DMS_substitutions_data.csv
2522

26-
scripts/ProteinGym_runs/single_point_mut_performance.png
27-
scripts/ProteinGym_runs/multi_point_mut_performance.png
28-
2923
# Created test/output files
3024
model_saves/*
25+
avGFP_shortened_dca_encoded.csv
26+
avGFP_dca_encoded.csv
27+
3128
scripts/Setup/windows/Miniconda3-latest-Windows-x86_64.exe
3229
scripts/Setup/windows/Miniconda3/*
30+
3331
scripts/Encoding_low_N/apc.png
3432
scripts/Encoding_low_N/low_N_avGFP_extrapolation.png
33+
34+
scripts/Runtime_tests/runtimes.png
35+
36+
scripts/ProteinGym_runs/multi_point_dms_mut_data.json
37+
scripts/ProteinGym_runs/single_point_dms_mut_data.json
38+
scripts/ProteinGym_runs/higher_point_dms_mut_data.json
39+
scripts/ProteinGym_runs/single_point_mut_performance_violin.png
40+
scripts/ProteinGym_runs/multi_point_mut_performance_violin.png
41+
scripts/ProteinGym_runs/single_point_mut_performance.png
42+
scripts/ProteinGym_runs/multi_point_mut_performance.png
43+
44+
scripts/ESM_finetuning/DMS_msa_files/
45+
scripts/ESM_finetuning/DMS_ProteinGym_substitutions/
46+
scripts/ESM_finetuning/ProteinGym_AF2_structures/
47+
scripts/ESM_finetuning/higher_point_dms_mut_data.json
48+
scripts/ESM_finetuning/single_point_dms_mut_data.json
49+
scripts/ESM_finetuning/results/dca_esm_and_hybrid_opt_results_clean.csv
50+
scripts/ESM_finetuning/results/dca_esm_and_hybrid_opt_results.csv
51+
scripts/ESM_finetuning/mut_performance.png
52+
scripts/ESM_finetuning/_Description_DMS_substitutions_data.csv
53+
scripts/ESM_finetuning/mut_performance_violin.png
54+
3555
datasets/ANEH/37_ANEH_variants_aaidx_encoded.csv
3656
datasets/ANEH/37_ANEH_variants_dca_encoded.csv
3757
datasets/ANEH/37_ANEH_variants_gremlin_dca_encoded.csv
@@ -318,6 +338,29 @@ datasets/ANEH/Recomb_Triple_Split/Predictions_MLgremlin_TopRecomb_Triple_Split.t
318338
datasets/ANEH/Recomb_Triple_Split/Predictions_MLplmc_TopRecomb_Triple_Split.txt
319339
datasets/ANEH/Recomb_Triple_Split/Predictions_ONEHOT_TopRecomb_Triple_Split.txt
320340
datasets/ANEH/Recomb_Triple_Split/Recomb_Triple_Split0.fasta
341+
datasets/ANEH/ML_Model_Performance_CHAM830106_PLS_LOOCV.png
342+
datasets/ANEH/ML_Model_Performance_CHOP780206_PLS_LOOCV.png
343+
datasets/ANEH/ML_Model_Performance_ZHOH040102_PLS.png
344+
datasets/ANEH/CV_performance/CHAM830106_PLS_LOOCV_5-fold-CV.png
345+
datasets/ANEH/CV_performance/CHAM830106_PLS_LOOCV_CV_Results.txt
346+
datasets/ANEH/CV_performance/CHOP780206_PLS_LOOCV_5-fold-CV.png
347+
datasets/ANEH/CV_performance/CHOP780206_PLS_LOOCV_CV_Results.txt
348+
datasets/ANEH/CV_performance/ZHOH040102_PLS_5-fold-CV.png
349+
datasets/ANEH/CV_performance/ZHOH040102_PLS_CV_Results.txt
350+
datasets/ANEH/Pickles/CHOP780206
351+
datasets/ANEH/Pickles/ZHOH040102
352+
datasets/ANEH/DCA_Hybrid_Model_Performance_ESM1v_no_ML.png
353+
datasets/ANEH/DCA_Hybrid_Model_Performance_GREMLIN_.png
354+
datasets/ANEH/DCA_Hybrid_Model_Performance_GREMLIN_ESM.png
355+
datasets/ANEH/DCA_Hybrid_Model_Performance_GREMLIN_no_ML_ESM.png
356+
datasets/ANEH/DCA_Hybrid_Model_Performance_Hybrid_ESM1v_ESM.png
357+
datasets/ANEH/DCA_Hybrid_Model_Performance_PLMC_.png
358+
datasets/ANEH/HYBRIDGREMLINESM1V_DE_trajectories.png
359+
datasets/ANEH/Predictions_Hybrid_ESM1v_Top37_ANEH_variants_prediction_set.txt
360+
datasets/ANEH/Pickles/HYBRIDGREMLINESM1V
361+
datasets/ANEH/SSM_landscape.png
362+
datasets/ANEH/SSM_landscape.csv
363+
321364
datasets/AVGFP/apc.png
322365
datasets/AVGFP/avGFP_aaidx_encoded.csv
323366
datasets/AVGFP/avGFP_dca_encoded.csv
@@ -396,34 +439,19 @@ datasets/AVGFP/Recomb_Double_Split/Recomb_Double_Split4.fasta
396439
datasets/AVGFP/Recomb_Double_Split/Recomb_Double_Split5.fasta
397440
datasets/AVGFP/Recomb_Double_Split/Recomb_Double_Split6.fasta
398441
datasets/AVGFP/Recomb_Double_Split/Recomb_Double_Split7.fasta
399-
avGFP_shortened_dca_encoded.csv
400-
datasets/AVGFP/avGFP_shortened.csv
401-
avGFP_dca_encoded.csv
402-
scripts/Runtime_tests/runtimes.png
403442
datasets/AVGFP/Recomb_Double_Split/Predictions_Hybrid_TopRecomb_Double_Split.txt
404-
scripts/ProteinGym_runs/single_point_mut_performance_violin.png
405-
scripts/ProteinGym_runs/multi_point_mut_performance_violin.png
406-
scripts/ESM_finetuning/DMS_msa_files/
407-
scripts/ESM_finetuning/DMS_ProteinGym_substitutions/
408-
scripts/ESM_finetuning/ProteinGym_AF2_structures/
409-
410-
scripts/ESM_finetuning/higher_point_dms_mut_data.json
411-
scripts/ESM_finetuning/single_point_dms_mut_data.json
412-
scripts/ESM_finetuning/results/dca_esm_and_hybrid_opt_results_clean.csv
413-
scripts/ESM_finetuning/results/dca_esm_and_hybrid_opt_results.csv
414-
scripts/ESM_finetuning/mut_performance.png
415-
scripts/ESM_finetuning/_Description_DMS_substitutions_data.csv
416-
scripts/ESM_finetuning/mut_performance_violin.png
417-
datasets/ANEH/SSM_landscape.png
418-
datasets/ANEH/SSM_landscape.csv
419443
datasets/AVGFP/model_saves/*
420444
datasets/AVGFP/Pickles/*
421445
datasets/AVGFP/DCA_Hybrid_Model_Performance_ESM1v_no_ML.png
422446
datasets/AVGFP/DCA_Hybrid_Model_Performance_ProSST_no_ML.png
423-
424-
# Large files // LFS in niklases/PyPEF
425-
datasets/ANEH/ANEH_72.6.params
426-
datasets/AVGFP/uref100_avgfp_jhmmer_119_plmc_42.6.params
427-
datasets/AVGFP/uref100_avgfp_jhmmer_119.sto
428-
datasets/GRB2/GRB2_HUMAN_full_11-26-2021_b05.a2m
429-
datasets/ANEH/ANEH_jhmmer.sto
447+
datasets/AVGFP/HYBRIDgremlinesm_DE_trajectories.png
448+
datasets/AVGFP/HYBRIDgremlinprosst_DE_trajectories.png
449+
datasets/AVGFP/DCA_Hybrid_Model_Performance_GREMLIN_.png
450+
datasets/AVGFP/SSM_landscape.csv
451+
datasets/AVGFP/SSM_landscape.png
452+
datasets/AVGFP/DCA_Hybrid_Model_Performance_GREMLIN_PROSST.png
453+
datasets/AVGFP/DCA_Hybrid_Model_Performance_Hybrid_ProSST_PROSST.png
454+
datasets/AVGFP/HYBRIDGREMLINESM1V_DE_trajectories.png
455+
datasets/AVGFP/Recomb_Double_Split/Predictions_Hybrid_ESM1v_TopRecomb_Double_Split.txt
456+
datasets/AVGFP/Recomb_Double_Split/Predictions_Hybrid_ProSST_TopRecomb_Double_Split.txt
457+
datasets/AVGFP/avGFP_shortened.csv

.vscode/launch.json

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,22 @@
6666
]
6767
},
6868

69+
{
70+
"name": "Python: PyPEF MKPS avGFP PS",
71+
"type": "debugpy",
72+
"request": "launch",
73+
"env": {"PYTHONPATH": "${workspaceFolder}"},
74+
"program": "${workspaceFolder}/pypef/main.py",
75+
"console": "integratedTerminal",
76+
"justMyCode": true,
77+
"cwd": "${workspaceFolder}/datasets/AVGFP/",
78+
"args": [
79+
"mkps",
80+
"--wt", "P42212_F64L.fasta",
81+
"--input", "avGFP.csv"
82+
]
83+
},
84+
6985
{
7086
"name": "Python: PyPEF ml -e onehot pls_loocv",
7187
"type": "debugpy",
@@ -104,6 +120,24 @@
104120
]
105121
},
106122

123+
{
124+
"name": "Python: PyPEF ML avGFP DirectEvo: GREMLIN",
125+
"type": "debugpy",
126+
"request": "launch",
127+
"env": {"PYTHONPATH": "${workspaceFolder}"},
128+
"program": "${workspaceFolder}/pypef/main.py",
129+
"console": "integratedTerminal",
130+
"justMyCode": true,
131+
"cwd": "${workspaceFolder}/datasets/AVGFP/",
132+
"args": [
133+
"hybrid",
134+
"directevo",
135+
"--wt", "P42212_F64L.fasta",
136+
"--params", "GREMLIN",
137+
"--temp", "0.3"
138+
]
139+
},
140+
107141
{
108142
"name": "Python: PyPEF hybrid LS-TS GREMLIN-DCA avGFP",
109143
"type": "debugpy",
@@ -282,6 +316,78 @@
282316
]
283317
},
284318

319+
{
320+
"name": "Python: PyPEF hybrid/only-PS-zero-shot GREMLIN-DCA avGFP PS: ProSST",
321+
"type": "debugpy",
322+
"request": "launch",
323+
"env": {"PYTHONPATH": "${workspaceFolder}"},
324+
"program": "${workspaceFolder}/pypef/main.py",
325+
"console": "integratedTerminal",
326+
"justMyCode": true,
327+
"cwd": "${workspaceFolder}/datasets/AVGFP/",
328+
"args": [
329+
"hybrid",
330+
"-m", "HYBRIDgremlinprosst",
331+
"--ps", "avGFP_prediction_set.fasta",
332+
"--params", "GREMLIN"
333+
]
334+
},
335+
336+
{
337+
"name": "Python: PyPEF hybrid/only-PS-zero-shot GREMLIN-DCA avGFP PS: ESM1v",
338+
"type": "debugpy",
339+
"request": "launch",
340+
"env": {"PYTHONPATH": "${workspaceFolder}"},
341+
"program": "${workspaceFolder}/pypef/main.py",
342+
"console": "integratedTerminal",
343+
"justMyCode": true,
344+
"cwd": "${workspaceFolder}/datasets/AVGFP/",
345+
"args": [
346+
"hybrid",
347+
"-m", "HYBRIDgremlinesm",
348+
"--ps", "avGFP_prediction_set.fasta",
349+
"--params", "GREMLIN"
350+
]
351+
},
352+
353+
{
354+
"name": "Python: PyPEF hybrid/only-PS-zero-shot GREMLIN-DCA avGFP DirectEvo: ESM1v",
355+
"type": "debugpy",
356+
"request": "launch",
357+
"env": {"PYTHONPATH": "${workspaceFolder}"},
358+
"program": "${workspaceFolder}/pypef/main.py",
359+
"console": "integratedTerminal",
360+
"justMyCode": true,
361+
"cwd": "${workspaceFolder}/datasets/AVGFP/",
362+
"args": [
363+
"hybrid",
364+
"directevo",
365+
"-m", "HYBRIDgremlinesm",
366+
"--wt", "P42212_F64L.fasta",
367+
"--params", "GREMLIN",
368+
"--temp", "0.1"
369+
]
370+
},
371+
372+
{
373+
"name": "Python: PyPEF hybrid/only-PS-zero-shot GREMLIN-DCA avGFP DirectEvo: ProSST",
374+
"type": "debugpy",
375+
"request": "launch",
376+
"env": {"PYTHONPATH": "${workspaceFolder}"},
377+
"program": "${workspaceFolder}/pypef/main.py",
378+
"console": "integratedTerminal",
379+
"justMyCode": true,
380+
"cwd": "${workspaceFolder}/datasets/AVGFP/",
381+
"args": [
382+
"hybrid",
383+
"directevo",
384+
"-m", "HYBRIDgremlinprosst",
385+
"--wt", "P42212_F64L.fasta",
386+
"--params", "GREMLIN",
387+
"--temp", "0.1"
388+
]
389+
},
390+
285391
{ // PLMC zero-shot steps:
286392
// 1. $pypef param_inference --params uref100_avgfp_jhmmer_119_plmc_42.6.params
287393
// 2. $pypef hybrid -t TS.fasl --params PLMC

0 commit comments

Comments
 (0)