Skip to content

Commit 2f13d38

Browse files
authored
Merge pull request #2 from Protein-Engineering-Framework/dev-0.4.0
Dev 0.4.0
2 parents d015a6a + cfa800a commit 2f13d38

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

67 files changed

+81579
-1961
lines changed

.github/imgs/mut_performance.png

14.9 MB
Loading
5.26 MB
Loading
2.01 MB
Loading
767 KB
Loading
207 KB
Loading

.github/workflows/build.yml

Lines changed: 32 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -9,13 +9,12 @@ permissions:
99
contents: read
1010

1111
jobs:
12-
build:
13-
12+
ubuntu:
13+
name: ubuntu
1414
runs-on: [ubuntu-latest]
1515
strategy:
1616
matrix:
17-
python-version: ["3.9", "3.10", "3.11", "3.12"]
18-
17+
python-version: ["3.10", "3.11", "3.12"]
1918
steps:
2019
- uses: actions/checkout@v4
2120
- name: Set up Python ${{ matrix.python-version }}
@@ -37,3 +36,32 @@ jobs:
3736
- name: Export Pythonpath and run PyPEF API and CLI version test with pytest
3837
run: |
3938
export PYTHONPATH="${PYTHONPATH}:${PWD}" && python -m pytest tests/
39+
40+
windows:
41+
name: windows
42+
runs-on: [windows-latest]
43+
strategy:
44+
matrix:
45+
python-version: ["3.10", "3.11", "3.12"]
46+
steps:
47+
- uses: actions/checkout@v4
48+
- name: Set up Python ${{ matrix.python-version }}
49+
uses: actions/setup-python@v5
50+
with:
51+
python-version: ${{ matrix.python-version }}
52+
- name: Display Path and Python version
53+
run: |
54+
python -c "import sys, platform; print(sys.version, platform.system())"
55+
- name: Install dependencies
56+
run: |
57+
python -m pip install --upgrade pip
58+
pip install flake8 pytest
59+
pip install -r requirements.txt
60+
- name: Lint with flake8
61+
run: |
62+
# stop the build if there are Python syntax errors or undefined names
63+
flake8 .\pypef --count --select=E9,F63,F7,F82 --show-source --statistics
64+
- name: Export Pythonpath and run PyPEF API and CLI version test with pytest
65+
shell: pwsh
66+
run: |
67+
$env:PYTHONPATH = "${PWD};${env:PYTHONPATH}";python -m pytest .\tests\

.gitignore

Lines changed: 65 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -18,19 +18,40 @@ __pycache__/
1818
scripts/ProteinGym_runs/DMS_msa_files/
1919
scripts/ProteinGym_runs/DMS_ProteinGym_substitutions/
2020
scripts/ProteinGym_runs/ProteinGym_AF2_structures/
21-
scripts/ProteinGym_runs/multi_point_dms_mut_data.json
22-
scripts/ProteinGym_runs/single_point_dms_mut_data.json
23-
scripts/ProteinGym_runs/higher_point_dms_mut_data.json
2421
scripts/ProteinGym_runs/_Description_DMS_substitutions_data.csv
2522

26-
scripts/ProteinGym_runs/single_point_mut_performance.png
27-
scripts/ProteinGym_runs/multi_point_mut_performance.png
28-
2923
# Created test/output files
24+
model_saves/*
25+
avGFP_shortened_dca_encoded.csv
26+
avGFP_dca_encoded.csv
27+
3028
scripts/Setup/windows/Miniconda3-latest-Windows-x86_64.exe
3129
scripts/Setup/windows/Miniconda3/*
30+
3231
scripts/Encoding_low_N/apc.png
3332
scripts/Encoding_low_N/low_N_avGFP_extrapolation.png
33+
34+
scripts/Runtime_tests/runtimes.png
35+
36+
scripts/ProteinGym_runs/multi_point_dms_mut_data.json
37+
scripts/ProteinGym_runs/single_point_dms_mut_data.json
38+
scripts/ProteinGym_runs/higher_point_dms_mut_data.json
39+
scripts/ProteinGym_runs/single_point_mut_performance_violin.png
40+
scripts/ProteinGym_runs/multi_point_mut_performance_violin.png
41+
scripts/ProteinGym_runs/single_point_mut_performance.png
42+
scripts/ProteinGym_runs/multi_point_mut_performance.png
43+
44+
scripts/ESM_finetuning/DMS_msa_files/
45+
scripts/ESM_finetuning/DMS_ProteinGym_substitutions/
46+
scripts/ESM_finetuning/ProteinGym_AF2_structures/
47+
scripts/ESM_finetuning/higher_point_dms_mut_data.json
48+
scripts/ESM_finetuning/single_point_dms_mut_data.json
49+
scripts/ESM_finetuning/results/dca_esm_and_hybrid_opt_results_clean.csv
50+
scripts/ESM_finetuning/results/dca_esm_and_hybrid_opt_results.csv
51+
scripts/ESM_finetuning/mut_performance.png
52+
scripts/ESM_finetuning/_Description_DMS_substitutions_data.csv
53+
scripts/ESM_finetuning/mut_performance_violin.png
54+
3455
datasets/ANEH/37_ANEH_variants_aaidx_encoded.csv
3556
datasets/ANEH/37_ANEH_variants_dca_encoded.csv
3657
datasets/ANEH/37_ANEH_variants_gremlin_dca_encoded.csv
@@ -317,6 +338,29 @@ datasets/ANEH/Recomb_Triple_Split/Predictions_MLgremlin_TopRecomb_Triple_Split.t
317338
datasets/ANEH/Recomb_Triple_Split/Predictions_MLplmc_TopRecomb_Triple_Split.txt
318339
datasets/ANEH/Recomb_Triple_Split/Predictions_ONEHOT_TopRecomb_Triple_Split.txt
319340
datasets/ANEH/Recomb_Triple_Split/Recomb_Triple_Split0.fasta
341+
datasets/ANEH/ML_Model_Performance_CHAM830106_PLS_LOOCV.png
342+
datasets/ANEH/ML_Model_Performance_CHOP780206_PLS_LOOCV.png
343+
datasets/ANEH/ML_Model_Performance_ZHOH040102_PLS.png
344+
datasets/ANEH/CV_performance/CHAM830106_PLS_LOOCV_5-fold-CV.png
345+
datasets/ANEH/CV_performance/CHAM830106_PLS_LOOCV_CV_Results.txt
346+
datasets/ANEH/CV_performance/CHOP780206_PLS_LOOCV_5-fold-CV.png
347+
datasets/ANEH/CV_performance/CHOP780206_PLS_LOOCV_CV_Results.txt
348+
datasets/ANEH/CV_performance/ZHOH040102_PLS_5-fold-CV.png
349+
datasets/ANEH/CV_performance/ZHOH040102_PLS_CV_Results.txt
350+
datasets/ANEH/Pickles/CHOP780206
351+
datasets/ANEH/Pickles/ZHOH040102
352+
datasets/ANEH/DCA_Hybrid_Model_Performance_ESM1v_no_ML.png
353+
datasets/ANEH/DCA_Hybrid_Model_Performance_GREMLIN_.png
354+
datasets/ANEH/DCA_Hybrid_Model_Performance_GREMLIN_ESM.png
355+
datasets/ANEH/DCA_Hybrid_Model_Performance_GREMLIN_no_ML_ESM.png
356+
datasets/ANEH/DCA_Hybrid_Model_Performance_Hybrid_ESM1v_ESM.png
357+
datasets/ANEH/DCA_Hybrid_Model_Performance_PLMC_.png
358+
datasets/ANEH/HYBRIDGREMLINESM1V_DE_trajectories.png
359+
datasets/ANEH/Predictions_Hybrid_ESM1v_Top37_ANEH_variants_prediction_set.txt
360+
datasets/ANEH/Pickles/HYBRIDGREMLINESM1V
361+
datasets/ANEH/SSM_landscape.png
362+
datasets/ANEH/SSM_landscape.csv
363+
320364
datasets/AVGFP/apc.png
321365
datasets/AVGFP/avGFP_aaidx_encoded.csv
322366
datasets/AVGFP/avGFP_dca_encoded.csv
@@ -395,10 +439,19 @@ datasets/AVGFP/Recomb_Double_Split/Recomb_Double_Split4.fasta
395439
datasets/AVGFP/Recomb_Double_Split/Recomb_Double_Split5.fasta
396440
datasets/AVGFP/Recomb_Double_Split/Recomb_Double_Split6.fasta
397441
datasets/AVGFP/Recomb_Double_Split/Recomb_Double_Split7.fasta
398-
avGFP_shortened_dca_encoded.csv
399-
datasets/AVGFP/avGFP_shortened.csv
400-
avGFP_dca_encoded.csv
401-
scripts/Runtime_tests/runtimes.png
402442
datasets/AVGFP/Recomb_Double_Split/Predictions_Hybrid_TopRecomb_Double_Split.txt
403-
scripts/ProteinGym_runs/single_point_mut_performance_violin.png
404-
scripts/ProteinGym_runs/multi_point_mut_performance_violin.png
443+
datasets/AVGFP/model_saves/*
444+
datasets/AVGFP/Pickles/*
445+
datasets/AVGFP/DCA_Hybrid_Model_Performance_ESM1v_no_ML.png
446+
datasets/AVGFP/DCA_Hybrid_Model_Performance_ProSST_no_ML.png
447+
datasets/AVGFP/HYBRIDgremlinesm_DE_trajectories.png
448+
datasets/AVGFP/HYBRIDgremlinprosst_DE_trajectories.png
449+
datasets/AVGFP/DCA_Hybrid_Model_Performance_GREMLIN_.png
450+
datasets/AVGFP/SSM_landscape.csv
451+
datasets/AVGFP/SSM_landscape.png
452+
datasets/AVGFP/DCA_Hybrid_Model_Performance_GREMLIN_PROSST.png
453+
datasets/AVGFP/DCA_Hybrid_Model_Performance_Hybrid_ProSST_PROSST.png
454+
datasets/AVGFP/HYBRIDGREMLINESM1V_DE_trajectories.png
455+
datasets/AVGFP/Recomb_Double_Split/Predictions_Hybrid_ESM1v_TopRecomb_Double_Split.txt
456+
datasets/AVGFP/Recomb_Double_Split/Predictions_Hybrid_ProSST_TopRecomb_Double_Split.txt
457+
datasets/AVGFP/avGFP_shortened.csv

0 commit comments

Comments
 (0)