From c257a3e9f25fb9499fb45d9eb2fb2fb2b6040b12 Mon Sep 17 00:00:00 2001 From: Richard Michael Date: Wed, 29 Nov 2023 15:23:25 +0100 Subject: [PATCH] added GFP CBas documentation --- docs/protein-optimization/index.md | 12 +++ .../tmp/Raw_101m_Repair.fxout | 2 + .../objective_repository/all_objectives.md | 6 ++ .../objective_repository/gfp_cbas.md | 84 +++++++++++++++++++ 4 files changed, 104 insertions(+) create mode 100644 docs/protein-optimization/using_poli/objective_repository/gfp_cbas.md diff --git a/docs/protein-optimization/index.md b/docs/protein-optimization/index.md index 8681e353..1d385a35 100644 --- a/docs/protein-optimization/index.md +++ b/docs/protein-optimization/index.md @@ -79,6 +79,18 @@ Solvent accessibility of mutations of a wildtype using `foldx` Rapid Stability Predictions of single mutations from a wildtype. [WIP] ::: +:::{grid-item-card} RFP Fluorescence Protein Stability (using `lambo`) +:link: ./using_poli/objective_repository/foldx_rfp_lambo.html +:columns: 6 +LaMBO Fluorescence (RFP) by stability and solvent-accessible surface area. +::: + +:::{grid-item-card} Protein Fluorescence (using `CBas`) [WIP] +:link: ./using_poli/objective_repository/gfp_cbas.html +:columns: 6 +Surrogate fluorescence prediction of mutations from a GFP sequence. 
[WIP] +::: + :::: diff --git a/docs/protein-optimization/understanding_foldx/01-single-mutation-using-foldx/tmp/Raw_101m_Repair.fxout b/docs/protein-optimization/understanding_foldx/01-single-mutation-using-foldx/tmp/Raw_101m_Repair.fxout index 00ebdfe3..0e4288dd 100644 --- a/docs/protein-optimization/understanding_foldx/01-single-mutation-using-foldx/tmp/Raw_101m_Repair.fxout +++ b/docs/protein-optimization/understanding_foldx/01-single-mutation-using-foldx/tmp/Raw_101m_Repair.fxout @@ -11,3 +11,5 @@ Pdb total energy Backbone Hbond Sidechain Hbond Van der Waals Electrostatics Sol WT_101m_Repair_1.pdb -34.3436 -141.831 -47.9784 -179.662 -8.13848 243.99 -239.232 3.40664 105.266 231.722 0 0 0 5.28162 157.882 -8.73035 0 0 0 0 1.56224 0 101m_Repair_1.pdb -31.7457 -141.841 -48.2413 -177.827 -8.5183 243.998 -235.896 3.3294 104.051 231.196 0 0 0 5.25497 157.841 -8.81857 0 0 0 0 1.5666 0 WT_101m_Repair_1.pdb -34.3436 -141.831 -47.9784 -179.662 -8.13848 243.99 -239.232 3.40664 105.266 231.722 0 0 0 5.28162 157.882 -8.73035 0 0 0 0 1.56224 0 +101m_Repair_1.pdb -31.7457 -141.841 -48.2413 -177.827 -8.5183 243.998 -235.896 3.3294 104.051 231.196 0 0 0 5.25497 157.841 -8.81857 0 0 0 0 1.5666 0 +WT_101m_Repair_1.pdb -34.3436 -141.831 -47.9784 -179.662 -8.13848 243.99 -239.232 3.40664 105.266 231.722 0 0 0 5.28162 157.882 -8.73035 0 0 0 0 1.56224 0 diff --git a/docs/protein-optimization/using_poli/objective_repository/all_objectives.md b/docs/protein-optimization/using_poli/objective_repository/all_objectives.md index 00e74794..cf880be7 100644 --- a/docs/protein-optimization/using_poli/objective_repository/all_objectives.md +++ b/docs/protein-optimization/using_poli/objective_repository/all_objectives.md @@ -84,6 +84,12 @@ Rapid Stability Predictions of single mutations from a wildtype. [WIP] LaMBO Fluorescence (RFP) by stability and solvent-accessible surface area. 
::: +:::{grid-item-card} GFP Fluorescence Protein Surrogate (using `CBas`) +:link: ./using_poli/objective_repository/gfp_cbas.html +:columns: 6 +CBas Fluorescence (GFP) median brightness by latent encoding surrogate under the assumption that the encoder mean correlates sufficiently with target brightness. +::: + :::: ### Other diff --git a/docs/protein-optimization/using_poli/objective_repository/gfp_cbas.md b/docs/protein-optimization/using_poli/objective_repository/gfp_cbas.md new file mode 100644 index 00000000..95266ffc --- /dev/null +++ b/docs/protein-optimization/using_poli/objective_repository/gfp_cbas.md @@ -0,0 +1,84 @@ +# Protein Fluorescence (using `CBas`) +![Type of objective function: discrete](https://img.shields.io/badge/Type-discrete_inputs-blue) +![Environment to run this objective function: poli protein_cbas](https://img.shields.io/badge/Environment-poli____protein__cbas-teal +) + +[TODO: revise] + +## About + +This objective function returns the mean encoding (i.e. median brightness surrogate) using `CBas`. + +## Prerequisites + +- Assets: GFP dataframe, and model checkpoints as provided in the `poli` module + +## How to run + +You can either run this objective function in your current environment (assuming that you have the correct dependencies installed), or you can run it in an isolated environment. 
+ +::::{tab-set} + +:::{tab-item} In current environment + +You will have to install the following dependencies: + +```bash +pip install biopython python-levenshtein numpy pandas scipy torch torchvision torchaudio keras-core tensorflow +``` + +Then run + +```python +from pathlib import Path + +import numpy as np + +from poli import objective_factory + +# How to create +f, x0, y0 = objective_factory.create( + name="gfp_cbas", +) + +# Example input: +print(x0) + +# Querying: +print(y0) # The encoding of the first batch of GFP sequences +``` + +::: + +:::{tab-item} In isolation + +If you want us to handle dependencies, run + +```python +from pathlib import Path + +import numpy as np + +from poli import objective_factory + +# How to create +f, x0, y0 = objective_factory.create( + name="gfp_cbas", + wildtype_pdb_file=wildtype_pdb_file, + force_register=True, +) + +# Example input: +print(x0) + +# Querying: +print(y0) # The encoding of the first batch of GFP sequences +``` + +```{warning} +Registering the objective function in this way will create a `conda` environment called `poli__protein_cbas` with the relevant dependencies. +``` + +::: + +:::: \ No newline at end of file