MachineLearningLifeScience
diff --git a/‎.gitignore
Lines changed: 4 additions & 0 deletions b/‎.gitignore
Lines changed: 4 additions & 0 deletions
diff --git a/‎examples/06_running_lambo2_on_rasp/run.py
Lines changed: 61 additions & 2 deletions b/‎examples/06_running_lambo2_on_rasp/run.py
Lines changed: 61 additions & 2 deletions
diff --git a/‎examples/06_running_lambo2_on_rasp/simple_observer.py
Lines changed: 8 additions & 0 deletions b/‎examples/06_running_lambo2_on_rasp/simple_observer.py
Lines changed: 8 additions & 0 deletions
diff --git a/‎examples/07_running_lambo2_on_foldx/dnja_foldx_initsamples.npz
8.13 KB b/‎examples/07_running_lambo2_on_foldx/dnja_foldx_initsamples.npz
8.13 KB
diff --git a/‎examples/07_running_lambo2_on_foldx/readme.md
Lines changed: 11 additions & 0 deletions b/‎examples/07_running_lambo2_on_foldx/readme.md
Lines changed: 11 additions & 0 deletions
@@ -23,6 +23,10 @@ examples/09_replicating_nsga_ii_of_lambo_by_hand/repaired_pdbs
 examples/09_replicating_nsga_ii_of_lambo_by_hand/pHs.json
 examples/09_replicating_nsga_ii_of_lambo_by_hand/history/
 examples/09_replicating_nsga_ii_of_lambo_by_hand/history.json
+examples/06_running_lambo2_on_rasp/*.npz
+examples/07_running_lambo2_on_foldx/tmp/
+examples/07_running_lambo2_on_foldx/*.csv
+examples/07_running_lambo2_on_foldx/lambo2_trace.npz
 examples/ignore*
 
 # BAxUS related stuff
 
@@ -6,7 +6,7 @@
 import torch
 from poli.objective_repository import RaspProblemFactory
 
-from poli_baselines.solvers.bayesian_optimization.lambo2 import LaMBO2
+from poli_baselines.solvers.simple.random_mutation import RandomMutation
 
 THIS_DIR = Path(__file__).resolve().parent
 sys.path.append(str(THIS_DIR))
@@ -15,6 +15,8 @@
 
 
 def run_with_default_hyperparameters():
+    from poli_baselines.solvers.bayesian_optimization.lambo2 import LaMBO2
+
     RFP_PDBS_DIR = THIS_DIR / "rfp_pdbs"
     ALL_PDBS = list(RFP_PDBS_DIR.rglob("**/*.pdb"))
     problem = RaspProblemFactory().create(
@@ -69,6 +71,8 @@ def run_with_modified_hyperparameters():
     You can find the original configuration we use here:
     src/poli_baselines/solvers/bayesian_optimization/lambo2/hydra_configs
     """
+    from poli_baselines.solvers.bayesian_optimization.lambo2 import LaMBO2
+
     POPULATION_SIZE = 96
     MAX_EPOCHS_FOR_PRETRAINING = 4
 
@@ -112,6 +116,61 @@ def run_with_modified_hyperparameters():
     black_box.terminate()
 
 
+def comparing_against_directed_evolution():
+    """Run ``RandomMutation`` on the RaSP black box and plot the best-so-far trace.
+
+    Seed data (``x0``, ``y0``) is read from ``rasp_seed_data.npz`` next to this
+    script. The ``batch_size`` seeds with the highest ``y0`` are padded with
+    empty strings to a common length and handed to the solver, which runs for
+    ``n_iterations`` iterations. Every observation (the full seed set included)
+    is written to ``directed_evolution_rasp_trace_b_<batch_size>_i_<n_iterations>.npz``
+    and then plotted twice: once from the start and once from the first
+    non-seed evaluation onwards.
+    """
+    arr = np.load(THIS_DIR / "rasp_seed_data.npz")
+    x0 = arr["x0"]
+    y0 = arr["y0"]
+    batch_size = 128
+    n_iterations = 32
+
+    # Rank the seeds once (best first) and reuse the same indices for x and y,
+    # so the two selections cannot drift apart.
+    top_indices = np.argsort(y0.flatten())[::-1][:batch_size]
+    x0_for_solver_no_padding = x0[top_indices]
+    y0_for_solver = y0[top_indices]
+
+    # Adding padding: split each sequence into characters and right-fill with
+    # empty strings so that all rows share the same length.
+    max_length = max(map(len, x0_for_solver_no_padding))
+    x0_for_solver = np.array(
+        [
+            list(x_i) + [""] * (max_length - len(x_i))
+            for x_i in x0_for_solver_no_padding
+        ]
+    )
+
+    RFP_PDBS_DIR = THIS_DIR / "rfp_pdbs"
+    ALL_PDBS = list(RFP_PDBS_DIR.rglob("**/*.pdb"))
+    problem = RaspProblemFactory().create(
+        wildtype_pdb_path=ALL_PDBS,
+        additive=True,
+        chains_to_keep=[p.parent.name.split("_")[1] for p in ALL_PDBS],
+    )
+    black_box = problem.black_box
+
+    # From here on the black box exists; make sure it is terminated even if
+    # the solver, the save, or the plotting raises.
+    try:
+        observer = SimpleObserver()
+        black_box.set_observer(observer)
+
+        # Record the whole seed set so the saved trace and the plots include it.
+        observer.x_s.append(x0.reshape(-1, 1))
+        observer.y_s.append(y0)
+
+        directed_evolution = RandomMutation(
+            black_box=black_box,
+            x0=x0_for_solver,
+            y0=y0_for_solver,
+            batch_size=batch_size,
+        )
+        directed_evolution.solve(max_iter=n_iterations, verbose=True)
+        observer.save_history(
+            THIS_DIR
+            / f"directed_evolution_rasp_trace_b_{batch_size}_i_{n_iterations}.npz"
+        )
+
+        fig, (ax1, ax2) = plt.subplots(1, 2)
+        plot_best_y(observer, ax1)
+        plot_best_y(observer, ax2, start_from=x0.shape[0])
+        # The red line marks where the seed data ends.
+        ax1.axvline(x0.shape[0], color="red")
+        plt.show()
+    finally:
+        black_box.terminate()
+
+
 if __name__ == "__main__":
-    run_with_default_hyperparameters()
+    # run_with_default_hyperparameters()
     # run_with_modified_hyperparameters()
+    comparing_against_directed_evolution()
@@ -18,6 +18,14 @@ def observe(self, x: np.ndarray, y: np.ndarray, context=None) -> None:
         self.x_s.append(x)
         self.y_s.append(y)
 
+    def save_history(self, path: str) -> None:
+        """Write every recorded observation to ``path`` with ``np.savez``.
+
+        Each row of each recorded ``x`` batch is joined into one string; the
+        joined batches are concatenated and stored under the key ``x_s``. The
+        recorded ``y`` batches are stacked vertically and stored under ``y_s``.
+        """
+        joined_batches = [
+            np.array(["".join(row) for row in batch]) for batch in self.x_s
+        ]
+        np.savez(
+            path,
+            x_s=np.concatenate(joined_batches),
+            y_s=np.vstack(self.y_s),
+        )
+
 
 def plot_best_y(obs: SimpleObserver, ax: plt.Axes, start_from: int = 0):
     best_y = np.maximum.accumulate(np.vstack(obs.y_s).flatten())
 
@@ -0,0 +1,11 @@
+This folder includes an example in which we use `LaMBO2` to optimize the thermal stability of two PDBs (DNJA1 and RFAH), as measured by `foldx`.
+
+As a prerequisite, [we encourage you to set up `poli` for `foldx`](https://machinelearninglifescience.github.io/poli-docs/using_poli/objective_repository/foldx_stability.html).
+
+We recommend running it inside the environment of `LaMBO2`, which you can find inside the `solvers` folder.
+
+```bash
+# From the root of the poli-baselines directory
+pip install -e .[lambo2]
+python examples/07_running_lambo2_on_foldx/run.py
+```