Skip to content

Commit dcbe4ed

Browse files
Brings tuning changes on LaMBO2 (#55)
* add lambo ehrlich example, tune params * lambo tweaks * add geometric recency * Adds beignet as a dependency * Removes the hardcoded GPU acceleration * Runs the lambo test for less time * Makes it even smaller for testing * Updates the example's readme --------- Co-authored-by: Samuel Stanton <[email protected]>
1 parent ccdfee0 commit dcbe4ed

File tree

19 files changed

+263
-27
lines changed

19 files changed

+263
-27
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,10 @@ __pycache__/
1111

1212
.tox/
1313
build/
14+
oracle/
1415

1516
examples/05_optimizing_qed_using_latent_space_bo/VAESelfies_TINY-CID-SELFIES-20_latent_dim_2.pt
17+
examples/06_running_lambo2_on_rasp/directed_evolution_rasp*.npz
1618
examples/07_optimizing_logp_on_chembl/VAE_CHEMBL.pt
1719
examples/08_pymoo_nsga_ii_on_foldx/history.json
1820
examples/09_replicating_nsga_ii_of_lambo_by_hand/pdbs

examples/06_running_lambo2_on_rasp/readme.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1-
This folder includes an example in which we optimize the thermal stability of red fluorescent proteins (RFPs) using `LaMBO2`.
1+
This folder includes an example in which we use `LaMBO2` to optimize the thermal stability of red fluorescent proteins (RFPs), as measured by an additive version of RaSP.
2+
3+
As a prerequisite, [we encourage you to set up `poli` for RaSP](https://machinelearninglifescience.github.io/poli-docs/using_poli/objective_repository/RaSP.html).
24

35
It includes the following assets:
46
- Several PDB files for these RFPs, based on the Pareto front found by [LaMBO](https://arxiv.org/abs/2203.12742).
@@ -10,7 +12,6 @@ We recommend running it inside the environment of `LaMBO2`, which you can find i
1012

1113
```bash
1214
# From the root of the poli-baselines directory
13-
conda env create --file src/poli_baselines/solvers/bayesian_optimization/lambo2/environment.lambo2.yml
14-
conda activate poli__lambo2
15+
pip install -e .[lambo2]
1516
python examples/06_running_lambo2_on_rasp/run.py
1617
```

examples/06_running_lambo2_on_rasp/run.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
import matplotlib.pyplot as plt
55
import numpy as np
6+
import torch
67
from poli.objective_repository import RaspProblemFactory
78

89
from poli_baselines.solvers.bayesian_optimization.lambo2 import LaMBO2
@@ -33,20 +34,25 @@ def run_with_default_hyperparameters():
3334
observer.x_s.append(x0.reshape(-1, 1))
3435
observer.y_s.append(y0)
3536

37+
torch.set_float32_matmul_precision("medium")
3638
lambo2 = LaMBO2(
3739
black_box=black_box,
3840
x0=x0,
3941
y0=y0,
42+
overrides=["max_epochs=2"],
43+
max_epochs_for_retraining=8,
4044
)
41-
42-
lambo2.solve(max_iter=10)
45+
lambo2.solve(max_iter=32)
4346

4447
fig, (ax1, ax2) = plt.subplots(1, 2)
4548
plot_best_y(observer, ax1)
4649
plot_best_y(observer, ax2, start_from=x0.shape[0])
4750
ax1.axvline(x0.shape[0], color="red")
4851
plt.show()
4952

53+
print("Best starting obj value: ", np.max(y0))
54+
print("Best final obj value: ", np.max(lambo2.history_for_training["y"]))
55+
5056
black_box.terminate()
5157

5258

@@ -107,5 +113,5 @@ def run_with_modified_hyperparameters():
107113

108114

109115
if __name__ == "__main__":
110-
# run_with_default_hyperparameters()
111-
run_with_modified_hyperparameters()
116+
run_with_default_hyperparameters()
117+
# run_with_modified_hyperparameters()
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
import sys
2+
from pathlib import Path
3+
4+
import matplotlib.pyplot as plt
5+
import numpy as np
6+
import torch
7+
from poli.objective_repository import EhrlichProblemFactory
8+
9+
from poli_baselines.solvers.bayesian_optimization.lambo2 import LaMBO2
10+
from poli_baselines.solvers.simple.genetic_algorithm import FixedLengthGeneticAlgorithm
11+
12+
THIS_DIR = Path(__file__).resolve().parent
13+
sys.path.append(str(THIS_DIR))
14+
15+
from simple_observer import SimpleObserver, plot_best_y # noqa: E402
16+
17+
18+
def run_with_default_hyperparameters():
    """Run LaMBO2 on a small Ehrlich problem and plot the best value found.

    The seed population is the problem's own ``x0`` concatenated with 127
    randomly sampled sequences. One iteration of a fixed-length genetic
    algorithm is run on that population, and the flattened history of the
    genetic algorithm is then handed to LaMBO2 as its starting point.

    Two panels are plotted: the running best over all recorded evaluations
    (with a red vertical line at the size of the seed population), and the
    same curve with the seed population skipped.

    The black box is terminated before returning, even if one of the
    solvers or the plotting code raises.
    """
    problem = EhrlichProblemFactory().create(
        sequence_length=32,
        motif_length=4,
        n_motifs=2,
        quantization=4,
        return_value_on_unfeasible=-1.0,
    )
    black_box = problem.black_box

    try:
        # Pad the problem's initial solution with random sequences.
        random_seqs = np.array(
            [list(black_box._sample_random_sequence()) for _ in range(127)]
        )
        x0 = np.concatenate([problem.x0, random_seqs], axis=0)
        y0 = black_box(x0)

        # The observer is attached only after the seed evaluation above, so
        # the seed data is recorded by hand to keep the plots complete.
        observer = SimpleObserver()
        black_box.set_observer(observer)
        observer.x_s.append(x0.reshape(-1, 1))
        observer.y_s.append(y0)

        presolver = FixedLengthGeneticAlgorithm(
            black_box=black_box,
            x0=x0,
            y0=y0,
            population_size=128,
            prob_of_mutation=0.4,
        )
        presolver.solve(max_iter=1)

        # Collapse the presolver history to one row per recorded entry.
        presolver_x = np.array(presolver.history["x"])
        presolver_x = presolver_x.reshape(presolver_x.shape[0], -1)

        torch.set_float32_matmul_precision("medium")
        lambo2 = LaMBO2(
            black_box=black_box,
            # NOTE: flagged by the original author as an "inconsistent API":
            # LaMBO2 receives the presolver's history here and no y0.
            x0=presolver_x,
            overrides=["max_epochs=2"],
            max_epochs_for_retraining=8,
        )
        lambo2.solve(max_iter=32)

        _, (ax1, ax2) = plt.subplots(1, 2)
        plot_best_y(observer, ax1)
        plot_best_y(observer, ax2, start_from=x0.shape[0])
        ax1.axvline(x0.shape[0], color="red")
        plt.show()
    finally:
        # Always release the black box, including on failure.
        black_box.terminate()
69+
70+
71+
if __name__ == "__main__":
72+
run_with_default_hyperparameters()
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
import numpy as np
2+
import matplotlib.pyplot as plt
3+
4+
from poli.core.black_box_information import BlackBoxInformation
5+
from poli.core.util.abstract_observer import AbstractObserver
6+
7+
8+
class SimpleObserver(AbstractObserver):
    """Observer that keeps every observed input and output in two lists."""

    def __init__(self) -> None:
        # The histories are created before the parent constructor runs.
        self.x_s, self.y_s = [], []
        super().__init__()

    def initialize_observer(
        self, problem_setup_info: BlackBoxInformation, caller_info: object, seed: int
    ) -> object:
        """Do nothing; this observer requires no set-up."""
        return None

    def observe(self, x: np.ndarray, y: np.ndarray, context=None) -> None:
        """Append ``x`` and ``y`` to the stored histories; ``context`` is unused."""
        self.x_s.append(x)
        self.y_s.append(y)
21+
22+
23+
def plot_best_y(obs: SimpleObserver, ax: plt.Axes, start_from: int = 0):
    """Plot the running maximum of the values recorded in ``obs.y_s`` on ``ax``.

    All recorded outputs are stacked and flattened into one sequence, the
    cumulative maximum is taken over the whole sequence, and only the part
    from index ``start_from`` onwards is drawn.
    """
    recorded_values = np.vstack(obs.y_s).flatten()
    running_best = np.maximum.accumulate(recorded_values)
    ax.plot(running_best[start_from:])
    ax.set_xlabel("Number of evaluations")
    ax.set_ylabel("Best value found")

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ bounce = [
7777
"pytest>=7.3.1,<7.4.0",
7878
"bounce @ git+https://github.com/miguelgondu/bounce.git@main"
7979
]
80-
lambo2 = ["pytorch-cortex"]
80+
lambo2 = ["pytorch-cortex", "beignet"]
8181

8282
[project.urls]
8383
Homepage = "https://github.com/MachineLearningLifeScience/poli-baselines"

src/poli_baselines/solvers/bayesian_optimization/lambo2/environment.lambo2.small.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ dependencies:
77
- pip:
88
- --extra-index-url https://download.pytorch.org/whl/cpu
99
- numpy<2
10+
- beignet
1011
- "git+https://github.com/prescient-design/cortex.git"
1112
- "git+https://github.com/MachineLearningLifeScience/poli.git"
1213
- "git+https://github.com/MachineLearningLifeScience/poli-baselines.git@main"

src/poli_baselines/solvers/bayesian_optimization/lambo2/environment.lambo2.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ dependencies:
66
- pip
77
- pip:
88
- numpy<2
9+
- beignet
910
- "git+https://github.com/prescient-design/cortex.git"
1011
- "git+https://github.com/MachineLearningLifeScience/poli.git"
1112
- "git+https://github.com/MachineLearningLifeScience/poli-baselines.git@main"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
protein_constraint:
2+
_target_: cortex.model.branch.Conv1dBranch
3+
out_dim: 8
4+
channel_dim: ${feature_dim}
5+
num_blocks: 1
6+
kernel_size: ${kernel_size}

src/poli_baselines/solvers/bayesian_optimization/lambo2/hydra_configs/branches/protein_property.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,5 @@ protein_property:
22
_target_: cortex.model.branch.Conv1dBranch
33
out_dim: 8
44
channel_dim: ${feature_dim}
5-
num_blocks: 0
5+
num_blocks: 1
66
kernel_size: ${kernel_size}

0 commit comments

Comments
 (0)