diff --git a/src/poli_baselines/solvers/bayesian_optimization/boss/__init__.py b/src/poli_baselines/solvers/bayesian_optimization/boss/__init__.py new file mode 100644 index 0000000..d9f759c --- /dev/null +++ b/src/poli_baselines/solvers/bayesian_optimization/boss/__init__.py @@ -0,0 +1,3 @@ +from .solver import BossSolver + +__all__ = ["BossSolver"] diff --git a/src/poli_baselines/solvers/bayesian_optimization/boss/environment.boss.small.yml b/src/poli_baselines/solvers/bayesian_optimization/boss/environment.boss.small.yml new file mode 100644 index 0000000..3bdf6ba --- /dev/null +++ b/src/poli_baselines/solvers/bayesian_optimization/boss/environment.boss.small.yml @@ -0,0 +1,12 @@ +name: poli__boss +channels: + - defaults +dependencies: + - python=3.10 + - pip + - pip: + - numpy<2 + - emukit + - "git+https://github.com/MachineLearningLifeScience/poli.git@v0.2.1" + - "git+https://github.com/MachineLearningLifeScience/poli-baselines@main" + - "git+https://github.com/henrymoss/BOSS.git@master" diff --git a/src/poli_baselines/solvers/bayesian_optimization/boss/environment.boss.yml b/src/poli_baselines/solvers/bayesian_optimization/boss/environment.boss.yml new file mode 100644 index 0000000..cdbb657 --- /dev/null +++ b/src/poli_baselines/solvers/bayesian_optimization/boss/environment.boss.yml @@ -0,0 +1,18 @@ +name: poli__boss +channels: + - defaults +dependencies: + - python=3.10 + - pip + - pip: + - numpy<2 + - emukit + - pandas>=1.5.3,<1.6.0 + - coverage>=7.2.5,<7.3.0 + - requests>=2.31.0,<2.32.0 + - black>=22.12.0,<22.13.0 + - memray>=1.6.0,<1.7.0 + - pytest>=7.3.1,<7.4.0 + - "git+https://github.com/MachineLearningLifeScience/poli.git@v0.2.1" + - "git+https://github.com/MachineLearningLifeScience/poli-baselines@main" + - "git+https://github.com/henrymoss/BOSS.git@master" diff --git a/src/poli_baselines/solvers/bayesian_optimization/boss/solver.py b/src/poli_baselines/solvers/bayesian_optimization/boss/solver.py new file mode 100644 index 0000000..d21d9c3 --- 
class BossSolver(AbstractSolver):
    """Bridge between a poli black box and the BOSS string-kernel optimizer.

    The constructor wires together an emukit ``BayesianOptimizationLoop`` made
    of a ``CandidateStringParameter`` search space derived from ``x0``, an
    ``SSK_model`` surrogate, an ``ExpectedImprovement`` acquisition and a
    ``RandomSearchAcquisitionOptimizer``. ``solve`` runs such a loop for a
    fixed number of iterations.

    BOSS minimizes, so the black box is exposed to the loop through
    ``self.objective``, which returns the negated black-box value.
    """

    def __init__(
        self,
        black_box: AbstractBlackBox,
        x0: np.ndarray | None = None,
        y0: np.ndarray | None = None,
        device: str | None = None,
        dtype: Literal["float32", "float64"] = "float32",
        batch_size: int = 1,
        n_initial_points: int | None = None,
        number_new_bins_on_split: int = 2,
        results_dir: Path | None = None,
    ):
        """Build the search space, surrogate model and optimization loop.

        Parameters
        ----------
        black_box:
            Objective to optimize; forwarded to ``AbstractSolver``.
        x0:
            Initial inputs. Each entry is joined with single spaces to form
            one candidate string of the search space. Required, despite the
            ``None`` default.
        y0:
            Black-box values at ``x0``. Required, despite the ``None``
            default.
        device:
            Stored on the instance. When ``None``, ``"cuda"`` is picked if
            ``torch.cuda.is_available()`` and ``"cpu"`` otherwise.
        dtype, batch_size, n_initial_points, number_new_bins_on_split:
            Stored on the instance; this class only forwards them when
            ``solve`` rebuilds the solver.
        results_dir:
            Directory created (with parents) at construction time and seeded
            with a ``.gitignore``. Defaults to ``ROOT_DIR / "boss_results"``.

        Raises
        ------
        ValueError
            If ``x0`` or ``y0`` is ``None``.
        """
        super().__init__(black_box, None, None)

        # Fail early with a readable message instead of a TypeError raised
        # while iterating over ``None`` further down.
        if x0 is None or y0 is None:
            raise ValueError(
                "BossSolver requires both x0 and y0: the candidate space and "
                "the initial surrogate data are built from them."
            )

        if device is None:
            device = "cuda" if torch.cuda.is_available() else "cpu"
        self.x0 = x0
        self.y0 = y0
        self.device = device
        self.dtype = dtype
        self.batch_size = batch_size
        self.number_new_bins_on_split = number_new_bins_on_split
        self.n_initial_points = n_initial_points

        self.objective = lambda x: -self.black_box(x)  # BOSS minimizes

        # see SMILES examples: one space-separated token string per row.
        token_space = np.array([" ".join(list(ss)) for ss in self.x0]).reshape(-1, 1)

        self.search_space = ParameterSpace(
            [CandidateStringParameter("string", token_space)]
        )  # x0 goes here with correct wrapper

        # The loop only ever observes ``self.objective`` (the negated black
        # box), so the initial targets must use the same sign convention.
        # NOTE(review): the model receives the raw ``x0`` while the search
        # space holds the space-joined strings -- confirm which encoding
        # SSK_model expects.
        self.model = SSK_model(
            self.search_space,
            self.x0,
            -np.asarray(self.y0),
            max_subsequence_length=5,
            n_restarts=1,
        )
        self.acquisition = ExpectedImprovement(self.model)
        self.optimizer = RandomSearchAcquisitionOptimizer(self.search_space, 100)

        self.bo_loop_ssk = BayesianOptimizationLoop(
            model=self.model,
            space=self.search_space,
            acquisition=self.acquisition,
            acquisition_optimizer=self.optimizer,
        )

        # Creating the results dir for boss. Normalize to ``Path`` once so a
        # plain string works for both ``mkdir`` and the ``/`` join below.
        if results_dir is None:
            results_dir = ROOT_DIR / "boss_results"
        results_dir = Path(results_dir)
        results_dir.mkdir(parents=True, exist_ok=True)
        self.results_dir = results_dir

        # Creating a gitignore file inside that dir
        with open(results_dir / ".gitignore", "w") as fp:
            fp.write("*\n!.gitignore")

    def solve(
        self,
        max_iter: int = 100,
        n_initial_points: int | None = None,
        seed: int | None = None,
    ) -> None:
        """Run the Bayesian-optimization loop for ``max_iter`` iterations.

        A fresh ``BossSolver`` is built from this instance's configuration
        (after seeding, when a seed is given), stored as ``self.boss``, and
        its loop is run against ``self.objective``.

        Parameters
        ----------
        max_iter:
            Passed as ``i_max`` to ``FixedIterationsStoppingCondition``.
        n_initial_points:
            Overrides the value given at construction time. One of the two
            must be set.
        seed:
            When not ``None``, passed to ``seed_python_numpy_and_torch``
            before the fresh solver is built.

        Raises
        ------
        ValueError
            If ``n_initial_points`` is set neither here nor in ``__init__``.
        """
        if seed is not None:
            seed_python_numpy_and_torch(seed)

        if n_initial_points is None:
            if self.n_initial_points is None:
                raise ValueError(
                    "n_initial_points must be set, either in init or in solve"
                )
            n_initial_points = self.n_initial_points

        stopping_condition = FixedIterationsStoppingCondition(i_max=max_iter)

        # NOTE(review): rebuilding the solver here duplicates the model built
        # in __init__; presumably done so construction happens after seeding
        # -- confirm before simplifying to ``self.bo_loop_ssk``.
        self.boss = BossSolver(
            black_box=self.black_box,
            x0=self.x0,
            y0=self.y0,
            n_initial_points=n_initial_points,
            batch_size=self.batch_size,
            number_new_bins_on_split=self.number_new_bins_on_split,
            # Reuse the directory chosen at construction time instead of
            # silently switching to a second, hard-coded location.
            results_dir=self.results_dir,
            device=self.device,
            dtype=self.dtype,
        )
        self.boss.bo_loop_ssk.run_loop(self.objective, stopping_condition)
def load_alphabet() -> list[str]:
    """Return the keys of the JSON object in ``zinc250k_alphabet_stoi.json``."""
    with open(TEST_FILES_PATH / "zinc250k_alphabet_stoi.json") as f:
        alphabet = json.load(f)

    return list(alphabet.keys())


def load_sequence_length() -> int:
    """Return ``max_sequence_length`` from ``zinc250k_metadata.json``."""
    with open(TEST_FILES_PATH / "zinc250k_metadata.json") as f:
        metadata = json.load(f)

    return metadata["max_sequence_length"]


@pytest.mark.slow()
def test_boss_runs():
    """
    Test BOSS instantiates and runs.

    The test is skipped when the ``boss`` package cannot be imported.
    """
    from poli import objective_factory

    pytest.importorskip("boss")
    from poli_baselines.solvers.bayesian_optimization.boss import BossSolver

    # The solver is built only from the problem's own initial data, so the
    # alphabet / sequence-length fixtures are not loaded here.
    problem = objective_factory.create(name="rdkit_qed", string_representation="SMILES")
    black_box = problem.black_box
    x0 = problem.x0
    y0 = black_box(x0)

    solver = BossSolver(
        black_box=black_box,
        x0=x0,
        y0=y0,
        n_initial_points=1,
    )

    assert solver is not None

    solver.solve(max_iter=1)
objective_factory - pytest.importorskip("bounce") # We check if we have PR installed + pytest.importorskip("bounce") # We check if we have Bounce installed from poli_baselines.solvers.bayesian_optimization.bounce import BounceSolver alphabet = load_alphabet()