diff --git a/PARALLEL.md b/PARALLEL.md
new file mode 100644
index 000000000..f96efa105
--- /dev/null
+++ b/PARALLEL.md
@@ -0,0 +1,12 @@
+Run with
+
+```bash
+# hangs
+python -m carps.run +optimizer/smac20=blackbox +problem/BBOB=cfg_2_1_2_0 task.n_workers=4
+
+# API needs to be adjusted
+python -m carps.run +optimizer/optuna=blackbox +problem/BBOB=cfg_2_1_2_0 task.n_workers=4
+
+# works
+python -m carps.run +optimizer/randomsearch=config +problem/BBOB=cfg_2_1_2_0 task.n_workers=4
+```
\ No newline at end of file
diff --git a/carps/benchmarks/problem.py b/carps/benchmarks/problem.py
index 4240dbdec..1fa42e9b0 100644
--- a/carps/benchmarks/problem.py
+++ b/carps/benchmarks/problem.py
@@ -2,12 +2,15 @@
 
 from abc import ABC, abstractmethod
 from typing import TYPE_CHECKING
+from dataclasses import asdict
+from carps.utils.trials import TrialInfo, TrialValue
+from ConfigSpace import Configuration
 
 if TYPE_CHECKING:
     from ConfigSpace import ConfigurationSpace
 
     from carps.loggers.abstract_logger import AbstractLogger
-    from carps.utils.trials import TrialInfo, TrialValue
+
 
 
 class Problem(ABC):
@@ -86,3 +89,39 @@ def evaluate(self, trial_info: TrialInfo) -> TrialValue:
             )
 
         return trial_value
+
+    def parallel_evaluate(self, eval_config: dict, fidels: dict[str, int | float] | None = None, trial_info: TrialInfo | None = None, obj_keys: list[str] | None = None, **kwargs) -> dict[str, float]:
+        assert obj_keys, "obj_keys must be specified, usually during instantiation of "\
+            "carps.benchmarks.wrapper.ParallelProblemWrapper"
+
+        if trial_info is None:
+            trial_info = TrialInfo(
+                config=Configuration(values=eval_config, configuration_space=self.configspace),
+                budget=list(fidels.values())[0] if fidels else None
+            )
+
+        trial_value = self._evaluate(trial_info=trial_info)
+        self.n_function_calls += 1
+        if trial_info.normalized_budget is not None:
+            self.n_trials += trial_info.normalized_budget
+        else:
+            self.n_trials += 1
+
+        for logger in self.loggers:
+            logger.log_trial(
+                n_trials=self.n_trials,
+                n_function_calls=self.n_function_calls,
+                trial_info=trial_info,
+                trial_value=trial_value,
+            )
+
+        cost = trial_value.cost
+        if not isinstance(cost, list):
+            cost = [cost]
+        cost_dict = dict(zip(obj_keys, cost, strict=False))
+
+        return {
+            **cost_dict,
+            "runtime": max(trial_value.time, trial_value.virtual_time)
+        }
+
diff --git a/carps/benchmarks/wrapper.py b/carps/benchmarks/wrapper.py
new file mode 100644
index 000000000..9abfb2f09
--- /dev/null
+++ b/carps/benchmarks/wrapper.py
@@ -0,0 +1,129 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Any
+
+from benchmark_simulator import AbstractAskTellOptimizer, ObjectiveFuncWrapper
+from ConfigSpace import Configuration
+
+from carps.utils.trials import TrialInfo, TrialValue
+
+if TYPE_CHECKING:
+    from carps.optimizers.optimizer import Optimizer
+
+
+class ParallelProblemWrapper(ObjectiveFuncWrapper):
+    def __call__(self, trial_info: TrialInfo) -> TrialValue:
+        config = trial_info.config
+        eval_config = dict(config)
+        budget = trial_info.budget
+        fidels = {self.fidel_keys[0]: budget} if budget else None
+        print(">>>>>>>>", fidels)
+        output = super().__call__(eval_config, fidels=fidels, trial_info=trial_info, obj_keys=self.obj_keys)
+        print("<<<<<<<<<, done")
+
+        time = None
+        if "runtime" in self.obj_keys:
+            time = output["runtime"]
+
+        if len(self.obj_keys) > 1:
+            cost = [output[k] for k in self.obj_keys if k != "runtime"]
+        else:
+            cost = output[self.obj_keys[0]]
+
+        return TrialValue(cost=cost, time=time)
+
+
+class OptimizerParallelWrapper(AbstractAskTellOptimizer):
+    def __init__(self, optimizer: Optimizer):
+        self.optimizer = optimizer
+
+        super().__init__()
+
+        if self.optimizer.solver is None:
+            self.optimizer.setup_optimizer()
+
+        # we need to record the entire information preserved in the trial info during ask such that no information
+        # is lost when we feed the information to the benchmark_simulator
+        # NOTE: this solution does not solve the cases where one configuration runs on multiple seeds and instances!
+        self.history: dict[Configuration, TrialInfo] = {}
+
+    def ask(self) -> tuple[dict[str, Any], dict[str, int | float] | None, int | None]:
+        """The ask method to sample a configuration using an optimizer.
+
+        Args:
+            None
+
+        Returns:
+            (eval_config, fidels, config_id) (tuple[dict[str, Any], dict[str, int | float] | None, int | None]):
+                * eval_config (dict[str, Any]):
+                    The configuration to evaluate.
+                    The key is the hyperparameter name and its value is the corresponding hyperparameter value.
+                    For example, when returning {"alpha": 0.1, "beta": 0.3}, the objective function evaluates
+                    the hyperparameter configuration with alpha=0.1 and beta=0.3.
+                * fidels (dict[str, int | float] | None):
+                    The fidelity parameters to be used for the evaluation of the objective function.
+                    If not multi-fidelity optimization, simply return None.
+                * config_id (int | None):
+                    The identifier of the configuration if needed for continual learning.
+                    Not used at all when continual_max_fidel=None.
+                    As we internally use a hash of eval_config, it may be unstable if eval_config has float.
+                    However, even if config_id is not provided, our simulator works without errors,
+                    although we cannot guarantee that our simulator recognizes the same configs if a user's optimizer
+                    slightly changes the content of eval_config.
+        """
+        trial_info = self.optimizer.ask()
+        eval_config = dict(trial_info.config)
+        fidels = {self.optimizer.task.fidelity_type: trial_info.budget} if trial_info.budget else None
+        config_id = None
+        self.history[trial_info.config] = trial_info
+        return eval_config, fidels, config_id
+
+    def tell(
+        self,
+        eval_config: dict[str, Any],
+        results: dict[str, float],
+        *,
+        fidels: dict[str, int | float] | None = None,
+        config_id: int | None = None,
+    ) -> None:
+        """The tell method to register a tuple of configuration, fidelity, and results with an optimizer.
+
+        Args:
+            eval_config (dict[str, Any]):
+                The configuration to be used in the objective function.
+            results (dict[str, float]):
+                The dict of the return values from the objective function.
+            fidels (dict[str, int | float] | None):
+                The fidelities to be used in the objective function. Typically the training epoch in deep learning.
+                If None, we assume that no fidelity is used.
+            config_id (int | None):
+                The identifier of the configuration if needed for continual learning.
+                Not used at all when continual_max_fidel=None.
+                As we internally use a hash of eval_config, it may be unstable if eval_config has float.
+                However, even if config_id is not provided, our simulator works without errors,
+                although we cannot guarantee that our simulator recognizes the same configs if a user's optimizer
+                slightly changes the content of eval_config.
+
+        Returns:
+            None
+        """
+        config = Configuration(values=eval_config, configuration_space=self.optimizer.problem.configspace)
+        trial_info_ask = self.history.pop(config)
+        trial_info = TrialInfo(
+            config=Configuration(values=eval_config, configuration_space=self.optimizer.problem.configspace),
+            budget=next(iter(fidels.values())) if fidels else None,
+            instance=trial_info_ask.instance,
+            seed=trial_info_ask.seed,
+            name=trial_info_ask.name,
+            checkpoint=trial_info_ask.checkpoint,
+        )
+        time = None
+        if "runtime" in results:
+            time = results["runtime"]
+            del results["runtime"]
+        cost = list(results.values())
+        if len(cost) == 1:
+            cost = cost[0]
+
+        trial_value = TrialValue(cost=cost, time=time)
+        self.optimizer.tell(trial_info=trial_info, trial_value=trial_value)
diff --git a/carps/configs/optimizer/optuna/config.yaml b/carps/configs/optimizer/optuna/blackbox.yaml
similarity index 100%
rename from carps/configs/optimizer/optuna/config.yaml
rename to carps/configs/optimizer/optuna/blackbox.yaml
diff --git a/carps/utils/running.py b/carps/utils/running.py
index 1eba1ef39..442bd479f 100644
--- a/carps/utils/running.py
+++ b/carps/utils/running.py
@@ -11,6 +11,11 @@
 )
 from carps.utils.exceptions import NotSupportedError
 
+from carps.benchmarks.wrapper import ParallelProblemWrapper
+from benchmark_simulator import ObjectiveFuncWrapper
+from carps.benchmarks.wrapper import OptimizerParallelWrapper
+
+from functools import partial
 
 if TYPE_CHECKING:
     from py_experimenter.result_processor import ResultProcessor
@@ -47,7 +52,17 @@ def make_problem(cfg: DictConfig, result_processor: ResultProcessor | None = Non
                 kwargs = {}
             logger = instantiate(logger)(**kwargs)
             loggers.append(logger)
-    return instantiate(problem_cfg, loggers=loggers)
+
+    problem = instantiate(problem_cfg, loggers=loggers)
+    if cfg.task.n_workers > 1:
+        problem.evaluate = ParallelProblemWrapper(
+            obj_func=problem.parallel_evaluate,
+            obj_keys=[*list(cfg.task.objectives), "runtime"],
+            fidel_keys=[cfg.task.fidelity_type] if cfg.task.fidelity_type else None,
+            n_workers=cfg.task.n_workers,
+            ask_and_tell=False
+        )
+    return problem
 
 
 def make_optimizer(cfg: DictConfig, problem: Problem) -> Optimizer:
@@ -99,15 +114,38 @@ def optimize(cfg: DictConfig, result_processor: ResultProcessor | None = None) -
     problem = make_problem(cfg=cfg, result_processor=result_processor)
     inspect(problem)
 
-    optimizer = make_optimizer(cfg=cfg, problem=problem)
-    inspect(optimizer)
-
-    try:
-        inc_tuple = optimizer.run()
-        printr("Solution found: ", inc_tuple)
-    except NotSupportedError:
-        print("Not supported. Skipping.")
Skipping.") - except Exception as e: - print("Something went wrong:") - print(e) - raise e + if cfg.task.n_workers > 1: + cfg_copy = cfg.copy() + cfg_copy.task.n_workers = 1 + optimizer = make_optimizer(cfg=cfg_copy, problem=problem) + inspect(optimizer) + opt = OptimizerParallelWrapper(optimizer=optimizer) + obj_fun = partial(problem.parallel_evaluate, obj_keys=optimizer.task.objectives) + worker = ObjectiveFuncWrapper( + save_dir_name="tmp", + ask_and_tell=True, + n_workers=cfg.task.n_workers, + obj_func=obj_fun, + n_actual_evals_in_opt=cfg.task.n_trials + cfg.task.n_workers, # TODO check if trial for simulator means the same as in carps + n_evals=cfg.task.n_trials, + seed=cfg.seed, + fidel_keys=None, + obj_keys=optimizer.task.objectives, + # allow_parallel_sampling=True, + expensive_sampler=True + ) + worker.simulate(opt) + + else: + optimizer = make_optimizer(cfg=cfg, problem=problem) + inspect(optimizer) + + try: + inc_tuple = optimizer.run() + printr("Solution found: ", inc_tuple) + except NotSupportedError: + print("Not supported. Skipping.") + except Exception as e: + print("Something went wrong:") + print(e) + raise e diff --git a/pyproject.toml b/pyproject.toml index 4939ef44d..57bb6ad41 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,7 @@ dependencies = [ "typing_extensions", "pymoo", "GitPython", - "mlcroissant", + "mfhpo-simulator", ] requires-python = ">=3.9" diff --git a/requirements.txt b/requirements.txt index 0d1a05358..0f356d210 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,4 +10,5 @@ dataclasses-json pymysql cryptography domdf_python_tools -py-experimenter>=1.4.1 \ No newline at end of file +py-experimenter>=1.4.1 +mfhpo-simulator \ No newline at end of file