From 9985090c01c52f99b1074a93493eb1aa60717e36 Mon Sep 17 00:00:00 2001
From: benjamc
Date: Mon, 12 Aug 2024 15:13:00 +0200
Subject: [PATCH 1/5] Add draft

---
 carps/benchmarks/problem.py |  41 ++++++++++-
 carps/benchmarks/wrapper.py | 131 ++++++++++++++++++++++++++++++++++++
 carps/utils/running.py      |  64 ++++++++++++++----
 pyproject.toml              |   2 +-
 4 files changed, 223 insertions(+), 15 deletions(-)
 create mode 100644 carps/benchmarks/wrapper.py

diff --git a/carps/benchmarks/problem.py b/carps/benchmarks/problem.py
index 4240dbdec..1fa42e9b0 100644
--- a/carps/benchmarks/problem.py
+++ b/carps/benchmarks/problem.py
@@ -2,12 +2,15 @@
 from abc import ABC, abstractmethod
 from typing import TYPE_CHECKING
+from dataclasses import asdict
+from carps.utils.trials import TrialInfo, TrialValue
+from ConfigSpace import Configuration
 
 if TYPE_CHECKING:
     from ConfigSpace import ConfigurationSpace
 
     from carps.loggers.abstract_logger import AbstractLogger
-    from carps.utils.trials import TrialInfo, TrialValue
+
 
 
 class Problem(ABC):
@@ -86,3 +89,39 @@ def evaluate(self, trial_info: TrialInfo) -> TrialValue:
             )
 
         return trial_value
+
+    def parallel_evaluate(self, eval_config: dict, fidels: dict[str, int | float] | None = None, trial_info: TrialInfo | None = None, obj_keys: list[str] | None = None, **kwargs) -> dict[str, float]:
+        assert obj_keys, "obj_keys must be specified, usually during instantiation of "\
+                         "carps.benchmarks.wrapper.ParallelProblemWrapper"
+
+        if trial_info is None:
+            trial_info = TrialInfo(
+                config=Configuration(values=eval_config, configuration_space=self.configspace),
+                budget=list(fidels.values())[0] if fidels else None
+            )
+
+        trial_value = self._evaluate(trial_info=trial_info)
+        self.n_function_calls += 1
+        if trial_info.normalized_budget is not None:
+            self.n_trials += trial_info.normalized_budget
+        else:
+            self.n_trials += 1
+
+        for logger in self.loggers:
+            logger.log_trial(
+                n_trials=self.n_trials,
+                n_function_calls=self.n_function_calls,
+                trial_info=trial_info,
+                trial_value=trial_value,
+            )
+
+        cost = trial_value.cost
+        if not isinstance(cost, list):
+            cost = [cost]
+        cost_dict = dict(zip(obj_keys, cost, strict=False))
+
+        return {
+            **cost_dict,
+            "runtime": max(trial_value.time, trial_value.virtual_time)
+        }
+
diff --git a/carps/benchmarks/wrapper.py b/carps/benchmarks/wrapper.py
new file mode 100644
index 000000000..fb4a5daaa
--- /dev/null
+++ b/carps/benchmarks/wrapper.py
@@ -0,0 +1,131 @@
+from __future__ import annotations
+
+from ConfigSpace import Configuration
+from typing import Any
+from benchmark_simulator import ObjectiveFuncWrapper, AbstractAskTellOptimizer
+from carps.benchmarks.problem import Problem
+from carps.optimizers.optimizer import Optimizer
+
+from typing import TYPE_CHECKING
+from carps.utils.trials import TrialInfo, TrialValue
+
+if TYPE_CHECKING:
+    from ConfigSpace import ConfigurationSpace
+
+    from carps.loggers.abstract_logger import AbstractLogger
+
+
+
+class ParallelProblemWrapper(ObjectiveFuncWrapper):
+    def __call__(
+        self,
+        trial_info: TrialInfo
+    ) -> TrialValue:
+        config = trial_info.config
+        eval_config = dict(config)
+        budget = trial_info.budget
+        fidels = {self.fidel_keys[0]: budget} if budget else None
+        print(">>>>>>>>", fidels)
+        output = super().__call__(eval_config, fidels=fidels, trial_info=trial_info, obj_keys=self.obj_keys)
+        print("<<<<<<<<<, done")
+
+        time = None
+        if "runtime" in self.obj_keys:
+            time = output["runtime"]
+
+        if len(self.obj_keys) > 1:
+            cost = [output[k] for k in self.obj_keys if k != "runtime"]
+        else:
+            cost = output[self.obj_keys[0]]
+
+        return TrialValue(
+            cost=cost,
+            time=time
+        )
+
+class OptimizerParallelWrapper(AbstractAskTellOptimizer):
+    def __init__(self, optimizer: Optimizer):
+        self.optimizer = optimizer
+
+        super().__init__()
+
+        if self.optimizer.solver is None:
+            self.optimizer.setup_optimizer()
+
+    def ask(self) -> tuple[dict[str, Any], dict[str, int | float] | None, int | None]:
+        """The ask method to sample a configuration using an optimizer.
+
+        Args:
+            None
+
+        Returns:
+            (eval_config, fidels) (tuple[dict[str, Any], dict[str, int | float] | None]):
+                * eval_config (dict[str, Any]):
+                    The configuration to evaluate.
+                    The key is the hyperparameter name and its value is the corresponding hyperparameter value.
+                    For example, when returning {"alpha": 0.1, "beta": 0.3}, the objective function evaluates
+                    the hyperparameter configuration with alpha=0.1 and beta=0.3.
+                * fidels (dict[str, int | float] | None):
+                    The fidelity parameters to be used for the evaluation of the objective function.
+                    If not multi-fidelity optimization, simply return None.
+                * config_id (int | None):
+                    The identifier of configuration if needed for continual learning.
+                    Not used at all when continual_max_fidel=None.
+                    As we internally use a hash of eval_config, it may be unstable if eval_config has float.
+                    However, even if config_id is not provided, our simulator works without errors,
+                    although we cannot guarantee that our simulator recognizes the same configs if a user's optimizer
+                    slightly changes the content of eval_config.
+        """
+        trial_info = self.optimizer.ask()
+        eval_config = dict(trial_info.config)
+        fidels = {self.optimizer.task.fidelity_type: trial_info.budget} if trial_info.budget else None
+        config_id = None
+        return eval_config, fidels, config_id
+
+    def tell(
+        self,
+        eval_config: dict[str, Any],
+        results: dict[str, float],
+        *,
+        fidels: dict[str, int | float] | None = None,
+        config_id: int | None = None,
+    ) -> None:
+        """The tell method to register a tuple of configuration, fidelity, and the results to an optimizer.
+
+        Args:
+            eval_config (dict[str, Any]):
+                The configuration to be used in the objective function.
+            results (dict[str, float]):
+                The dict of the return values from the objective function.
+            fidels (dict[str, Union[float, int]] | None):
+                The fidelities to be used in the objective function. Typically training epoch in deep learning.
+                If None, we assume that no fidelity is used.
+            config_id (int | None):
+                The identifier of configuration if needed for continual learning.
+                Not used at all when continual_max_fidel=None.
+                As we internally use a hash of eval_config, it may be unstable if eval_config has float.
+                However, even if config_id is not provided, our simulator works without errors,
+                although we cannot guarantee that our simulator recognizes the same configs if a user's optimizer
+                slightly changes the content of eval_config.
+
+        Returns:
+            None
+        """
+        trial_info = TrialInfo(
+            config=Configuration(values=eval_config, configuration_space=self.optimizer.problem.configspace),
+            budget=list(fidels.values())[0] if fidels else None
+        )
+        time = None
+        if "runtime" in results:
+            time = results["runtime"]
+            del results["runtime"]
+        cost = list(results.values())
+        if len(cost) == 1:
+            cost = cost[0]
+
+        trial_value = TrialValue(
+            cost=cost,
+            time=time
+        )
+        self.optimizer.tell(trial_info=trial_info, trial_value=trial_value)
+
diff --git a/carps/utils/running.py b/carps/utils/running.py
index 1eba1ef39..442bd479f 100644
--- a/carps/utils/running.py
+++ b/carps/utils/running.py
@@ -11,6 +11,11 @@
 )
 from carps.utils.exceptions import NotSupportedError
 
+from carps.benchmarks.wrapper import ParallelProblemWrapper
+from benchmark_simulator import ObjectiveFuncWrapper
+from carps.benchmarks.wrapper import OptimizerParallelWrapper
+
+from functools import partial
 
 if TYPE_CHECKING:
     from py_experimenter.result_processor import ResultProcessor
@@ -47,7 +52,17 @@ def make_problem(cfg: DictConfig, result_processor: ResultProcessor | None = Non
             kwargs = {}
         logger = instantiate(logger)(**kwargs)
         loggers.append(logger)
-    return instantiate(problem_cfg, loggers=loggers)
+
+    problem = instantiate(problem_cfg, loggers=loggers)
+    if cfg.task.n_workers > 1:
+        problem.evaluate = ParallelProblemWrapper(
+            obj_func=problem.parallel_evaluate,
+            obj_keys=[*list(cfg.task.objectives), "runtime"],
+            fidel_keys=[cfg.task.fidelity_type] if cfg.task.fidelity_type else None,
+            n_workers=cfg.task.n_workers,
+            ask_and_tell=False
+        )
+    return problem
 
 
 def make_optimizer(cfg: DictConfig, problem: Problem) -> Optimizer:
@@ -99,15 +114,38 @@ def optimize(cfg: DictConfig, result_processor: ResultProcessor | None = None) -
     problem = make_problem(cfg=cfg, result_processor=result_processor)
     inspect(problem)
 
-    optimizer = make_optimizer(cfg=cfg, problem=problem)
-    inspect(optimizer)
-
-    try:
-        inc_tuple = optimizer.run()
-        printr("Solution found: ", inc_tuple)
-    except NotSupportedError:
-        print("Not supported. Skipping.")
-    except Exception as e:
-        print("Something went wrong:")
-        print(e)
-        raise e
+    if cfg.task.n_workers > 1:
+        cfg_copy = cfg.copy()
+        cfg_copy.task.n_workers = 1
+        optimizer = make_optimizer(cfg=cfg_copy, problem=problem)
+        inspect(optimizer)
+        opt = OptimizerParallelWrapper(optimizer=optimizer)
+        obj_fun = partial(problem.parallel_evaluate, obj_keys=optimizer.task.objectives)
+        worker = ObjectiveFuncWrapper(
+            save_dir_name="tmp",
+            ask_and_tell=True,
+            n_workers=cfg.task.n_workers,
+            obj_func=obj_fun,
+            n_actual_evals_in_opt=cfg.task.n_trials + cfg.task.n_workers,  # TODO check if trial for simulator means the same as in carps
+            n_evals=cfg.task.n_trials,
+            seed=cfg.seed,
+            fidel_keys=None,
+            obj_keys=optimizer.task.objectives,
+            # allow_parallel_sampling=True,
+            expensive_sampler=True
+        )
+        worker.simulate(opt)
+
+    else:
+        optimizer = make_optimizer(cfg=cfg, problem=problem)
+        inspect(optimizer)
+
+        try:
+            inc_tuple = optimizer.run()
+            printr("Solution found: ", inc_tuple)
+        except NotSupportedError:
+            print("Not supported. Skipping.")
Skipping.") + except Exception as e: + print("Something went wrong:") + print(e) + raise e diff --git a/pyproject.toml b/pyproject.toml index 4939ef44d..57bb6ad41 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,7 @@ dependencies = [ "typing_extensions", "pymoo", "GitPython", - "mlcroissant", + "mfhpo-simulator", ] requires-python = ">=3.9" From 756b410b8aa0141cc50d9a8bf11a91ccfdd4c4d1 Mon Sep 17 00:00:00 2001 From: benjamc Date: Mon, 12 Aug 2024 15:14:06 +0200 Subject: [PATCH 2/5] Add notes --- PARALLEL.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 PARALLEL.md diff --git a/PARALLEL.md b/PARALLEL.md new file mode 100644 index 000000000..f96efa105 --- /dev/null +++ b/PARALLEL.md @@ -0,0 +1,12 @@ +Run with + +```bash +# hangs +python -m carps.run +optimizer/smac20=blackbox +problem/BBOB=cfg_2_1_2_0 task.n_workers=4 + +# API needs to be adjusted +python -m carps.run +optimizer/optuna=blackbox +problem/BBOB=cfg_2_1_2_0 task.n_workers=4 + +# works +python -m carps.run +optimizer/randomsearch=config +problem/BBOB=cfg_2_1_2_0 task.n_workers=4 +``` \ No newline at end of file From b7f7eea67d7c84b41c3459c36dee0110db6d0784 Mon Sep 17 00:00:00 2001 From: benjamc Date: Mon, 12 Aug 2024 15:14:30 +0200 Subject: [PATCH 3/5] Rename file --- carps/configs/optimizer/optuna/{config.yaml => blackbox.yaml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename carps/configs/optimizer/optuna/{config.yaml => blackbox.yaml} (100%) diff --git a/carps/configs/optimizer/optuna/config.yaml b/carps/configs/optimizer/optuna/blackbox.yaml similarity index 100% rename from carps/configs/optimizer/optuna/config.yaml rename to carps/configs/optimizer/optuna/blackbox.yaml From e16e975e7b0ae17d8455eef6500673e185e473dc Mon Sep 17 00:00:00 2001 From: dengdifan Date: Sat, 17 Aug 2024 18:05:53 +0200 Subject: [PATCH 4/5] add requirements for parallel setting --- requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 0d1a05358..0f356d210 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,4 +10,5 @@ dataclasses-json pymysql cryptography domdf_python_tools -py-experimenter>=1.4.1 \ No newline at end of file +py-experimenter>=1.4.1 +mfhpo-simulator \ No newline at end of file From 067c54aadabed3f3dc93781dd39c9bbd151a917d Mon Sep 17 00:00:00 2001 From: dengdifan Date: Sat, 17 Aug 2024 18:28:09 +0200 Subject: [PATCH 5/5] record trial info in parallel setting --- carps/benchmarks/wrapper.py | 44 ++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/carps/benchmarks/wrapper.py b/carps/benchmarks/wrapper.py index fb4a5daaa..9abfb2f09 100644 --- a/carps/benchmarks/wrapper.py +++ b/carps/benchmarks/wrapper.py @@ -1,26 +1,18 @@ from __future__ import annotations +from typing import TYPE_CHECKING, Any + +from benchmark_simulator import AbstractAskTellOptimizer, ObjectiveFuncWrapper from ConfigSpace import Configuration -from typing import Any -from benchmark_simulator import ObjectiveFuncWrapper, AbstractAskTellOptimizer -from carps.benchmarks.problem import Problem -from carps.optimizers.optimizer import Optimizer -from typing import TYPE_CHECKING from carps.utils.trials import TrialInfo, TrialValue if TYPE_CHECKING: - from ConfigSpace import ConfigurationSpace - - from carps.loggers.abstract_logger import AbstractLogger - + from carps.optimizers.optimizer import Optimizer class ParallelProblemWrapper(ObjectiveFuncWrapper): - def __call__( - self, - trial_info: TrialInfo - 
) -> TrialValue: + def __call__(self, trial_info: TrialInfo) -> TrialValue: config = trial_info.config eval_config = dict(config) budget = trial_info.budget @@ -38,10 +30,8 @@ def __call__( else: cost = output[self.obj_keys[0]] - return TrialValue( - cost=cost, - time=time - ) + return TrialValue(cost=cost, time=time) + class OptimizerParallelWrapper(AbstractAskTellOptimizer): def __init__(self, optimizer: Optimizer): @@ -52,6 +42,11 @@ def __init__(self, optimizer: Optimizer): if self.optimizer.solver is None: self.optimizer.setup_optimizer() + # we need to record the entire information preserved in the trial info during ask such that no information + # is lost when we feed the information to the benchmark_simulator + # NOTE: this solution does not solve the cases where one configuration runs on multiple seeds and instances! + self.history: dict[Configuration, TrialInfo] = {} + def ask(self) -> tuple[dict[str, Any], dict[str, int | float] | None, int | None]: """The ask method to sample a configuration using an optimizer. @@ -80,6 +75,7 @@ def ask(self) -> tuple[dict[str, Any], dict[str, int | float] | None, int | None eval_config = dict(trial_info.config) fidels = {self.optimizer.task.fidelity_type: trial_info.budget} if trial_info.budget else None config_id = None + self.history[trial_info.config] = trial_info return eval_config, fidels, config_id def tell( @@ -111,9 +107,15 @@ def tell( Returns: None """ + config = Configuration(values=eval_config, configuration_space=self.optimizer.problem.configspace) + trial_info_ask = self.history.pop(config) trial_info = TrialInfo( config=Configuration(values=eval_config, configuration_space=self.optimizer.problem.configspace), - budget=list(fidels.values())[0] if fidels else None + budget=next(iter(fidels.values())) if fidels else None, + instance=trial_info_ask.instance, + seed=trial_info_ask.seed, + name=trial_info_ask.name, + checkpoint=trial_info_ask.checkpoint, ) time = None if "runtime" in results: @@ -123,9 +125,5 @@ def tell( if len(cost) == 1: cost = cost[0] - trial_value = TrialValue( - cost=cost, - time=time - ) + trial_value = TrialValue(cost=cost, time=time) self.optimizer.tell(trial_info=trial_info, trial_value=trial_value) -
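As a reader-facing summary of how the pieces above fit together, the sketch below mirrors the parallel branch that PATCH 1/5 adds to `optimize()` in `carps/utils/running.py`. It is an illustrative sketch, not additional patch content: the helper name `run_parallel` does not exist in the patches, and the config fields it reads (`task.n_workers`, `task.n_trials`, `task.objectives`, `seed`) are the same ones the patch itself uses.

```python
from functools import partial

from benchmark_simulator import ObjectiveFuncWrapper

from carps.benchmarks.wrapper import OptimizerParallelWrapper


def run_parallel(cfg, problem, optimizer) -> None:
    """Drive a carps optimizer through the simulator, as in the optimize() branch of PATCH 1/5."""
    # Wrap the carps optimizer so the simulator can drive it via ask/tell.
    opt = OptimizerParallelWrapper(optimizer=optimizer)
    # parallel_evaluate returns {objective: cost, ..., "runtime": wallclock}.
    obj_fun = partial(problem.parallel_evaluate, obj_keys=optimizer.task.objectives)
    worker = ObjectiveFuncWrapper(
        save_dir_name="tmp",
        ask_and_tell=True,
        n_workers=cfg.task.n_workers,
        obj_func=obj_fun,
        n_actual_evals_in_opt=cfg.task.n_trials + cfg.task.n_workers,
        n_evals=cfg.task.n_trials,
        seed=cfg.seed,
        fidel_keys=None,
        obj_keys=optimizer.task.objectives,
        expensive_sampler=True,
    )
    # The simulator owns the loop: it asks for configs, schedules the simulated
    # workers against Problem.parallel_evaluate, and tells the results back.
    worker.simulate(opt)
```

`worker.simulate(opt)` then runs the whole optimization: it calls `OptimizerParallelWrapper.ask()`, evaluates configurations on the simulated workers, and feeds the results back through `tell()`, with the `"runtime"` entry carried alongside the objective values so the simulator can order finishing times.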