diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index 4fecec7d..3d52b250 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -2,7 +2,7 @@ name: Test Pull Requests -on: [push, pull_request] +on: [push] jobs: Tests: @@ -11,36 +11,42 @@ jobs: strategy: matrix: include: - - python-version: 3.7 + - python-version: "3.7" DISPLAY_NAME: "Singularity Tests + CODECOV" RUN_TESTS: true USE_SINGULARITY: true SINGULARITY_VERSION: "3.8" RUN_CODECOV: true - - python-version: 3.7 + - python-version: "3.7" DISPLAY_NAME: "Codestyle" RUN_CODESTYLE: true USE_SINGULARITY: false - - python-version: 3.7 + - python-version: "3.7" DISPLAY_NAME: "Singularity Container Examples" RUN_CONTAINER_EXAMPLES: true USE_SINGULARITY: true SINGULARITY_VERSION: "3.8" - - python-version: 3.7 + - python-version: "3.7" DISPLAY_NAME: "Local Examples" RUN_LOCAL_EXAMPLES: true USE_SINGULARITY: false - - python-version: 3.8 + - python-version: "3.8" DISPLAY_NAME: "Singularity Tests" RUN_TESTS: true USE_SINGULARITY: true SINGULARITY_VERSION: "3.8" - - python-version: 3.9 + - python-version: "3.9" + DISPLAY_NAME: "Singularity Tests" + RUN_TESTS: true + USE_SINGULARITY: true + SINGULARITY_VERSION: "3.8" + + - python-version: "3.10" DISPLAY_NAME: "Singularity Tests" RUN_TESTS: true USE_SINGULARITY: true @@ -63,7 +69,7 @@ jobs: - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v2 with: - python-version: ${{ matrix.python-version }} + python-version: "${{ matrix.python-version }}" - name: Set up Go for Singularity if: matrix.USE_SINGULARITY == true uses: actions/setup-go@v2 @@ -78,4 +84,4 @@ jobs: python -m pip install --upgrade pip chmod +x ci_scripts/install.sh && source ./ci_scripts/install.sh - name: Run Tests - run: chmod +x ci_scripts/script.sh && source ./ci_scripts/script.sh \ No newline at end of file + run: chmod +x ci_scripts/script.sh && source ./ci_scripts/script.sh diff --git a/README.md b/README.md index ec0a442e..96dd406d 100644 --- a/README.md +++ b/README.md @@ -149,3 +149,19 @@ See whether in `~/.singularity/instances/sing/$HOSTNAME/*/` there is a file that **Note:** If you are looking for a different or older version of our benchmarking library, you might be looking for [HPOlib1.5](https://github.com/automl/HPOlib1.5) + +## Reference + +If you use HPOBench, please cite the following paper: + +```bibtex +@inproceedings{ + eggensperger2021hpobench, + title={{HPOB}ench: A Collection of Reproducible Multi-Fidelity Benchmark Problems for {HPO}}, + author={Katharina Eggensperger and Philipp M{\"u}ller and Neeratyoy Mallik and Matthias Feurer and Rene Sass and Aaron Klein and Noor Awad and Marius Lindauer and Frank Hutter}, + booktitle={Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 2)}, + year={2021}, + url={https://openreview.net/forum?id=1k4rJYEwda-} +} +``` + diff --git a/ci_scripts/install.sh b/ci_scripts/install.sh index 2d229f74..d361600d 100644 --- a/ci_scripts/install.sh +++ b/ci_scripts/install.sh @@ -4,14 +4,24 @@ install_packages="" if [[ "$RUN_TESTS" == "true" ]]; then echo "Install tools for testing" - install_packages="${install_packages}xgboost,pytest,test_paramnet,test_tabular_datamanager," + install_packages="${install_packages}pytest,test_tabular_datamanager," pip install codecov - # The param net benchmark does not work with a scikit-learn version != 0.23.2. 
(See notes in the benchmark) - # To make sure that no newer version is installed, we install it before the other requirements. - # Since we are not using a "--upgrade" option later on, pip skips to install another scikit-learn version. - echo "Install the right scikit-learn function for the param net tests." - pip install --upgrade scikit-learn==0.23.2 + PYVERSION=$(python -V 2>&1 | sed 's/.* \([0-9]\).\([0-9]*\).*/\1\2/') + if [[ "${PYVERSION}" != "310" ]]; then + # The param net benchmark does not work with a scikit-learn version != 0.23.2. (See notes in the benchmark) + # To make sure that no newer version is installed, we install it before the other requirements. + # Since we are not using a "--upgrade" option later on, pip skips to install another scikit-learn version. + echo "Install the right scikit-learn function for the param net tests." + pip install --upgrade scikit-learn==0.23.2 + install_packages="${install_packages}xgboost,test_paramnet," + else + echo "Skip installing the extra paramnet tests." + # For 3.10, we need a different pandas version - this comes as a requirement for the old xgboost benchmark. + # building pandas<=1.5.0 does not work with 3.10 anymore. -> install a different version. + install_packages="${install_packages}xgboost_310," + fi + else echo "Skip installing tools for testing" fi @@ -35,7 +45,16 @@ if [[ "$RUN_LOCAL_EXAMPLES" == "true" ]]; then echo "Install packages for local examples" echo "Install swig" sudo apt-get update && sudo apt-get install -y build-essential swig - install_packages="${install_packages}xgboost," + + PYVERSION=$(python -V 2>&1 | sed 's/.* \([0-9]\).\([0-9]*\).*/\1\2/') + if [[ "${PYVERSION}" != "310" ]]; then + # For 3.10, we need a different pandas version - this comes as a requirement for the old xgboost benchmark. + # building pandas<=1.5.0 does not work with 3.10 anymore. -> install a different version. 
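For reference, the `sed` pipeline above reduces the interpreter version to its major and minor digits ("37", "38", "39", "310"), which is what the `xgboost` vs `xgboost_310` branch keys on. The snippet below is only an illustrative Python equivalent of that check, not part of the CI scripts.

```python
# Illustrative equivalent of the PYVERSION computed by the `sed` expression above:
# major and minor version digits concatenated, e.g. "310" on Python 3.10.
import sys

pyversion = f"{sys.version_info.major}{sys.version_info.minor}"
print(pyversion)            # e.g. "310"
print(pyversion != "310")   # mirrors the branch choosing `xgboost` vs `xgboost_310`
```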
+ install_packages="${install_packages}xgboost," + else + install_packages="${install_packages}xgboost_310," + fi + else echo "Skip installing packages for local examples" fi diff --git a/extra_requirements/nasbench_1shot1.json b/extra_requirements/nasbench_1shot1.json index 7523d0f2..b008c789 100644 --- a/extra_requirements/nasbench_1shot1.json +++ b/extra_requirements/nasbench_1shot1.json @@ -1,3 +1,3 @@ { - "nasbench_1shot1": ["tensorflow==1.15.0","matplotlib","seaborn", "networkx", "tqdm"] + "nasbench_1shot1": ["protobuf==3.20.1", "tensorflow==1.15.0", "matplotlib", "seaborn", "networkx", "tqdm"] } \ No newline at end of file diff --git a/extra_requirements/tests.json b/extra_requirements/tests.json index 6c27be97..b25d6755 100644 --- a/extra_requirements/tests.json +++ b/extra_requirements/tests.json @@ -2,5 +2,5 @@ "codestyle": ["pycodestyle","flake8","pylint"], "pytest": ["pytest>=4.6","pytest-cov"], "test_paramnet": ["tqdm", "scikit-learn==0.23.2"], - "test_tabular_datamanager": ["pyarrow", "fastparquet"] + "test_tabular_datamanager": ["tqdm","pyarrow", "fastparquet"] } \ No newline at end of file diff --git a/extra_requirements/xgboost.json b/extra_requirements/xgboost.json index 2789d2ef..eefc920c 100644 --- a/extra_requirements/xgboost.json +++ b/extra_requirements/xgboost.json @@ -1,3 +1,4 @@ { - "xgboost": ["xgboost==0.90","pandas>=1.0.0,<1.1.5","openml==0.10.2","scikit-learn>=0.18.1"] + "xgboost": ["xgboost==0.90","pandas>=1.0.0,<1.1.5","openml==0.10.2","scikit-learn>=0.18.1"], + "xgboost_310": ["xgboost","pandas","openml==0.10.2","scikit-learn>=0.18.1"] } \ No newline at end of file diff --git a/extra_requirements/yahpo_gym.json b/extra_requirements/yahpo_gym.json index 77bea14d..10f4e390 100644 --- a/extra_requirements/yahpo_gym.json +++ b/extra_requirements/yahpo_gym.json @@ -1,3 +1,4 @@ { - "yahpo_gym": ["yahpo_gym@git+https://github.com/pfistfl/yahpo_gym#egg=yahpo_gym&subdirectory=yahpo_gym"] + "yahpo_gym": ["yahpo_gym@git+https://github.com/pfistfl/yahpo_gym#egg=yahpo_gym&subdirectory=yahpo_gym"], + "yahpo_gym_raw": ["yahpo_gym@git+https://github.com/pfistfl/yahpo_gym#egg=yahpo_gym&subdirectory=yahpo_gym", "rpy2>=3.5.0", "openml==0.10.2", "gitpython>=3.1"] } diff --git a/hpobench/abstract_benchmark.py b/hpobench/abstract_benchmark.py index 57e837c5..6a2942af 100644 --- a/hpobench/abstract_benchmark.py +++ b/hpobench/abstract_benchmark.py @@ -1,20 +1,20 @@ """ Base-class of all benchmarks """ import abc -from typing import Union, Dict, List, Tuple import functools - import logging +from typing import Union, Dict, List, Tuple + import ConfigSpace import numpy as np - from ConfigSpace.util import deactivate_inactive_hyperparameters + from hpobench.util import rng_helper logger = logging.getLogger('AbstractBenchmark') -class AbstractBenchmark(abc.ABC, metaclass=abc.ABCMeta): +class _BaseAbstractBenchmark(abc.ABC, metaclass=abc.ABCMeta): def __init__(self, rng: Union[int, np.random.RandomState, None] = None, **kwargs): """ @@ -34,7 +34,7 @@ def __init__(self, rng: Union[int, np.random.RandomState, None] = None, **kwargs np.random.RandomState with seed `rng` is created. If type is None, create a new random state. 
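The `rng` handling described in this docstring is delegated to `hpobench.util.rng_helper.get_rng`. The following is a minimal sketch of the behaviour the docstring describes (int seeds a new RandomState, a RandomState is used as-is, None creates a fresh one); the real helper lives in `rng_helper` and may cover additional cases.

```python
from typing import Union

import numpy as np


def get_rng_sketch(rng: Union[int, np.random.RandomState, None] = None) -> np.random.RandomState:
    """Sketch of the rng semantics described above; not the actual hpobench helper."""
    if isinstance(rng, np.random.RandomState):
        return rng                          # use the passed random state directly
    if isinstance(rng, int):
        return np.random.RandomState(rng)   # seed a new random state with `rng`
    return np.random.RandomState()          # rng is None -> create a new random state
```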
""" - + super(_BaseAbstractBenchmark, self).__init__(**kwargs) self.rng = rng_helper.get_rng(rng=rng) self.configuration_space = self.get_configuration_space(self.rng.randint(0, 10000)) self.fidelity_space = self.get_fidelity_space(self.rng.randint(0, 10000)) @@ -210,20 +210,14 @@ def _check_and_cast_fidelity(fidelity: Union[dict, ConfigSpace.Configuration, No fidelity_space.check_configuration(fidelity) return fidelity - @staticmethod - def _check_return_values(return_values: Dict) -> Dict: - """ - The return values should contain the fields `function_value` and `cost`. - """ - assert 'function_value' in return_values.keys() - assert 'cost' in return_values.keys() - - return return_values - def __call__(self, configuration: Dict, **kwargs) -> float: """ Provides interface to use, e.g., SciPy optimizers """ return self.objective_function(configuration, **kwargs)['function_value'] + @staticmethod + def _check_return_values(return_values: Dict) -> Dict: + raise NotImplementedError() + @staticmethod @abc.abstractmethod def get_configuration_space(seed: Union[int, None] = None) -> ConfigSpace.ConfigurationSpace: @@ -269,74 +263,39 @@ def get_meta_information() -> Dict: raise NotImplementedError() -class AbstractMultiObjectiveBenchmark(AbstractBenchmark): +class AbstractSingleObjectiveBenchmark(_BaseAbstractBenchmark): """ - Abstract Benchmark class for multi-objective benchmarks. - The only purpose of this class is to point out to users that this benchmark returns multiple - objective function values. + Abstract Benchmark class for single-objective benchmarks. + This corresponds to the old AbstractBenchmark class. + + The only purpose of this class is to point out to users that this benchmark returns only a single + objective function value. When writing a benchmark, please make sure to inherit from the correct abstract class. """ - @abc.abstractmethod - def objective_function(self, configuration: Union[ConfigSpace.Configuration, Dict], - fidelity: Union[Dict, ConfigSpace.Configuration, None] = None, - rng: Union[np.random.RandomState, int, None] = None, - **kwargs) -> Dict: - """ - Objective function. - - Override this function to provide your multi-objective benchmark function. This - function will be called by one of the evaluate functions. For - flexibility, you have to return a dictionary with the only mandatory - key being `function_values`, the objective function values for the - `configuration` which was passed. By convention, all benchmarks are - minimization problems. - `function_value` is a dictionary that contains all available criteria. + @staticmethod + def _check_return_values(return_values: Dict) -> Dict: + """ + The return values should contain the fields `function_value` and `cost`. + """ + assert 'function_value' in return_values.keys() + assert 'cost' in return_values.keys() + return return_values - Parameters - ---------- - configuration : Dict - fidelity: Dict, None - Fidelity parameters, check get_fidelity_space(). Uses default (max) value if None. - rng : np.random.RandomState, int, None - It might be useful to pass a `rng` argument to the function call to - bypass the default "seed" generator. Only using the default random - state (`self.rng`) could lead to an overfitting towards the - `self.rng`'s seed. - Returns - ------- - Dict - Must contain at least the key `function_value` and `cost`. - Note that `function_value` should be a Dict here. 
- """ - raise NotImplementedError() +# Ensure compatibility with older versions of the HPOBench +AbstractBenchmark = AbstractSingleObjectiveBenchmark - @abc.abstractmethod - def objective_function_test(self, configuration: Union[ConfigSpace.Configuration, Dict], - fidelity: Union[Dict, ConfigSpace.Configuration, None] = None, - rng: Union[np.random.RandomState, int, None] = None, - **kwargs) -> Dict: - """ - If there is a different objective function for offline testing, e.g - testing a machine learning on a hold extra test set instead - on a validation set override this function here. - Parameters - ---------- - configuration : Dict - fidelity: Dict, None - Fidelity parameters, check get_fidelity_space(). Uses default (max) value if None. - rng : np.random.RandomState, int, None - see :py:func:`~HPOBench.abstract_benchmark.objective_function` +class AbstractMultiObjectiveBenchmark(_BaseAbstractBenchmark): + """ + Abstract Benchmark class for multi-objective benchmarks. + The only purpose of this class is to point out to users that this benchmark returns multiple + objective function values. - Returns - ------- - Dict - Must contain at least the key `function_value` and `cost`. - """ - raise NotImplementedError() + When writing a benchmark, please make sure to inherit from the correct abstract class. + """ @staticmethod def _check_return_values(return_values: Dict) -> Dict: diff --git a/hpobench/benchmarks/ml/__init__.py b/hpobench/benchmarks/ml/__init__.py index 64e399cd..e69de29b 100644 --- a/hpobench/benchmarks/ml/__init__.py +++ b/hpobench/benchmarks/ml/__init__.py @@ -1,22 +0,0 @@ -from hpobench.benchmarks.ml.histgb_benchmark import HistGBBenchmark, HistGBBenchmarkBB, HistGBBenchmarkMF -from hpobench.benchmarks.ml.lr_benchmark import LRBenchmark, LRBenchmarkBB, LRBenchmarkMF -from hpobench.benchmarks.ml.nn_benchmark import NNBenchmark, NNBenchmarkBB, NNBenchmarkMF -from hpobench.benchmarks.ml.rf_benchmark import RandomForestBenchmark, RandomForestBenchmarkBB, \ - RandomForestBenchmarkMF -from hpobench.benchmarks.ml.svm_benchmark import SVMBenchmark, SVMBenchmarkBB, SVMBenchmarkMF -from hpobench.benchmarks.ml.tabular_benchmark import TabularBenchmark - -try: - from hpobench.benchmarks.ml.xgboost_benchmark import XGBoostBenchmark, XGBoostBenchmarkBB, XGBoostBenchmarkMF -except ImportError: - pass - - -__all__ = ['HistGBBenchmark', 'HistGBBenchmarkBB', 'HistGBBenchmarkMF', - 'LRBenchmark', 'LRBenchmarkBB', 'LRBenchmarkMF', - 'NNBenchmark', 'NNBenchmarkBB', 'NNBenchmarkMF', - 'RandomForestBenchmark', 'RandomForestBenchmarkBB', 'RandomForestBenchmarkMF', - 'SVMBenchmark', 'SVMBenchmarkBB', 'SVMBenchmarkMF', - 'TabularBenchmark', - 'XGBoostBenchmark', 'XGBoostBenchmarkBB', 'XGBoostBenchmarkMF', - ] diff --git a/hpobench/benchmarks/ml/lr_benchmark.py b/hpobench/benchmarks/ml/lr_benchmark.py index 8c317111..aa7aa162 100644 --- a/hpobench/benchmarks/ml/lr_benchmark.py +++ b/hpobench/benchmarks/ml/lr_benchmark.py @@ -4,30 +4,38 @@ 0.0.1: * First implementation of the LR Benchmarks. +0.0.2: +* Restructuring for consistency and to match ML Benchmark Template updates. +0.0.3: +* Adding Learning Curve support. 
""" - +import time from typing import Union, Tuple, Dict import ConfigSpace as CS import numpy as np +import pandas as pd from ConfigSpace.hyperparameters import Hyperparameter from sklearn.linear_model import SGDClassifier +from hpobench.util.rng_helper import get_rng from hpobench.dependencies.ml.ml_benchmark_template import MLBenchmark -__version__ = '0.0.1' +__version__ = '0.0.3' class LRBenchmark(MLBenchmark): - def __init__(self, - task_id: int, - rng: Union[np.random.RandomState, int, None] = None, - valid_size: float = 0.33, - data_path: Union[str, None] = None): - - super(LRBenchmark, self).__init__(task_id, rng, valid_size, data_path) - self.cache_size = 500 + """ Multi-multi-fidelity Logisitic Regression Benchmark + """ + def __init__( + self, + task_id: int, + valid_size: float = 0.33, + rng: Union[np.random.RandomState, int, None] = None, + data_path: Union[str, None] = None + ): + super(LRBenchmark, self).__init__(task_id, valid_size, rng, data_path) @staticmethod def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: @@ -44,7 +52,8 @@ def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSp ]) return cs - def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: + @staticmethod + def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: fidelity_space = CS.ConfigurationSpace(seed=seed) fidelity_space.add_hyperparameters( # gray-box setting (multi-multi-fidelity) - iterations + data subsample @@ -53,17 +62,11 @@ def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationS return fidelity_space @staticmethod - def _get_fidelity_choices(iter_choice: str, subsample_choice: str) -> Tuple[Hyperparameter, Hyperparameter]: + def _get_fidelity_choices( + iter_choice: str, subsample_choice: str + ) -> Tuple[Hyperparameter, Hyperparameter]: """Fidelity space available --- specifies the fidelity dimensions - - For SVM, only a single fidelity exists, i.e., subsample fraction. - if fidelity_choice == 0 - uses the entire data (subsample=1), reflecting the black-box setup - else - parameterizes the fraction of data to subsample - """ - assert iter_choice in ['fixed', 'variable'] assert subsample_choice in ['fixed', 'variable'] @@ -79,14 +82,16 @@ def _get_fidelity_choices(iter_choice: str, subsample_choice: str) -> Tuple[Hype 'subsample', lower=0.1, upper=1.0, default_value=1.0, log=False ) ) - iter = fidelity1[iter_choice] subsample = fidelity2[subsample_choice] return iter, subsample - def init_model(self, config: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - rng: Union[int, np.random.RandomState, None] = None): + def init_model( + self, + config: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + rng: Union[int, np.random.RandomState, None] = None + ): # initializing model rng = self.rng if rng is None else rng @@ -103,13 +108,185 @@ def init_model(self, config: Union[CS.Configuration, Dict], learning_rate="adaptive", tol=None, random_state=rng, - ) return model + def get_model_size(self, model: SGDClassifier = None) -> float: + """ Returns the dimensionality as a proxy for the number of model parameters + + Logistic Regression models have a fixed number of parameters given a dataset. Model size is + being approximated as the number of beta parameters required as the model support plus the + intercept. This depends on the dataset and not on the trained model. 
+ + Parameters + ---------- + model : SGDClassifier + Trained LR model. This parameter is required to maintain function signature. + + Returns + ------- + float + """ + ndims = self.train_X.shape[1] + # accounting for the intercept + ndims += 1 + return ndims + + def _train_objective( + self, + config: Dict, + fidelity: Dict, + shuffle: bool, + rng: Union[np.random.RandomState, int, None] = None, + evaluation: Union[str, None] = "valid", + record_stats: bool = False, + get_learning_curve: bool = False, + lc_every_k: int = 1, + **kwargs + ): + """Function that instantiates a 'config' on a 'fidelity' and trains it + + The ML model is instantiated and trained on the training split. Optionally, the model is + evaluated on the training set. Optionally, the learning curves are collected. + + Parameters + ---------- + config : CS.Configuration, Dict + The hyperparameter configuration. + fidelity : CS.Configuration, Dict + The fidelity configuration. + shuffle : bool (optional) + If True, shuffles the training split before fitting the ML model. + rng : np.random.RandomState, int (optional) + The random seed passed to the ML model and if applicable, used for shuffling the data + and subsampling the dataset fraction. + evaluation : str (optional) + If "valid", the ML model is trained on the training set alone. + If "test", the ML model is trained on the training + validation sets. + record_stats : bool (optional) + If True, records the evaluation metrics of the trained ML model on the training set. + This is set to False by default to reduce overall compute time. + get_learning_curve : bool (optional) + If True, records the learning curve using partial_fit or warm starting, if applicable. + This is set to False by default to reduce overall compute time. + Enabling True, implies that the for each iteration, the model will be evaluated on both + the validation and test sets, optionally on the training set also. + lc_every_k : int (optional) + If True, records the learning curve after every k iterations. 
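The training loop below checkpoints the learning curve at iteration counts returned by `self._get_lc_spacing(model.max_iter, lc_every_k)`. That helper is not part of this hunk; the sketch below is a hypothetical implementation consistent with how it is used (a checkpoint every `k` iterations, always ending at `max_iter`).

```python
# Hypothetical sketch of the `_get_lc_spacing` helper used below (not shown in this diff).
def _get_lc_spacing_sketch(max_iter: int, k: int = 1):
    spacing = list(range(k, max_iter + 1, k))
    if not spacing or spacing[-1] != max_iter:
        spacing.append(max_iter)   # make sure the final model state is always recorded
    return spacing


print(_get_lc_spacing_sketch(10, 3))   # [3, 6, 9, 10]
```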
+ """ + if rng is not None: + rng = get_rng(rng, self.rng) + + # initializing model + model = self.init_model(config, fidelity, rng) + + # preparing data + if evaluation == "valid": + train_X = self.train_X + train_y = self.train_y + elif evaluation == "test": + train_X = np.vstack((self.train_X, self.valid_X)) + train_y = pd.concat((self.train_y, self.valid_y)) + else: + raise ValueError("{} not in ['valid', 'test']".format(evaluation)) + train_idx = np.arange(len(train_X)) if self.train_idx is None else self.train_idx + + # shuffling data + if shuffle: + train_idx = self.shuffle_data_idx(train_idx, rng) + if isinstance(train_idx, np.ndarray): + train_X = train_X[train_idx] + else: + train_X = train_X.iloc[train_idx] + train_y = train_y.iloc[train_idx] + + # subsample here: + # application of the other fidelity to the dataset that the model interfaces + # carried over from previous HPOBench code that borrowed from FABOLAS' SVM + lower_bound_lim = 1.0 / 512.0 + if self.lower_bound_train_size is None: + self.lower_bound_train_size = (10 * self.n_classes) / self.train_X.shape[0] + self.lower_bound_train_size = np.max((lower_bound_lim, self.lower_bound_train_size)) + subsample = np.max((fidelity['subsample'], self.lower_bound_train_size)) + train_idx = self.rng.choice( + np.arange(len(train_X)), size=int( + subsample * len(train_X) + ) + ) + # fitting the model with subsampled data + if get_learning_curve: + # IMPORTANT to allow partial_fit + model.warm_start = True + lc_time = 0.0 + model_fit_time = 0.0 + learning_curves = dict(train=[], valid=[], test=[]) + lc_spacings = self._get_lc_spacing(model.max_iter, lc_every_k) + iter_start = 0 + for i in range(len(lc_spacings)): + iter_end = lc_spacings[i] + start = time.time() + # trains model for k steps + for j in range(iter_end - iter_start): + model.partial_fit( + train_X[train_idx], + train_y.iloc[train_idx], + np.unique(train_y.iloc[train_idx]) + ) + # adding all partial fit times + model_fit_time += time.time() - start + iter_start = iter_end + lc_start = time.time() + if record_stats: + train_pred = model.predict(train_X) + train_loss = 1 - self.scorers['acc']( + train_y, train_pred, **self.scorer_args['acc'] + ) + learning_curves['train'].append(train_loss) + val_pred = model.predict(self.valid_X) + val_loss = 1 - self.scorers['acc']( + self.valid_y, val_pred, **self.scorer_args['acc'] + ) + learning_curves['valid'].append(val_loss) + test_pred = model.predict(self.test_X) + test_loss = 1 - self.scorers['acc']( + self.test_y, test_pred, **self.scorer_args['acc'] + ) + learning_curves['test'].append(test_loss) + # sums the time taken to evaluate and collect data for the learning curves + lc_time += time.time() - lc_start + else: + # default training as per the base benchmark template + learning_curves = None + lc_time = None + start = time.time() + model.fit(train_X[train_idx], train_y.iloc[train_idx]) + model_fit_time = time.time() - start + # model inference + inference_time = 0.0 + # can optionally not record evaluation metrics on training set to save compute + if record_stats: + start = time.time() + pred_train = model.predict(train_X) + inference_time = time.time() - start + # computing statistics on training data + scores = dict() + score_cost = dict() + for k, v in self.scorers.items(): + scores[k] = 0.0 + score_cost[k] = 0.0 + _start = time.time() + if record_stats: + scores[k] = v(train_y, pred_train, **self.scorer_args[k]) + score_cost[k] = time.time() - _start + inference_time + train_loss = 1 - scores["acc"] + return model, 
model_fit_time, train_loss, scores, score_cost, learning_curves, lc_time + class LRBenchmarkBB(LRBenchmark): - def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: + """ Black-box version of the LRBenchmark + """ + @staticmethod + def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: fidelity_space = CS.ConfigurationSpace(seed=seed) fidelity_space.add_hyperparameters( # black-box setting (full fidelity) @@ -119,7 +296,10 @@ def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationS class LRBenchmarkMF(LRBenchmark): - def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: + """ Multi-fidelity version of the LRBenchmark + """ + @staticmethod + def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: fidelity_space = CS.ConfigurationSpace(seed=seed) fidelity_space.add_hyperparameters( # gray-box setting (multi-fidelity) - iterations diff --git a/hpobench/benchmarks/ml/nn_benchmark.py b/hpobench/benchmarks/ml/nn_benchmark.py index 06634661..4263278f 100644 --- a/hpobench/benchmarks/ml/nn_benchmark.py +++ b/hpobench/benchmarks/ml/nn_benchmark.py @@ -4,28 +4,39 @@ 0.0.1: * First implementation of the NN Benchmarks. +0.0.2: +* Restructuring for consistency and to match ML Benchmark Template updates. +0.0.3: +* Adding Learning Curve support. """ +import time from copy import deepcopy from typing import Union, Tuple, Dict import ConfigSpace as CS import numpy as np +import pandas as pd from ConfigSpace.hyperparameters import Hyperparameter from sklearn.neural_network import MLPClassifier +from hpobench.util.rng_helper import get_rng from hpobench.dependencies.ml.ml_benchmark_template import MLBenchmark -__version__ = '0.0.1' +__version__ = '0.0.3' class NNBenchmark(MLBenchmark): - def __init__(self, - task_id: int, - rng: Union[np.random.RandomState, int, None] = None, - valid_size: float = 0.33, - data_path: Union[str, None] = None): - super(NNBenchmark, self).__init__(task_id, rng, valid_size, data_path) + """ Multi-multi-fidelity Multi-Layer Perceptron Benchmark + """ + def __init__( + self, + task_id: int, + valid_size: float = 0.33, + rng: Union[np.random.RandomState, int, None] = None, + data_path: Union[str, None] = None + ): + super(NNBenchmark, self).__init__(task_id, valid_size, rng, data_path) @staticmethod def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: @@ -63,8 +74,11 @@ def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: return fidelity_space @staticmethod - def _get_fidelity_choices(iter_choice: str, subsample_choice: str) -> Tuple[Hyperparameter, Hyperparameter]: - + def _get_fidelity_choices( + iter_choice: str, subsample_choice: str + ) -> Tuple[Hyperparameter, Hyperparameter]: + """Fidelity space available --- specifies the fidelity dimensions + """ fidelity1 = dict( fixed=CS.Constant('iter', value=243), variable=CS.UniformIntegerHyperparameter( @@ -81,11 +95,13 @@ def _get_fidelity_choices(iter_choice: str, subsample_choice: str) -> Tuple[Hype subsample = fidelity2[subsample_choice] return iter, subsample - def init_model(self, config: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - rng: Union[int, np.random.RandomState, None] = None): - """ Function that returns the model initialized based on the configuration and fidelity - """ + def init_model( + self, + config: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] 
= None, + rng: Union[int, np.random.RandomState, None] = None + ): + # initializing model rng = self.rng if rng is None else rng if isinstance(config, CS.Configuration): @@ -99,6 +115,7 @@ def init_model(self, config: Union[CS.Configuration, Dict], config.pop("depth") config.pop("width") hidden_layers = [width] * depth + # TODO: check for iteration length and edit n_iter_no_change maybe model = MLPClassifier( **config, hidden_layer_sizes=hidden_layers, @@ -109,9 +126,175 @@ def init_model(self, config: Union[CS.Configuration, Dict], ) return model + def get_model_size(self, model: MLPClassifier) -> float: + """ Returns the total number of trained parameters in the MLP model + + Parameters + ---------- + model : MLPClassifier + Trained MLP model. + + Returns + ------- + float + """ + nparams = 0 + for layer in model.coefs_: + nparams += layer.shape[0] * layer.shape[1] + for layer in model.intercepts_: + nparams += layer.shape[0] + return nparams + + def _train_objective( + self, + config: Dict, + fidelity: Dict, + shuffle: bool, + rng: Union[np.random.RandomState, int, None] = None, + evaluation: Union[str, None] = "valid", + record_stats: bool = False, + get_learning_curve: bool = False, + lc_every_k: int = 1, + **kwargs + ): + """Function that instantiates a 'config' on a 'fidelity' and trains it + + The ML model is instantiated and trained on the training split. Optionally, the model is + evaluated on the training set. Optionally, the learning curves are collected. + + Parameters + ---------- + config : CS.Configuration, Dict + The hyperparameter configuration. + fidelity : CS.Configuration, Dict + The fidelity configuration. + shuffle : bool (optional) + If True, shuffles the training split before fitting the ML model. + rng : np.random.RandomState, int (optional) + The random seed passed to the ML model and if applicable, used for shuffling the data + and subsampling the dataset fraction. + evaluation : str (optional) + If "valid", the ML model is trained on the training set alone. + If "test", the ML model is trained on the training + validation sets. + record_stats : bool (optional) + If True, records the evaluation metrics of the trained ML model on the training set. + This is set to False by default to reduce overall compute time. + get_learning_curve : bool (optional) + If True, records the learning curve using partial_fit or warm starting, if applicable. + This is set to False by default to reduce overall compute time. + Enabling True, implies that the for each iteration, the model will be evaluated on both + the validation and test sets, optionally on the training set also. + lc_every_k : int (optional) + If True, records the learning curve after every k iterations. 
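Assuming the ML benchmark template forwards extra keyword arguments from `objective_function` down to `_train_objective` (as the `**kwargs` in the signature above suggests), collecting learning curves might look like the sketch below. The task id and fidelity values are placeholders.

```python
# Hedged usage sketch: assumes objective_function forwards `get_learning_curve`,
# `lc_every_k` and `record_stats` to _train_objective. Task id 31 is a placeholder.
from hpobench.benchmarks.ml.nn_benchmark import NNBenchmark

benchmark = NNBenchmark(task_id=31, rng=1)
config = benchmark.get_configuration_space(seed=1).sample_configuration()
result = benchmark.objective_function(
    config,
    fidelity={'iter': 81, 'subsample': 0.5},
    get_learning_curve=True,   # record validation/test loss at intermediate iterations
    lc_every_k=9,              # evaluate the curve every 9 iterations
    record_stats=False,        # skip evaluation on the training split to save time
)
print(result['function_value'], result['cost'])
```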
+ """ + if rng is not None: + rng = get_rng(rng, self.rng) + + # initializing model + model = self.init_model(config, fidelity, rng) + + # preparing data + if evaluation == "valid": + train_X = self.train_X + train_y = self.train_y + elif evaluation == "test": + train_X = np.vstack((self.train_X, self.valid_X)) + train_y = pd.concat((self.train_y, self.valid_y)) + else: + raise ValueError("{} not in ['valid', 'test']".format(evaluation)) + train_idx = np.arange(len(train_X)) if self.train_idx is None else self.train_idx + + # shuffling data + if shuffle: + train_idx = self.shuffle_data_idx(train_idx, rng) + if isinstance(train_idx, np.ndarray): + train_X = train_X[train_idx] + else: + train_X = train_X.iloc[train_idx] + train_y = train_y.iloc[train_idx] + + # subsample here: + # application of the other fidelity to the dataset that the model interfaces + if self.lower_bound_train_size is None: + self.lower_bound_train_size = (10 * self.n_classes) / self.train_X.shape[0] + self.lower_bound_train_size = np.max((1 / 512, self.lower_bound_train_size)) + subsample = np.max((fidelity['subsample'], self.lower_bound_train_size)) + train_idx = self.rng.choice( + np.arange(len(train_X)), size=int( + subsample * len(train_X) + ) + ) + # fitting the model with subsampled data + if get_learning_curve: + # IMPORTANT to allow partial_fit + model.warm_start = True + lc_time = 0.0 + model_fit_time = 0.0 + learning_curves = dict(train=[], valid=[], test=[]) + lc_spacings = self._get_lc_spacing(model.max_iter, lc_every_k) + iter_start = 0 + for i in range(len(lc_spacings)): + iter_end = lc_spacings[i] + start = time.time() + # trains model for k steps + for j in range(iter_end - iter_start): + model.partial_fit( + train_X[train_idx], + train_y.iloc[train_idx], + np.unique(train_y.iloc[train_idx]) + ) + # adding all partial fit times + model_fit_time += time.time() - start + lc_start = time.time() + if record_stats: + train_pred = model.predict(train_X) + train_loss = 1 - self.scorers['acc']( + train_y, train_pred, **self.scorer_args['acc'] + ) + learning_curves['train'].append(train_loss) + val_pred = model.predict(self.valid_X) + val_loss = 1 - self.scorers['acc']( + self.valid_y, val_pred, **self.scorer_args['acc'] + ) + learning_curves['valid'].append(val_loss) + test_pred = model.predict(self.test_X) + test_loss = 1 - self.scorers['acc']( + self.test_y, test_pred, **self.scorer_args['acc'] + ) + learning_curves['test'].append(test_loss) + lc_time += time.time() - lc_start + else: + learning_curves = None + lc_time = None + start = time.time() + model.fit(train_X[train_idx], train_y.iloc[train_idx]) + model_fit_time = time.time() - start + # model inference + inference_time = 0.0 + # can optionally not record evaluation metrics on training set to save compute + if record_stats: + start = time.time() + pred_train = model.predict(train_X) + inference_time = time.time() - start + # computing statistics on training data + scores = dict() + score_cost = dict() + for k, v in self.scorers.items(): + scores[k] = 0.0 + score_cost[k] = 0.0 + _start = time.time() + if record_stats: + scores[k] = v(train_y, pred_train, **self.scorer_args[k]) + score_cost[k] = time.time() - _start + inference_time + train_loss = 1 - scores["acc"] + return model, model_fit_time, train_loss, scores, score_cost, learning_curves, lc_time + class NNBenchmarkBB(NNBenchmark): - def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: + """ Black-box version of the NNBenchmark + """ + @staticmethod + def 
get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: fidelity_space = CS.ConfigurationSpace(seed=seed) fidelity_space.add_hyperparameters( # black-box setting (full fidelity) @@ -121,7 +304,10 @@ def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationS class NNBenchmarkMF(NNBenchmark): - def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: + """ Multi-fidelity version of the NNBenchmark + """ + @staticmethod + def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: fidelity_space = CS.ConfigurationSpace(seed=seed) fidelity_space.add_hyperparameters( # gray-box setting (multi-fidelity) - iterations diff --git a/hpobench/benchmarks/ml/rbv2_benchmark.py b/hpobench/benchmarks/ml/rbv2_benchmark.py new file mode 100644 index 00000000..e69de29b diff --git a/hpobench/benchmarks/ml/rf_benchmark.py b/hpobench/benchmarks/ml/rf_benchmark.py index 596f03b6..b6874788 100644 --- a/hpobench/benchmarks/ml/rf_benchmark.py +++ b/hpobench/benchmarks/ml/rf_benchmark.py @@ -4,28 +4,39 @@ 0.0.1: * First implementation of the RF Benchmarks. +0.0.2: +* Restructuring for consistency and to match ML Benchmark Template updates. +0.0.3: +* Adding Learning Curve support. """ +import time from copy import deepcopy from typing import Union, Tuple, Dict import ConfigSpace as CS import numpy as np +import pandas as pd from ConfigSpace.hyperparameters import Hyperparameter from sklearn.ensemble import RandomForestClassifier +from hpobench.util.rng_helper import get_rng from hpobench.dependencies.ml.ml_benchmark_template import MLBenchmark -__version__ = '0.0.1' +__version__ = '0.0.3' class RandomForestBenchmark(MLBenchmark): - def __init__(self, - task_id: int, - rng: Union[np.random.RandomState, int, None] = None, - valid_size: float = 0.33, - data_path: Union[str, None] = None): - super(RandomForestBenchmark, self).__init__(task_id, rng, valid_size, data_path) + """ Multi-multi-fidelity Random Forest Benchmark + """ + def __init__( + self, + task_id: int, + valid_size: float = 0.33, + rng: Union[np.random.RandomState, int, None] = None, + data_path: Union[str, None] = None + ): + super(RandomForestBenchmark, self).__init__(task_id, valid_size, rng, data_path) @staticmethod def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: @@ -54,12 +65,16 @@ def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: fidelity_space = CS.ConfigurationSpace(seed=seed) fidelity_space.add_hyperparameters( # gray-box setting (multi-multi-fidelity) - ntrees + data subsample - RandomForestBenchmark._get_fidelity_choices(n_estimators_choice='variable', subsample_choice='variable') + RandomForestBenchmark._get_fidelity_choices( + n_estimators_choice='variable', subsample_choice='variable' + ) ) return fidelity_space @staticmethod - def _get_fidelity_choices(n_estimators_choice: str, subsample_choice: str) -> Tuple[Hyperparameter, Hyperparameter]: + def _get_fidelity_choices( + n_estimators_choice: str, subsample_choice: str + ) -> Tuple[Hyperparameter, Hyperparameter]: assert n_estimators_choice in ['fixed', 'variable'] assert subsample_choice in ['fixed', 'variable'] @@ -70,7 +85,6 @@ def _get_fidelity_choices(n_estimators_choice: str, subsample_choice: str) -> Tu 'n_estimators', lower=16, upper=512, default_value=512, log=False ) ) - fidelity2 = dict( fixed=CS.Constant('subsample', value=1), variable=CS.UniformFloatHyperparameter( @@ -81,11 +95,13 @@ def 
_get_fidelity_choices(n_estimators_choice: str, subsample_choice: str) -> Tu subsample = fidelity2[subsample_choice] return n_estimators, subsample - def init_model(self, config: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - rng: Union[int, np.random.RandomState, None] = None): - """ Function that returns the model initialized based on the configuration and fidelity - """ + def init_model( + self, + config: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + rng: Union[int, np.random.RandomState, None] = None + ): + # initializing model rng = self.rng if rng is None else rng if isinstance(config, CS.Configuration): config = config.get_dictionary() @@ -103,23 +119,194 @@ def init_model(self, config: Union[CS.Configuration, Dict], ) return model + def get_model_size(self, model: RandomForestClassifier) -> float: + """ Returns the total number of decision nodes in the entire Random Forest model + + Parameters + ---------- + model : RandomForestClassifier + Trained RF model. + + Returns + ------- + float + """ + nodes = 0 + for tree in model.estimators_: + # total number of nodes in the tree (internal + leaf) + nodes += tree.tree_.node_count + return nodes + + def _train_objective( + self, + config: Dict, + fidelity: Dict, + shuffle: bool, + rng: Union[np.random.RandomState, int, None] = None, + evaluation: Union[str, None] = "valid", + record_stats: bool = False, + get_learning_curve: bool = False, + lc_every_k: int = 1, + **kwargs + ): + """Function that instantiates a 'config' on a 'fidelity' and trains it + + The ML model is instantiated and trained on the training split. Optionally, the model is + evaluated on the training set. Optionally, the learning curves are collected. + + Parameters + ---------- + config : CS.Configuration, Dict + The hyperparameter configuration. + fidelity : CS.Configuration, Dict + The fidelity configuration. + shuffle : bool (optional) + If True, shuffles the training split before fitting the ML model. + rng : np.random.RandomState, int (optional) + The random seed passed to the ML model and if applicable, used for shuffling the data + and subsampling the dataset fraction. + evaluation : str (optional) + If "valid", the ML model is trained on the training set alone. + If "test", the ML model is trained on the training + validation sets. + record_stats : bool (optional) + If True, records the evaluation metrics of the trained ML model on the training set. + This is set to False by default to reduce overall compute time. + get_learning_curve : bool (optional) + If True, records the learning curve using partial_fit or warm starting, if applicable. + This is set to False by default to reduce overall compute time. + Enabling True, implies that the for each iteration, the model will be evaluated on both + the validation and test sets, optionally on the training set also. + lc_every_k : int (optional) + If True, records the learning curve after every k iterations. 
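The learning-curve branch below grows the forest incrementally by raising `n_estimators` with `warm_start=True`, so each `fit` call only adds new trees. A standalone scikit-learn illustration of that pattern (dataset and tree counts are arbitrary):

```python
# Standalone illustration of the warm_start pattern used in the training loop below.
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier

X, y = make_classification(n_samples=200, random_state=0)
model = RandomForestClassifier(n_estimators=0, warm_start=True, random_state=0)
for n_trees in (8, 16, 32):            # checkpoints at which a learning curve could be read
    model.n_estimators = n_trees       # total number of trees after this fit() call
    model.fit(X, y)                    # only the missing trees are fitted
    print(n_trees, len(model.estimators_))   # grows: 8, 16, 32
```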
+ """ + if rng is not None: + rng = get_rng(rng, self.rng) + + # initializing model + model = self.init_model(config, fidelity, rng) + + # preparing data + if evaluation == "valid": + train_X = self.train_X + train_y = self.train_y + elif evaluation == "test": + train_X = np.vstack((self.train_X, self.valid_X)) + train_y = pd.concat((self.train_y, self.valid_y)) + else: + raise ValueError("{} not in ['valid', 'test']".format(evaluation)) + train_idx = np.arange(len(train_X)) if self.train_idx is None else self.train_idx + + # shuffling data + if shuffle: + train_idx = self.shuffle_data_idx(train_idx, rng) + if isinstance(train_idx, np.ndarray): + train_X = train_X[train_idx] + else: + train_X = train_X.iloc[train_idx] + train_y = train_y.iloc[train_idx] + + # subsample here: + # application of the other fidelity to the dataset that the model interfaces + # carried over from previous HPOBench code that borrowed from FABOLAS' SVM + lower_bound_lim = 1.0 / 512.0 + if self.lower_bound_train_size is None: + self.lower_bound_train_size = (10 * self.n_classes) / self.train_X.shape[0] + self.lower_bound_train_size = np.max((lower_bound_lim, self.lower_bound_train_size)) + subsample = np.max((fidelity['subsample'], self.lower_bound_train_size)) + train_idx = self.rng.choice( + np.arange(len(train_X)), size=int( + subsample * len(train_X) + ) + ) + # fitting the model with subsampled data + if get_learning_curve: + lc_spacings = self._get_lc_spacing(model.n_estimators, lc_every_k) + # IMPORTANT to allow refitting with more estimators + model.warm_start = True + model.n_estimators = 0 + lc_time = 0.0 + model_fit_time = 0.0 + learning_curves = dict(train=[], valid=[], test=[]) + iter_start = 0 + # for i in range(fidelity['n_estimators']): + for i in range(len(lc_spacings)): + iter_end = lc_spacings[i] + start = time.time() + # adds k new estimators to the model for training + model.n_estimators += iter_end - iter_start + model.fit(train_X[train_idx], train_y.iloc[train_idx]) + model_fit_time += time.time() - start + lc_start = time.time() + if record_stats: + train_pred = model.predict(train_X) + train_loss = 1 - self.scorers['acc']( + train_y, train_pred, **self.scorer_args['acc'] + ) + learning_curves['train'].append(train_loss) + val_pred = model.predict(self.valid_X) + val_loss = 1 - self.scorers['acc']( + self.valid_y, val_pred, **self.scorer_args['acc'] + ) + learning_curves['valid'].append(val_loss) + test_pred = model.predict(self.test_X) + test_loss = 1 - self.scorers['acc']( + self.test_y, test_pred, **self.scorer_args['acc'] + ) + learning_curves['test'].append(test_loss) + lc_time += time.time() - lc_start + else: + learning_curves = None + lc_time = None + start = time.time() + model.fit(train_X[train_idx], train_y.iloc[train_idx]) + model_fit_time = time.time() - start + # model inference + inference_time = 0.0 + # can optionally not record evaluation metrics on training set to save compute + if record_stats: + start = time.time() + pred_train = model.predict(train_X) + inference_time = time.time() - start + # computing statistics on training data + scores = dict() + score_cost = dict() + for k, v in self.scorers.items(): + scores[k] = 0.0 + score_cost[k] = 0.0 + _start = time.time() + if record_stats: + scores[k] = v(train_y, pred_train, **self.scorer_args[k]) + score_cost[k] = time.time() - _start + inference_time + train_loss = 1 - scores["acc"] + return model, model_fit_time, train_loss, scores, score_cost, learning_curves, lc_time + class 
RandomForestBenchmarkBB(RandomForestBenchmark): - def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: + """ Black-box version of the RandomForestBenchmark + """ + @staticmethod + def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: fidelity_space = CS.ConfigurationSpace(seed=seed) fidelity_space.add_hyperparameters( # black-box setting (full fidelity) - RandomForestBenchmark._get_fidelity_choices(n_estimators_choice='fixed', subsample_choice='fixed') + RandomForestBenchmark._get_fidelity_choices( + n_estimators_choice='fixed', subsample_choice='fixed' + ) ) return fidelity_space class RandomForestBenchmarkMF(RandomForestBenchmark): - def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: + """ Multi-fidelity version of the RandomForestBenchmark + """ + @staticmethod + def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: fidelity_space = CS.ConfigurationSpace(seed=seed) fidelity_space.add_hyperparameters( # gray-box setting (multi-fidelity) - ntrees - RandomForestBenchmark._get_fidelity_choices(n_estimators_choice='variable', subsample_choice='fixed') + RandomForestBenchmark._get_fidelity_choices( + n_estimators_choice='variable', subsample_choice='fixed' + ) ) return fidelity_space diff --git a/hpobench/benchmarks/ml/svm_benchmark.py b/hpobench/benchmarks/ml/svm_benchmark.py index 9462442f..c7b6a816 100644 --- a/hpobench/benchmarks/ml/svm_benchmark.py +++ b/hpobench/benchmarks/ml/svm_benchmark.py @@ -4,6 +4,10 @@ 0.0.1: * First implementation of the new SVM Benchmarks. +0.0.2: +* Restructuring for consistency and to match ML Benchmark Template updates. +0.0.3: +* Adding Learning Curve support. """ from typing import Union, Dict @@ -15,18 +19,21 @@ from hpobench.dependencies.ml.ml_benchmark_template import MLBenchmark -__version__ = '0.0.1' +__version__ = '0.0.3' class SVMBenchmark(MLBenchmark): - def __init__(self, - task_id: int, - rng: Union[np.random.RandomState, int, None] = None, - valid_size: float = 0.33, - data_path: Union[str, None] = None): - super(SVMBenchmark, self).__init__(task_id, rng, valid_size, data_path) - - self.cache_size = 200 + """ Multi-multi-fidelity SVM Benchmark + """ + def __init__( + self, + task_id: int, + valid_size: float = 0.33, + rng: Union[np.random.RandomState, int, None] = None, + data_path: Union[str, None] = None + ): + super(SVMBenchmark, self).__init__(task_id, valid_size, rng, data_path) + self.cache_size = 1024 # in MB @staticmethod def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: @@ -54,7 +61,8 @@ def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: @staticmethod def _get_fidelity_choices(subsample_choice: str) -> Hyperparameter: - + """Fidelity space available --- specifies the fidelity dimensions + """ assert subsample_choice in ['fixed', 'variable'] fidelity = dict( @@ -64,12 +72,14 @@ def _get_fidelity_choices(subsample_choice: str) -> Hyperparameter: ) ) subsample = fidelity[subsample_choice] - return subsample - def init_model(self, config: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - rng: Union[int, np.random.RandomState, None] = None): + def init_model( + self, + config: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + rng: Union[int, np.random.RandomState, None] = None + ): # initializing model rng = self.rng if rng is None else rng if isinstance(config, CS.Configuration): @@ 
-81,9 +91,27 @@ def init_model(self, config: Union[CS.Configuration, Dict], ) return model + def get_model_size(self, model: SVC) -> float: + """ Returns the number of support vectors in the SVM model + + Parameters + ---------- + model : SVC + Trained SVM model. + + Returns + ------- + float + """ + nsupport = model.support_.shape[0] + return nsupport + class SVMBenchmarkBB(SVMBenchmark): - def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: + """ Black-box version of the SVMBenchmark + """ + @staticmethod + def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: fidelity_space = CS.ConfigurationSpace(seed=seed) fidelity_space.add_hyperparameter( # uses the entire data (subsample=1), reflecting the black-box setup diff --git a/hpobench/benchmarks/ml/svm_benchmark_old.py b/hpobench/benchmarks/ml/svm_benchmark_old.py deleted file mode 100644 index 9aad5e44..00000000 --- a/hpobench/benchmarks/ml/svm_benchmark_old.py +++ /dev/null @@ -1,354 +0,0 @@ -""" - -Changelog: -========== -0.0.3 -* New container release due to a general change in the communication between container and HPOBench. - Works with HPOBench >= v0.0.8 - -0.0.2: -* Standardize the structure of the meta information - -0.0.1: -* First implementation - -""" - -import logging -import time -from typing import Union, Tuple, Dict, List - -import ConfigSpace as CS -import numpy as np -from scipy import sparse -from sklearn import pipeline -from sklearn import svm -from sklearn.compose import ColumnTransformer -from sklearn.impute import SimpleImputer -from sklearn.metrics import accuracy_score, make_scorer -from sklearn.preprocessing import OneHotEncoder, MinMaxScaler - -import hpobench.util.rng_helper as rng_helper -from hpobench.abstract_benchmark import AbstractBenchmark -from hpobench.util.openml_data_manager import OpenMLHoldoutDataManager - -__version__ = '0.0.3' - -logger = logging.getLogger('SVMBenchmark') - - -class SupportVectorMachine(AbstractBenchmark): - """ - Hyperparameter optimization task to optimize the regularization - parameter C and the kernel parameter gamma of a support vector machine. - Both hyperparameters are optimized on a log scale in [-10, 10]. - The X_test data set is only used for a final offline evaluation of - a configuration. For that the validation and training data is - concatenated to form the whole training data set. - """ - - def __init__(self, task_id: Union[int, None] = None, - rng: Union[np.random.RandomState, int, None] = None): - """ - Parameters - ---------- - task_id : int, None - rng : np.random.RandomState, int, None - """ - super(SupportVectorMachine, self).__init__(rng=rng) - - self.task_id = task_id - self.cache_size = 200 # Cache for the SVC in MB - self.accuracy_scorer = make_scorer(accuracy_score) - - self.x_train, self.y_train, self.x_valid, self.y_valid, self.x_test, self.y_test, variable_types = \ - self.get_data() - self.categorical_data = np.array([var_type == 'categorical' for var_type in variable_types]) - - # Sort data (Categorical + numerical) so that categorical and continous are not mixed. 
- categorical_idx = np.argwhere(self.categorical_data) - continuous_idx = np.argwhere(~self.categorical_data) - sorting = np.concatenate([categorical_idx, continuous_idx]).squeeze() - self.categorical_data = self.categorical_data[sorting] - self.x_train = self.x_train[:, sorting] - self.x_valid = self.x_valid[:, sorting] - self.x_test = self.x_test[:, sorting] - - nan_columns = np.all(np.isnan(self.x_train), axis=0) - self.categorical_data = self.categorical_data[~nan_columns] - self.x_train, self.x_valid, self.x_test, self.categories = \ - OpenMLHoldoutDataManager.replace_nans_in_cat_columns(self.x_train, self.x_valid, self.x_test, - is_categorical=self.categorical_data) - - self.train_idx = self.rng.choice(a=np.arange(len(self.x_train)), - size=len(self.x_train), - replace=False) - - # Similar to [Fast Bayesian Optimization of Machine Learning Hyperparameters on Large Datasets] - # (https://arxiv.org/pdf/1605.07079.pdf), - # use 10 time the number of classes as lower bound for the dataset fraction - n_classes = np.unique(self.y_train).shape[0] - self.lower_bound_train_size = (10 * n_classes) / self.x_train.shape[0] - - def get_data(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, List]: - """ Loads the data given a task or another source. """ - - assert self.task_id is not None, NotImplementedError('No task-id given. Please either specify a task-id or ' - 'overwrite the get_data method.') - - data_manager = OpenMLHoldoutDataManager(openml_task_id=self.task_id, rng=self.rng) - x_train, y_train, x_val, y_val, x_test, y_test = data_manager.load() - - return x_train, y_train, x_val, y_val, x_test, y_test, data_manager.variable_types - - def shuffle_data(self, rng=None): - """ Reshuffle the training data. If 'rng' is None, the training idx are shuffled according to the - class-random-state""" - random_state = rng_helper.get_rng(rng, self.rng) - random_state.shuffle(self.train_idx) - - # pylint: disable=arguments-differ - @AbstractBenchmark.check_parameters - def objective_function(self, configuration: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - shuffle: bool = False, - rng: Union[np.random.RandomState, int, None] = None, **kwargs) -> Dict: - """ - Trains a SVM model given a hyperparameter configuration and - evaluates the model on the validation set. - - Parameters - ---------- - configuration : Dict, CS.Configuration - Configuration for the SVM model - fidelity: Dict, None - Fidelity parameters for the SVM model, check get_fidelity_space(). Uses default (max) value if None. - shuffle : bool - If ``True``, shuffle the training idx. If no parameter ``rng`` is given, use the class random state. - Defaults to ``False``. - rng : np.random.RandomState, int, None, - Random seed for benchmark. By default the class level random seed. - - To prevent overfitting on a single seed, it is possible to pass a - parameter ``rng`` as 'int' or 'np.random.RandomState' to this function. - If this parameter is not given, the default random state is used. 
- kwargs - - Returns - ------- - Dict - - function_value : validation loss - cost : time to train and evaluate the model - info : Dict - train_loss : training loss - fidelity : used fidelities in this evaluation - """ - start_time = time.time() - - self.rng = rng_helper.get_rng(rng=rng, self_rng=self.rng) - - if shuffle: - self.shuffle_data(self.rng) - - # Split of dataset subset - if self.lower_bound_train_size > fidelity['dataset_fraction']: - train_size = self.lower_bound_train_size - logger.warning(f'The given data set fraction is lower than the lower bound (10 * number of classes.) ' - f'Increase the fidelity from {fidelity["dataset_fraction"]:.8f} to ' - f'{self.lower_bound_train_size:.8f}') - else: - train_size = fidelity['dataset_fraction'] - - train_size = int(train_size * len(self.train_idx)) - train_idx = self.train_idx[:train_size] - - # Transform hyperparameters to linear scale - hp_c = np.exp(float(configuration['C'])) - hp_gamma = np.exp(float(configuration['gamma'])) - - # Train support vector machine - model = self.get_pipeline(hp_c, hp_gamma) - model.fit(self.x_train[train_idx], self.y_train[train_idx]) - - # Compute validation error - train_loss = 1 - self.accuracy_scorer(model, self.x_train[train_idx], self.y_train[train_idx]) - val_loss = 1 - self.accuracy_scorer(model, self.x_valid, self.y_valid) - - cost = time.time() - start_time - - return {'function_value': float(val_loss), - "cost": cost, - 'info': {'train_loss': float(train_loss), - 'fidelity': fidelity}} - - # pylint: disable=arguments-differ - @AbstractBenchmark.check_parameters - def objective_function_test(self, configuration: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - shuffle: bool = False, - rng: Union[np.random.RandomState, int, None] = None, **kwargs) -> Dict: - """ - Trains a SVM model with a given configuration on both the X_train - and validation data set and evaluates the model on the X_test data set. - - Parameters - ---------- - configuration : Dict, CS.Configuration - Configuration for the SVM Model - fidelity: Dict, None - Fidelity parameters, check get_fidelity_space(). Uses default (max) value if None. - shuffle : bool - If ``True``, shuffle the training idx. If no parameter ``rng`` is given, use the class random state. - Defaults to ``False``. - rng : np.random.RandomState, int, None, - Random seed for benchmark. By default the class level random seed. - To prevent overfitting on a single seed, it is possible to pass a - parameter ``rng`` as 'int' or 'np.random.RandomState' to this function. - If this parameter is not given, the default random state is used. 
- kwargs - - Returns - ------- - Dict - - function_value : X_test loss - cost : time to X_train and evaluate the model - info : Dict - train_valid_loss: Loss on the train+valid data set - fidelity : used fidelities in this evaluation - """ - assert np.isclose(fidelity['dataset_fraction'], 1), \ - f'Data set fraction must be 1 but was {fidelity["dataset_fraction"]}' - - self.rng = rng_helper.get_rng(rng=rng, self_rng=self.rng) - - if shuffle: - self.shuffle_data(self.rng) - - start_time = time.time() - - # Concatenate training and validation dataset - if isinstance(self.x_train, sparse.csr.csr_matrix) or isinstance(self.x_valid, sparse.csr.csr_matrix): - data = sparse.vstack((self.x_train, self.x_valid)) - else: - data = np.concatenate((self.x_train, self.x_valid)) - targets = np.concatenate((self.y_train, self.y_valid)) - - # Transform hyperparameters to linear scale - hp_c = np.exp(float(configuration['C'])) - hp_gamma = np.exp(float(configuration['gamma'])) - - model = self.get_pipeline(hp_c, hp_gamma) - model.fit(data, targets) - - # Compute validation error - train_valid_loss = 1 - self.accuracy_scorer(model, data, targets) - - # Compute test error - test_loss = 1 - self.accuracy_scorer(model, self.x_test, self.y_test) - - cost = time.time() - start_time - - return {'function_value': float(test_loss), - "cost": cost, - 'info': {'train_valid_loss': float(train_valid_loss), - 'fidelity': fidelity}} - - def get_pipeline(self, C: float, gamma: float) -> pipeline.Pipeline: - """ Create the scikit-learn (training-)pipeline """ - - model = pipeline.Pipeline([ - ('preprocess_impute', - ColumnTransformer([ - ("categorical", "passthrough", self.categorical_data), - ("continuous", SimpleImputer(strategy="mean"), ~self.categorical_data)])), - ('preprocess_one_hot', - ColumnTransformer([ - ("categorical", OneHotEncoder(categories=self.categories, sparse=False), self.categorical_data), - ("continuous", MinMaxScaler(feature_range=(0, 1)), ~self.categorical_data)])), - ('svm', - svm.SVC(gamma=gamma, C=C, random_state=self.rng, cache_size=self.cache_size)) - ]) - return model - - @staticmethod - def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: - """ - Creates a ConfigSpace.ConfigurationSpace containing all parameters for - the SVM Model - - For a detailed explanation of the hyperparameters: - https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html - - Parameters - ---------- - seed : int, None - Fixing the seed for the ConfigSpace.ConfigurationSpace - - Returns - ------- - ConfigSpace.ConfigurationSpace - """ - - seed = seed if seed is not None else np.random.randint(1, 100000) - cs = CS.ConfigurationSpace(seed=seed) - - cs.add_hyperparameters([ - CS.UniformFloatHyperparameter('C', lower=-10., upper=10., default_value=0., log=False), - CS.UniformFloatHyperparameter('gamma', lower=-10., upper=10., default_value=1., log=False), - ]) - # cs.generate_all_continuous_from_bounds(SupportVectorMachine.get_meta_information()['bounds']) - return cs - - @staticmethod - def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: - """ - Creates a ConfigSpace.ConfigurationSpace containing all fidelity parameters for - the SupportVector Benchmark - - Fidelities - ---------- - dataset_fraction: float - [0.1, 1] - fraction of training data set to use - - Parameters - ---------- - seed : int, None - Fixing the seed for the ConfigSpace.ConfigurationSpace - - Returns - ------- - ConfigSpace.ConfigurationSpace - """ - seed = seed if seed is not None 
else np.random.randint(1, 100000) - fidel_space = CS.ConfigurationSpace(seed=seed) - - fidel_space.add_hyperparameters([ - CS.UniformFloatHyperparameter("dataset_fraction", lower=0.0, upper=1.0, default_value=1.0, log=False), - ]) - return fidel_space - - def get_meta_information(self): - """ Returns the meta information for the benchmark """ - return {'name': 'Support Vector Machine', - 'references': ["@InProceedings{pmlr-v54-klein17a", - "author = {Aaron Klein and Stefan Falkner and Simon Bartels and Philipp Hennig and " - "Frank Hutter}, " - "title = {{Fast Bayesian Optimization of Machine Learning Hyperparameters on " - "Large Datasets}}" - "pages = {528--536}, year = {2017}," - "editor = {Aarti Singh and Jerry Zhu}," - "volume = {54}," - "series = {Proceedings of Machine Learning Research}," - "address = {Fort Lauderdale, FL, USA}," - "month = {20--22 Apr}," - "publisher = {PMLR}," - "pdf = {http://proceedings.mlr.press/v54/klein17a/klein17a.pdf}, " - "url = {http://proceedings.mlr.press/v54/klein17a.html}, " - ], - 'code': 'https://github.com/automl/HPOlib1.5/blob/container/hpolib/benchmarks/ml/svm_benchmark.py', - 'shape of train data': self.x_train.shape, - 'shape of test data': self.x_test.shape, - 'shape of valid data': self.x_valid.shape, - 'initial random seed': self.rng, - 'task_id': self.task_id - } diff --git a/hpobench/benchmarks/ml/tabular_benchmark.py b/hpobench/benchmarks/ml/tabular_benchmark.py index 72e5fb31..342766b4 100644 --- a/hpobench/benchmarks/ml/tabular_benchmark.py +++ b/hpobench/benchmarks/ml/tabular_benchmark.py @@ -4,6 +4,10 @@ 0.0.1: * First implementation of the Tabular Benchmark. +0.0.2: +* Restructuring for consistency and to match ML Benchmark Template updates. +0.0.3: +* Adding Learning Curve support. """ from pathlib import Path @@ -17,7 +21,7 @@ from hpobench.dependencies.ml.ml_benchmark_template import metrics from hpobench.util.data_manager import TabularDataManager -__version__ = '0.0.1' +__version__ = '0.0.3' class TabularBenchmark(AbstractBenchmark): @@ -145,8 +149,8 @@ def _search_dataframe(self, row_dict, df): for i, param in enumerate(df.drop("result", axis=1).columns): mask *= df[param].values == row_dict[param] idx = np.where(mask) - assert len(idx) == 1, 'The query has resulted into mulitple matches. This should not happen. ' \ - f'The Query was {row_dict}' + assert len(idx) == 1, 'The query has resulted into mulitple matches. ' \ + 'This should not happen. The Query was {row_dict}' idx = idx[0][0] result = df.iloc[idx]["result"] return result @@ -163,7 +167,7 @@ def _objective( metric_str = ', '.join(list(metrics.keys())) assert metric in list(metrics.keys()), f"metric not found among: {metric_str}" score_key = f"{evaluation}_scores" - cost_key = f"{evaluation}_scores" + cost_key = f"{evaluation}_costs" key_path = dict() for name in self.configuration_space.get_hyperparameter_names(): diff --git a/hpobench/benchmarks/ml/xgboost_benchmark.py b/hpobench/benchmarks/ml/xgboost_benchmark.py index ae554628..234c2cee 100644 --- a/hpobench/benchmarks/ml/xgboost_benchmark.py +++ b/hpobench/benchmarks/ml/xgboost_benchmark.py @@ -4,7 +4,12 @@ 0.0.1: * First implementation of the new XGB Benchmarks. +0.0.2: +* Restructuring for consistency and to match ML Benchmark Template updates. +0.0.3: +* Adding Learning Curve support. 
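The one-character `cost_key` fix in the `tabular_benchmark.py` hunk above is easy to miss: previously the benchmark read its runtime from the same `*_scores` table that feeds the objective. A minimal stand-alone sketch of the lookup; the record layout and the metric name are illustrative assumptions, not the exact stored schema:

```python
# Illustrative record shaped like a stored tabular result (keys follow the hunk above).
result = {
    "val_scores":  {"acc": 0.91, "bal_acc": 0.89},
    "val_costs":   {"acc": 12.3, "bal_acc": 12.9},
    "test_scores": {"acc": 0.90, "bal_acc": 0.88},
    "test_costs":  {"acc": 13.1, "bal_acc": 13.4},
}

evaluation, metric = "val", "acc"
score_key = f"{evaluation}_scores"
cost_key = f"{evaluation}_costs"        # was f"{evaluation}_scores" before the fix

score = result[score_key][metric]       # feeds the returned objective value
cost = result[cost_key][metric]         # now a runtime rather than a second accuracy
print(score, cost)                      # 0.91 12.3
```

Before the fix, any runtime-aware comparison on the tabular benchmarks silently compared accuracies instead of costs.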
""" + from typing import Union, Tuple, Dict import ConfigSpace as CS @@ -12,18 +17,23 @@ import xgboost as xgb from ConfigSpace.hyperparameters import Hyperparameter +from hpobench.util.rng_helper import get_rng from hpobench.dependencies.ml.ml_benchmark_template import MLBenchmark -__version__ = '0.0.1' +__version__ = '0.0.3' class XGBoostBenchmark(MLBenchmark): - def __init__(self, - task_id: int, - rng: Union[np.random.RandomState, int, None] = None, - valid_size: float = 0.33, - data_path: Union[str, None] = None): - super(XGBoostBenchmark, self).__init__(task_id, rng, valid_size, data_path) + """ Multi-multi-fidelity XGBoost Benchmark + """ + def __init__( + self, + task_id: int, + valid_size: float = 0.33, + rng: Union[np.random.RandomState, int, None] = None, + data_path: Union[str, None] = None + ): + super(XGBoostBenchmark, self).__init__(task_id, valid_size, rng, data_path) @staticmethod def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: @@ -52,12 +62,16 @@ def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: fidelity_space = CS.ConfigurationSpace(seed=seed) fidelity_space.add_hyperparameters( # gray-box setting (multi-multi-fidelity) - ntrees + data subsample - XGBoostBenchmark._get_fidelity_choices(n_estimators_choice='variable', subsample_choice='variable') + XGBoostBenchmark._get_fidelity_choices( + n_estimators_choice='variable', subsample_choice='variable' + ) ) return fidelity_space @staticmethod - def _get_fidelity_choices(n_estimators_choice: str, subsample_choice: str) -> Tuple[Hyperparameter, Hyperparameter]: + def _get_fidelity_choices( + n_estimators_choice: str, subsample_choice: str + ) -> Tuple[Hyperparameter, Hyperparameter]: assert n_estimators_choice in ['fixed', 'variable'] assert subsample_choice in ['fixed', 'variable'] @@ -74,28 +88,31 @@ def _get_fidelity_choices(n_estimators_choice: str, subsample_choice: str) -> Tu 'subsample', lower=0.1, upper=1, default_value=1, log=False ) ) - n_estimators = fidelity1[n_estimators_choice] subsample = fidelity2[subsample_choice] return n_estimators, subsample - def init_model(self, - config: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - rng: Union[int, np.random.RandomState, None] = None): - """ Function that returns the model initialized based on the configuration and fidelity - """ + def init_model( + self, + config: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + rng: Union[int, np.random.RandomState, None] = None + ): + # initializing model + rng = self.rng if rng is None else get_rng(rng) + # xgb.XGBClassifier when trainied using the scikit-learn API of `fit`, requires + # random_state to be an integer and doesn't accept a RandomState + seed = rng.randint(1, 10**6) + if isinstance(config, CS.Configuration): config = config.get_dictionary() if isinstance(fidelity, CS.Configuration): fidelity = fidelity.get_dictionary() - - rng = rng if (rng is None or isinstance(rng, int)) else self.seed extra_args = dict( booster="gbtree", n_estimators=fidelity['n_estimators'], objective="binary:logistic", - random_state=rng, + random_state=seed, subsample=1 ) if self.n_classes > 2: @@ -108,23 +125,48 @@ def init_model(self, ) return model + def get_model_size(self, model: xgb.XGBClassifier) -> float: + """ Returns the total number of decision nodes in the sequence of Gradient Boosted trees + + Parameters + ---------- + model : xgb.XGBClassifier + Trained XGB model. 
+ + Returns + ------- + float + """ + nodes = model.get_booster().trees_to_dataframe().shape[0] + return nodes + class XGBoostBenchmarkBB(XGBoostBenchmark): - def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: + """ Black-box version of the XGBoostBenchmark + """ + @staticmethod + def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: fidelity_space = CS.ConfigurationSpace(seed=seed) fidelity_space.add_hyperparameters( # black-box setting (full fidelity) - XGBoostBenchmark._get_fidelity_choices(n_estimators_choice='fixed', subsample_choice='fixed') + XGBoostBenchmark._get_fidelity_choices( + n_estimators_choice='fixed', subsample_choice='fixed' + ) ) return fidelity_space class XGBoostBenchmarkMF(XGBoostBenchmark): - def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: + """ Multi-fidelity version of the XGBoostBenchmark + """ + @staticmethod + def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: fidelity_space = CS.ConfigurationSpace(seed=seed) fidelity_space.add_hyperparameters( # gray-box setting (multi-fidelity) - ntrees - XGBoostBenchmark._get_fidelity_choices(n_estimators_choice='variable', subsample_choice='fixed') + XGBoostBenchmark._get_fidelity_choices( + n_estimators_choice='variable', subsample_choice='fixed' + ) ) return fidelity_space diff --git a/hpobench/benchmarks/ml/xgboost_benchmark_old.py b/hpobench/benchmarks/ml/xgboost_benchmark_old.py deleted file mode 100644 index f8730f52..00000000 --- a/hpobench/benchmarks/ml/xgboost_benchmark_old.py +++ /dev/null @@ -1,430 +0,0 @@ -""" - -Changelog: -========== -0.0.3 -* New container release due to a general change in the communication between container and HPOBench. - Works with HPOBench >= v0.0.8 - -0.0.2: -* Change the search space definiton to match the paper: (https://arxiv.org/pdf/1802.09596.pdf) - eta: [1e-5, 1] (def: 0.3) -> [2**-10, 1] (def: 0.3) - min_child_weight: [0,05, 10] (def: 1) -> [1, 2**7] (def: 1) - colsample_bytree: [0,05, 1] (def: 1) -> [0.01, 1] (def: 1) - colsample_bylevel: [0,05, 1] (def: 1) -> [0.01, 1] (def: 1) - reg_lambda: [1e-5, 2] (def: 1) -> [2**-10, 2**10] (def: 1) - reg_alpha: [1e-5, 2] (def: 1e-5) -> [2**-10, 2**10] (def: 1) - max_depth: - -> [1, 15] (def: 6) - subsample_per_it: - -> [0.01, 1] (def: 1) - [booster: - -> [gbtree, gblinear, dart] (def: gbtree)] *) - - *) This parameter is only in the XGBoostExtendedBenchmark. Not in the XGBoostBenchmark class. - -* Increase the fidelity `n_estimators` - n_estimators [2, 128] (def: 128) -> [1, 256] (def: 256) - -* Add class to optimize also the used booster method: (gbtree, gblinear or dart) - We have introduced a new class, which adds the used booster as parameter to the configuration space. To read more - about booster, please take a look in the official XGBoost-documentation (https://xgboost.readthedocs.io/en/latest). - - -0.0.1: -* First implementation of a XGBoost Benchmark. 
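Stepping back to the new `xgboost_benchmark.py` above (before the removal of the legacy file continues): the three public classes differ only in which fidelities remain tunable, and since `get_fidelity_space` is static in all of them after this change, the spaces can be compared without instantiating a benchmark or downloading data. A hedged sketch, assuming HPOBench is installed with its ML extras:

```python
# Compare the fidelity spaces of the three XGBoost variants (illustration only).
from hpobench.benchmarks.ml.xgboost_benchmark import (
    XGBoostBenchmark,      # gray-box: n_estimators and subsample both variable
    XGBoostBenchmarkMF,    # multi-fidelity: only n_estimators variable
    XGBoostBenchmarkBB,    # black-box: both fidelities fixed
)

for cls in (XGBoostBenchmark, XGBoostBenchmarkMF, XGBoostBenchmarkBB):
    print(cls.__name__)
    print(cls.get_fidelity_space(seed=1))
```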
- - -""" - -import logging -import time -from typing import Union, Tuple, Dict, List - -import ConfigSpace as CS -import numpy as np -import xgboost as xgb -from sklearn import pipeline -from sklearn.compose import ColumnTransformer -from sklearn.impute import SimpleImputer -from sklearn.metrics import accuracy_score, make_scorer -from sklearn.preprocessing import OneHotEncoder - -import hpobench.util.rng_helper as rng_helper -from hpobench.abstract_benchmark import AbstractBenchmark -from hpobench.util.openml_data_manager import OpenMLHoldoutDataManager - -__version__ = '0.0.3' - -logger = logging.getLogger('XGBBenchmark') - - -class XGBoostBenchmark(AbstractBenchmark): - - def __init__(self, task_id: Union[int, None] = None, n_threads: int = 1, - rng: Union[np.random.RandomState, int, None] = None): - """ - - Parameters - ---------- - task_id : int, None - n_threads : int, None - rng : np.random.RandomState, int, None - """ - - super(XGBoostBenchmark, self).__init__(rng=rng) - self.n_threads = n_threads - self.task_id = task_id - self.accuracy_scorer = make_scorer(accuracy_score) - - self.x_train, self.y_train, self.x_valid, self.y_valid, self.x_test, self.y_test, variable_types = \ - self.get_data() - self.categorical_data = np.array([var_type == 'categorical' for var_type in variable_types]) - - # XGB needs sorted data. Data should be (Categorical + numerical) not mixed. - categorical_idx = np.argwhere(self.categorical_data) - continuous_idx = np.argwhere(~self.categorical_data) - sorting = np.concatenate([categorical_idx, continuous_idx]).squeeze() - self.categorical_data = self.categorical_data[sorting] - self.x_train = self.x_train[:, sorting] - self.x_valid = self.x_valid[:, sorting] - self.x_test = self.x_test[:, sorting] - - nan_columns = np.all(np.isnan(self.x_train), axis=0) - self.categorical_data = self.categorical_data[~nan_columns] - - self.x_train, self.x_valid, self.x_test, self.categories = \ - OpenMLHoldoutDataManager.replace_nans_in_cat_columns(self.x_train, self.x_valid, self.x_test, - is_categorical=self.categorical_data) - - # Determine the number of categories in the labels. - # In case of binary classification ``self.num_class`` has to be 1 for xgboost. - self.num_class = len(np.unique(np.concatenate([self.y_train, self.y_test, self.y_valid]))) - self.num_class = 1 if self.num_class == 2 else self.num_class - - self.train_idx = self.rng.choice(a=np.arange(len(self.x_train)), - size=len(self.x_train), - replace=False) - - # Similar to [Fast Bayesian Optimization of Machine Learning Hyperparameters on Large Datasets] - # (https://arxiv.org/pdf/1605.07079.pdf), - # use 10 time the number of classes as lower bound for the dataset fraction - n_classes = np.unique(self.y_train).shape[0] - self.lower_bound_train_size = (10 * n_classes) / self.x_train.shape[0] - - def get_data(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, np.ndarray, List]: - """ Loads the data given a task or another source. """ - - assert self.task_id is not None, NotImplementedError('No task-id given. Please either specify a task-id or ' - 'overwrite the get_data method.') - - data_manager = OpenMLHoldoutDataManager(openml_task_id=self.task_id, rng=self.rng) - x_train, y_train, x_val, y_val, x_test, y_test = data_manager.load() - - return x_train, y_train, x_val, y_val, x_test, y_test, data_manager.variable_types - - def shuffle_data(self, rng=None): - """ Reshuffle the training data. 
If 'rng' is None, the training idx are shuffled according to the - class-random-state""" - random_state = rng_helper.get_rng(rng, self.rng) - random_state.shuffle(self.train_idx) - - # pylint: disable=arguments-differ - @AbstractBenchmark.check_parameters - def objective_function(self, configuration: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - shuffle: bool = False, - rng: Union[np.random.RandomState, int, None] = None, **kwargs) -> Dict: - """ - Trains a XGBoost model given a hyperparameter configuration and - evaluates the model on the validation set. - - Parameters - ---------- - configuration : Dict, CS.Configuration - Configuration for the XGBoost model - fidelity: Dict, None - Fidelity parameters for the XGBoost model, check get_fidelity_space(). Uses default (max) value if None. - shuffle : bool - If ``True``, shuffle the training idx. If no parameter ``rng`` is given, use the class random state. - Defaults to ``False``. - rng : np.random.RandomState, int, None, - Random seed for benchmark. By default the class level random seed. - - To prevent overfitting on a single seed, it is possible to pass a - parameter ``rng`` as 'int' or 'np.random.RandomState' to this function. - If this parameter is not given, the default random state is used. - kwargs - - Returns - ------- - Dict - - function_value : validation loss - cost : time to train and evaluate the model - info : Dict - train_loss : trainings loss - fidelity : used fidelities in this evaluation - """ - self.rng = rng_helper.get_rng(rng=rng, self_rng=self.rng) - - if shuffle: - self.shuffle_data(self.rng) - - start = time.time() - - if self.lower_bound_train_size > fidelity['dataset_fraction']: - train_data_fraction = self.lower_bound_train_size - logger.warning(f'The given data set fraction is lower than the lower bound (10 * number of classes.) ' - f'Increase the fidelity from {fidelity["dataset_fraction"]:.8f} to ' - f'{self.lower_bound_train_size:.8f}') - else: - train_data_fraction = fidelity['dataset_fraction'] - - train_idx = self.train_idx[:int(len(self.train_idx) * train_data_fraction)] - - model = self._get_pipeline(n_estimators=fidelity["n_estimators"], **configuration) - model.fit(X=self.x_train[train_idx], y=self.y_train[train_idx]) - - train_loss = 1 - self.accuracy_scorer(model, self.x_train[train_idx], self.y_train[train_idx]) - val_loss = 1 - self.accuracy_scorer(model, self.x_valid, self.y_valid) - cost = time.time() - start - - return {'function_value': float(val_loss), - 'cost': cost, - 'info': {'train_loss': float(train_loss), - 'fidelity': fidelity} - } - - # pylint: disable=arguments-differ - @AbstractBenchmark.check_parameters - def objective_function_test(self, configuration: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - shuffle: bool = False, - rng: Union[np.random.RandomState, int, None] = None, **kwargs) -> Dict: - """ - Trains a XGBoost model with a given configuration on both the train - and validation data set and evaluates the model on the test data set. - - Parameters - ---------- - configuration : Dict, CS.Configuration - Configuration for the XGBoost Model - fidelity: Dict, None - Fidelity parameters, check get_fidelity_space(). Uses default (max) value if None. - shuffle : bool - If ``True``, shuffle the training idx. If no parameter ``rng`` is given, use the class random state. - Defaults to ``False``. - rng : np.random.RandomState, int, None, - Random seed for benchmark. 
By default the class level random seed. - To prevent overfitting on a single seed, it is possible to pass a - parameter ``rng`` as 'int' or 'np.random.RandomState' to this function. - If this parameter is not given, the default random state is used. - kwargs - - Returns - ------- - Dict - - function_value : test loss - cost : time to train and evaluate the model - info : Dict - fidelity : used fidelities in this evaluation - """ - default_dataset_fraction = self.get_fidelity_space().get_hyperparameter('dataset_fraction').default_value - if fidelity['dataset_fraction'] != default_dataset_fraction: - raise NotImplementedError(f'Test error can not be computed for dataset_fraction <= ' - f'{default_dataset_fraction}') - - self.rng = rng_helper.get_rng(rng=rng, self_rng=self.rng) - - if shuffle: - self.shuffle_data(self.rng) - - start = time.time() - - # Impute potential nan values with the feature- - data = np.concatenate((self.x_train, self.x_valid)) - targets = np.concatenate((self.y_train, self.y_valid)) - - model = self._get_pipeline(n_estimators=fidelity['n_estimators'], **configuration) - model.fit(X=data, y=targets) - - test_loss = 1 - self.accuracy_scorer(model, self.x_test, self.y_test) - cost = time.time() - start - - return {'function_value': float(test_loss), - 'cost': cost, - 'info': {'fidelity': fidelity}} - - @staticmethod - def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: - """ - Creates a ConfigSpace.ConfigurationSpace containing all parameters for - the XGBoost Model - - Parameters - ---------- - seed : int, None - Fixing the seed for the ConfigSpace.ConfigurationSpace - - Returns - ------- - ConfigSpace.ConfigurationSpace - """ - seed = seed if seed is not None else np.random.randint(1, 100000) - cs = CS.ConfigurationSpace(seed=seed) - - cs.add_hyperparameters([ - CS.UniformFloatHyperparameter('eta', lower=2**-10, upper=1., default_value=0.3, log=True), - CS.UniformIntegerHyperparameter('max_depth', lower=1, upper=15, default_value=6, log=False), - CS.UniformFloatHyperparameter('min_child_weight', lower=1., upper=2**7., default_value=1., log=True), - CS.UniformFloatHyperparameter('colsample_bytree', lower=0.01, upper=1., default_value=1.), - CS.UniformFloatHyperparameter('colsample_bylevel', lower=0.01, upper=1., default_value=1.), - CS.UniformFloatHyperparameter('reg_lambda', lower=2**-10, upper=2**10, default_value=1, log=True), - CS.UniformFloatHyperparameter('reg_alpha', lower=2**-10, upper=2**10, default_value=1, log=True), - CS.UniformFloatHyperparameter('subsample_per_it', lower=0.1, upper=1, default_value=1, log=False) - ]) - - return cs - - @staticmethod - def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: - """ - Creates a ConfigSpace.ConfigurationSpace containing all fidelity parameters for - the XGBoost Benchmark - - Parameters - ---------- - seed : int, None - Fixing the seed for the ConfigSpace.ConfigurationSpace - - Returns - ------- - ConfigSpace.ConfigurationSpace - """ - seed = seed if seed is not None else np.random.randint(1, 100000) - fidel_space = CS.ConfigurationSpace(seed=seed) - - fidel_space.add_hyperparameters([ - CS.UniformFloatHyperparameter("dataset_fraction", lower=0.0, upper=1.0, default_value=1.0, log=False), - CS.UniformIntegerHyperparameter("n_estimators", lower=1, upper=256, default_value=256, log=False) - ]) - - return fidel_space - - def get_meta_information(self) -> Dict: - """ Returns the meta information for the benchmark """ - return {'name': 'XGBoost', - 'references': 
['@article{probst2019tunability,' - 'title={Tunability: Importance of hyperparameters of machine learning algorithms.},' - 'author={Probst, Philipp and Boulesteix, Anne-Laure and Bischl, Bernd},' - 'journal={J. Mach. Learn. Res.},' - 'volume={20},' - 'number={53},' - 'pages={1--32},' - 'year={2019}' - '}'], - 'code': 'https://github.com/automl/HPOlib1.5/blob/development/hpolib/benchmarks/ml/' - 'xgboost_benchmark_old.py', - 'shape of train data': self.x_train.shape, - 'shape of test data': self.x_test.shape, - 'shape of valid data': self.x_valid.shape, - 'initial random seed': self.rng, - 'task_id': self.task_id - } - - def _get_pipeline(self, max_depth: int, eta: float, min_child_weight: int, - colsample_bytree: float, colsample_bylevel: float, reg_lambda: int, reg_alpha: int, - n_estimators: int, subsample_per_it: float) \ - -> pipeline.Pipeline: - """ Create the scikit-learn (training-)pipeline """ - objective = 'binary:logistic' if self.num_class <= 2 else 'multi:softmax' - - clf = pipeline.Pipeline([ - ('preprocess_impute', - ColumnTransformer([ - ("categorical", "passthrough", self.categorical_data), - ("continuous", SimpleImputer(strategy="mean"), ~self.categorical_data)])), - ('preprocess_one_hot', - ColumnTransformer([ - ("categorical", OneHotEncoder(categories=self.categories, sparse=False), self.categorical_data), - ("continuous", "passthrough", ~self.categorical_data)])), - ('xgb', - xgb.XGBClassifier( - max_depth=max_depth, - learning_rate=eta, - min_child_weight=min_child_weight, - colsample_bytree=colsample_bytree, - colsample_bylevel=colsample_bylevel, - reg_alpha=reg_alpha, - reg_lambda=reg_lambda, - n_estimators=n_estimators, - objective=objective, - n_jobs=self.n_threads, - random_state=self.rng.randint(1, 100000), - num_class=self.num_class, - subsample=subsample_per_it)) - ]) - return clf - - -class XGBoostExtendedBenchmark(XGBoostBenchmark): - """ - Similar to XGBoostBenchmark but enables also the optimization of the used booster. - """ - - def __init__(self, task_id: Union[int, None] = None, n_threads: int = 1, - rng: Union[np.random.RandomState, int, None] = None): - super(XGBoostExtendedBenchmark, self).__init__(task_id=task_id, n_threads=n_threads, rng=rng) - - @staticmethod - def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: - cs = XGBoostBenchmark.get_configuration_space(seed) - hp_booster = CS.CategoricalHyperparameter('booster', choices=['gbtree', 'gblinear', 'dart'], - default_value='gbtree') - cs.add_hyperparameter(hp_booster) - - # XGBoost with 'gblinear' can not use some - # parameters. Exclude them from the configuration space by introducing a condition. - hps = ['colsample_bylevel', 'colsample_bytree', 'max_depth', 'min_child_weight', 'subsample_per_it'] - - # The NotEqualsCondition means: "Make parameter X active if hp_booster is not equal to gblinear." 
- conditions = [CS.NotEqualsCondition(cs.get_hyperparameter(hp), hp_booster, 'gblinear') for hp in hps] - cs.add_conditions(conditions) - return cs - - # noinspection PyMethodOverriding - # pylint: disable=arguments-differ - def _get_pipeline(self, n_estimators: int, booster: str, reg_lambda: int, reg_alpha: int, eta: float, - min_child_weight: int = None, max_depth: int = None, colsample_bytree: float = None, - colsample_bylevel: float = None, subsample_per_it: float = None) \ - -> pipeline.Pipeline: - """ Create the scikit-learn (training-)pipeline """ - objective = 'binary:logistic' if self.num_class <= 2 else 'multi:softmax' - - configuration = dict(booster=booster, - max_depth=max_depth, - learning_rate=eta, - min_child_weight=min_child_weight, - colsample_bytree=colsample_bytree, - colsample_bylevel=colsample_bylevel, - reg_alpha=reg_alpha, - reg_lambda=reg_lambda, - n_estimators=n_estimators, - objective=objective, - n_jobs=self.n_threads, - random_state=self.rng.randint(1, 100000), - num_class=self.num_class, - subsample=subsample_per_it) - - configuration = {k: v for k, v in configuration.items() if v is not None} - - clf = pipeline.Pipeline([ - ('preprocess_impute', - ColumnTransformer([ - ("categorical", "passthrough", self.categorical_data), - ("continuous", SimpleImputer(strategy="mean"), ~self.categorical_data)])), - ('preprocess_one_hot', - ColumnTransformer([ - ("categorical", OneHotEncoder(categories=self.categories, sparse=False), self.categorical_data), - ("continuous", "passthrough", ~self.categorical_data)])), - ('xgb', - xgb.XGBClassifier(**configuration)) - ]) - return clf diff --git a/hpobench/benchmarks/ml/yahpo_benchmark.py b/hpobench/benchmarks/ml/yahpo_benchmark.py new file mode 100644 index 00000000..d06d23fc --- /dev/null +++ b/hpobench/benchmarks/ml/yahpo_benchmark.py @@ -0,0 +1,317 @@ +""" +How to use this benchmark: +-------------------------- + +We recommend using the containerized version of this benchmark. +If you want to use this benchmark locally (without running it via the corresponding container), +you need to perform the following steps. + +Prerequisites: 1) Install Conda +=============================== +Conda environment in which the HPOBench is installed (pip install .). Activate your environment. +``` +conda activate +``` + +Prerequisites: 2) Install R +=========================== + +Install R (4.0.5 - IMPORTANT!) and the required dependencies: # works also with higher R versions(?) 
+ +``` bash +Rscript -e 'install.packages("remotes", repos = "http://cran.r-project.org")' + +# Install OpenML dependencies +Rscript -e 'install.packages("curl", repos = "http://cran.r-project.org")' \ +&& Rscript -e 'install.packages("httr", repos = "http://cran.r-project.org")' \ +&& Rscript -e 'install.packages("farff", repos = "http://cran.r-project.org")' \ +&& Rscript -e 'install.packages("OpenML", repos = "http://cran.r-project.org")' \ + +# Install rbv2 dependencies +Rscript -e 'remotes::install_version("BBmisc", version = "1.11", upgrade = "never", repos = "http://cran.r-project.org")' \ +&& Rscript -e 'remotes::install_version("glmnet", version = "2.0-16", upgrade = "never", repos = "http://cran.r-project.o")' \ +&& Rscript -e 'remotes::install_version("rpart", version = "4.1-13", upgrade = "never", repos = "http://cran.r-project.org")' \ +&& Rscript -e 'remotes::install_version("e1071", version = "1.7-0.1", upgrade = "never", repos = "http://cran.r-project.org")' \ +&& Rscript -e 'remotes::install_version("xgboost", version = "0.82.1", upgrade = "never", repos = "http://cran.r-project.org")' \ +&& Rscript -e 'remotes::install_version("ranger", version = "0.11.2", upgrade = "never", repos = "http://cran.r-project.org")' \ +&& Rscript -e 'remotes::install_version("RcppHNSW", version = "0.1.0", upgrade = "never", repos = "http://cran.r-project.org")' \ +&& Rscript -e 'remotes::install_version("mlr", version = "2.14", upgrade = "never", repos = "http://cran.r-project.org")' \ +&& Rscript -e 'remotes::install_github("mlr-org/mlr3misc", upgrade = "never", repos = "http://cran.r-project.org")' \ +&& Rscript -e 'remotes::install_version("mlrCPO", version = "0.3.6", upgrade = "never", repos = "http://cran.r-projt.org")' \ +&& Rscript -e 'remotes::install_github("pfistfl/rbv2", upgrade = "never")' \ +&& Rscript -e 'remotes::install_version("testthat", version = "3.1.4", upgrade = "never", repos = "http://cran.r-project.org")' \ +&& Rscript -e 'remotes::install_github("sumny/iaml", upgrade = "never")' +``` +Prerequisites: 3) Install rpy2 +============================== +Installing the connector between R and python might be a little bit tricky. +Official installation guide: https://rpy2.github.io/doc/latest/html/introduction.html + +We received in some cases the error: "/opt/R/4.0.5/lib/R/library/methods/libs/methods.so: undefined symbol". +To solve this error, we had to execute the following command: +``` +export LD_LIBRARY_PATH=$(python -m rpy2.situation LD_LIBRARY_PATH):${LD_LIBRARY_PATH} +``` + +1. Download data: +================= +Normally, the data will be downloaded automatically. + +If you want to download the data on your own, you can download the data with the following command: + +``` bash +git clone --depth 1 -b main https://github.com/pfistfl/yahpo_data.git +``` + +Later, you have to give yahpo the link to the data. + +```python +from yahpo_gym import local_config +local_config.init_config() +local_config.set_data_path("path-to-data") +``` + +The data consist of surrogates for different data sets. Each surrogate is a compressed ONNX neural network. + + +2. 
Install HPOBench: +==================== +``` +git clone HPOBench +cd /path/to/HPOBench +pip install .[yahpo_gym_raw] +``` + +Changelog: +========== +0.0.1: +* First implementation +""" # noqa: E501 + +import logging +from pathlib import Path +from typing import Union, Dict, List + +import pandas as pd +import ConfigSpace as CS +import numpy as np +import rpy2.robjects as robjects +from rpy2.robjects.packages import importr +from yahpo_gym.benchmark_set import BenchmarkSet + +from hpobench.abstract_benchmark import AbstractBenchmark, AbstractMultiObjectiveBenchmark + +__version__ = '0.0.1' + +logger = logging.getLogger('YAHPO-Raw') + + +class YAHPOGymMORawBenchmark(AbstractMultiObjectiveBenchmark): + def __init__(self, scenario: str, instance: str, + rng: Union[np.random.RandomState, int, None] = None, + data_dir: Union[Path, str, None] = None): + """ + Parameters + ---------- + scenario : str + Name for the learner. Must be one of [ + "rbv2_ranger", "rbv2_rpart", "rbv2_glmnet", "rbv2_xgboost", "rbv2_svm", "rbv2_aknn", "rbv2_super", + "iaml_ranger", "iaml_rpart", "iaml_glmnet", "iaml_xgboost" + ] + instance : str + A valid instance for the scenario. See `self.benchset.instances`. + https://slds-lmu.github.io/yahpo_gym/scenarios.html#instances + rng : np.random.RandomState, int, None + """ + + assert scenario.startswith('rbv2_') or scenario.startswith('iaml_'), \ + 'Currently, we only support the experiments with rbv2_ and iaml from yahpo. ' \ + f'The scenario has to start with either rbv2_ or iaml_, but was {scenario}' + + from hpobench.util.data_manager import YAHPODataManager + self.data_manager = YAHPODataManager(data_dir=data_dir) + self.data_manager.load() + + self.scenario = scenario + self.instance = instance + self.benchset = BenchmarkSet(scenario, active_session=True) + self.benchset.set_instance(instance) + + logger.info(f'Start Benchmark for scenario {scenario} and instance {instance}') + super(YAHPOGymMORawBenchmark, self).__init__(rng=rng) + + # pylint: disable=arguments-differ + def get_configuration_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: + return self.benchset.get_opt_space(drop_fidelity_params=True, seed=seed) + + # pylint: disable=arguments-differ + def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: + return self.benchset.get_fidelity_space(seed=seed) + + @AbstractMultiObjectiveBenchmark.check_parameters + def objective_function(self, configuration: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + rng: Union[np.random.RandomState, int, None] = None, **kwargs) -> Dict: + + # Cast python dict to R list: + parameters = {**configuration, **fidelity} + r_list = YAHPOGymMORawBenchmark._cast_dict_to_rlist(parameters) + + # Call the random bot evaluation method + if self.scenario.startswith('rbv2_'): + + # Establish a connection to the R package + rbv2pkg = importr('rbv2') + + learner = self.scenario.replace('rbv2_', 'classif.') + r_out = rbv2pkg.eval_config( + learner=learner, task_id=int(configuration['task_id']), configuration=r_list + ) + # Extract the run data frame via replications and cast the R list (result) back to a python dictionary + result_r_df = r_out[0][0][0][4] + result_dict = YAHPOGymMORawBenchmark._cast_to_dict(result_r_df) + result_df = pd.DataFrame(result_dict) + result = result_df.mean(axis=0) + result = result.to_dict() + time_cols = [col for col in result_df.columns if 'time' in col] + times = {col: result_df.loc[:, col].sum() for col in time_cols} + 
result.update(times) + + elif self.scenario.startswith('iaml_'): + + iaml = importr('iaml') + out = iaml.eval_yahpo(scenario=robjects.StrVector([self.scenario]), configuration=r_list) + result = YAHPOGymMORawBenchmark._cast_to_dict(out) + + elif self.scenario.startswith('fair_'): + + fair_pkg = importr('fair') + out = fair_pkg.eval_yahpo(scenario=robjects.StrVector([self.scenario]), configuration=r_list) + result = YAHPOGymMORawBenchmark._cast_to_dict(out) + + else: + raise NotImplementedError() + + objectives = {target: value for target, value in result.items() if target in self.benchset.config.y_names} + additional = {target: value for target, value in result.items() if target not in self.benchset.config.y_names} + + return { + 'function_value': objectives, + 'cost': result['timetrain'], + 'info': {'fidelity': fidelity, 'additional_info': additional} + } + + @AbstractMultiObjectiveBenchmark.check_parameters + def objective_function_test(self, configuration: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + rng: Union[np.random.RandomState, int, None] = None, **kwargs) -> Dict: + return self.objective_function(configuration, fidelity=fidelity, rng=rng) + + @staticmethod + def get_meta_information(): + """ Returns the meta information for the benchmark """ + return {'name': 'YAHPO Gym', + 'references': ['@misc{pfisterer2021yahpo,', + 'title={YAHPO Gym -- Design Criteria and a new Multifidelity Benchmark ' + ' for Hyperparameter Optimization},', + 'author={Florian Pfisterer and Lennart Schneider and Julia Moosbauer ' + ' and Martin Binder and Bernd Bischl},', + 'eprint={2109.03670},', + 'archivePrefix={arXiv},', + 'year={2021}}'], + 'code': ['https://github.com/pfistfl/yahpo_gym/yahpo_gym', + 'https://github.com/pfistfl/rbv2/', + 'https://github.com/sumny/iaml', + 'https://github.com/sumny/fair'] + } + + # pylint: disable=arguments-differ + def get_objective_names(self) -> List[str]: + return self.benchset.config.y_names + + @staticmethod + def _cast_dict_to_rlist(py_dict): + """ Convert a python dictionary to a RPy2 ListVector""" + pairs = [f'{key} = {value}' if not isinstance(value, str) else f'{key} = \"{value}\"' + for key, value in py_dict.items()] + pairs = ",".join(pairs) + str_list = f"list({pairs})" + r_list = robjects.r(str_list) + return r_list + + @staticmethod + def _cast_to_dict(r_list_object) -> Dict: + """ + Convert an RPy2 ListVector to a Python dict. + Source: https://ogeek.cn/qa/?qa=815151/ + """ + result = {} + for i, name in enumerate(r_list_object.names): + if isinstance(r_list_object[i], robjects.ListVector): + result[name] = YAHPOGymMORawBenchmark._cast_to_dict(r_list_object[i]) + elif len(r_list_object[i]) == 1: + result[name] = r_list_object[i][0] + else: + result[name] = r_list_object[i] + return result + + +class YAHPOGymRawBenchmark(AbstractBenchmark): + def __init__(self, scenario: str, instance: str, objective: str = None, + rng: Union[np.random.RandomState, int, None] = None): + """ + Parameters + ---------- + scenario : str + Name for the surrogate data. Must be one of ["lcbench", "fcnet", "nb301", "rbv2_svm", + "rbv2_ranger", "rbv2_rpart", "rbv2_glmnet", "rbv2_aknn", "rbv2_xgboost", "rbv2_super"] + instance : str + A valid instance for the scenario. See `self.benchset.instances`. + https://slds-lmu.github.io/yahpo_gym/scenarios.html#instances + objective : str + Name of the (single-crit) objective. See `self.benchset.config.y_names`. + Initialized to None, picks the first element in y_names. 
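A hedged usage sketch for this single-objective wrapper, showing the `objective=None` behaviour described for the parameter above (the first entry of `y_names` is picked) and where the full multi-objective result is kept. The scenario and instance strings are placeholders, and running this requires the R/rpy2 stack described at the top of the file:

```python
# Illustration only: pick a valid scenario/instance via self.benchset.instances.
from hpobench.benchmarks.ml.yahpo_benchmark import YAHPOGymRawBenchmark

bench = YAHPOGymRawBenchmark(scenario="iaml_rpart", instance="40981")   # objective=None
config = bench.get_configuration_space(seed=0).sample_configuration()

result = bench.objective_function(config)
print(result['function_value'])        # value of the first objective in y_names
print(result['cost'])                  # 'timetrain' reported by the MO backbone
print(result['info']['objectives'])    # the complete multi-objective dict is preserved
```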
+ rng : np.random.RandomState, int, None + """ + self.backbone = YAHPOGymMORawBenchmark(scenario=scenario, instance=instance, rng=rng) + self.objective = objective + super(YAHPOGymRawBenchmark, self).__init__(rng=rng) + + @AbstractBenchmark.check_parameters + def objective_function(self, configuration: Union[CS.Configuration, Dict], + fidelity: Union[Dict, CS.Configuration, None] = None, + rng: Union[np.random.RandomState, int, None] = None, **kwargs) -> Dict: + + mo_results = self.backbone.objective_function(configuration=configuration, + fidelity=fidelity, + **kwargs) + + # If not objective is set, we just grab the first returned entry. + if self.objective is None: + self.objective = self.backbone.benchset.config.y_names[0] + + obj_value = mo_results['function_value'][self.objective] + + return {'function_value': obj_value, + "cost": mo_results['cost'], + 'info': {'fidelity': fidelity, + 'additional_info': mo_results['info']['additional_info'], + 'objectives': mo_results['function_value']}} + + @AbstractBenchmark.check_parameters + def objective_function_test(self, configuration: Union[CS.Configuration, Dict], + fidelity: Union[Dict, CS.Configuration, None] = None, + rng: Union[np.random.RandomState, int, None] = None, **kwargs) -> Dict: + return self.objective_function(configuration, fidelity=fidelity, rng=rng) + + # pylint: disable=arguments-differ + def get_configuration_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: + return self.backbone.get_configuration_space(seed=seed) + + # pylint: disable=arguments-differ + def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: + return self.backbone.get_fidelity_space(seed=seed) + + @staticmethod + def get_meta_information() -> Dict: + return YAHPOGymMORawBenchmark.get_meta_information() diff --git a/hpobench/benchmarks/mo/adult_benchmark.py b/hpobench/benchmarks/mo/adult_benchmark.py index a12e8a70..30631cae 100644 --- a/hpobench/benchmarks/mo/adult_benchmark.py +++ b/hpobench/benchmarks/mo/adult_benchmark.py @@ -1,6 +1,8 @@ """ Changelog: ========== +0.0.2: +* Change the objective value from accuracy to misclassification rate. (1 - accuracy) 0.0.1: * First implementation of the Multi-Objective Fair Adult Benchmark. @@ -127,7 +129,7 @@ def get_meta_information() -> Dict: @staticmethod def get_objective_names() -> List[str]: """Get a list of objectives evaluated in the objective_function. 
""" - return ['accuracy', 'DSP', 'DEO', 'DFP'] + return ['misclassification_rate', 'DSP', 'DEO', 'DFP'] @AbstractMultiObjectiveBenchmark.check_parameters def objective_function(self, configuration: Union[CS.Configuration, Dict], @@ -165,7 +167,7 @@ def objective_function(self, configuration: Union[CS.Configuration, Dict], ------- Dict - function_value : Dict - validation metrics after training on train - accuracy: float + misclassification_rate: float: 1 - validation accuracy DSO: float DEO: float DFP: float @@ -247,7 +249,7 @@ def objective_function(self, configuration: Union[CS.Configuration, Dict], elapsed_time = time.time() - ts_start - return {'function_value': {'accuracy': float(val_accuracy), + return {'function_value': {'misclassification_rate': 1 - float(val_accuracy), 'DSO': float(val_statistical_disparity), 'DEO': float(val_unequal_opportunity), 'DFP': float(val_unequalized_odds) @@ -310,7 +312,7 @@ def objective_function_test(self, configuration: Union[CS.Configuration, Dict], ------- Dict - function_value : Dict - test metrics reported after training on (train+valid) - accuracy: float + misclassification_rate: float: 1 - test accuracy DSO: float DEO: float DFP: float @@ -381,7 +383,7 @@ def objective_function_test(self, configuration: Union[CS.Configuration, Dict], logger.debug(f"config:{configuration}, test_score: {test_accuracy}, train score:{train_accuracy}," f"dsp:{test_statistical_disparity}, deo :{test_unequal_opportunity}, dfp :{test_unequalized_odds}") - return {'function_value': {'accuracy': float(test_accuracy), + return {'function_value': {'misclassification_rate': 1 - float(test_accuracy), 'DSO': float(test_statistical_disparity), 'DEO': float(test_unequal_opportunity), 'DFP': float(test_unequalized_odds) diff --git a/hpobench/benchmarks/mo/cnn_benchmark.py b/hpobench/benchmarks/mo/cnn_benchmark.py index d8bfd939..516b459a 100644 --- a/hpobench/benchmarks/mo/cnn_benchmark.py +++ b/hpobench/benchmarks/mo/cnn_benchmark.py @@ -1,6 +1,9 @@ """ Changelog: ========== +0.0.2: +* Rename the returned function value + 'negative_accuracy' -> 'misclassification_rate' 0.0.1: * First implementation of the Multi-Objective CNN Benchmark. 
@@ -22,7 +25,7 @@ from hpobench.abstract_benchmark import AbstractMultiObjectiveBenchmark from hpobench.util.data_manager import CNNDataManager -__version__ = '0.0.1' +__version__ = '0.0.2' logger = logging.getLogger('MO_CNN') @@ -284,7 +287,7 @@ def get_meta_information() -> Dict: @staticmethod def get_objective_names() -> List[str]: """Get the names of the objectives reported in the objective function.""" - return ['accuracy', 'model_size'] + return ['misclassification_rate', 'model_size'] def init_model(self, config: Union[CS.Configuration, Dict]) -> Net: """ @@ -361,7 +364,7 @@ def objective_function(self, configuration: Union[CS.Configuration, Dict], ------- Dict - function_value : Dict - negative_accuracy: float + misclassification_rate: float 1 - validation accuracy log_model_size: float log10 of the number of parameters @@ -435,7 +438,7 @@ def objective_function(self, configuration: Union[CS.Configuration, Dict], elapsed_time = time.time() - time_in - return {'function_value': {'negative_accuracy': 1 - val_accuracy, + return {'function_value': {'misclassification_rate': 1 - val_accuracy, 'log_model_size': float(np.log10(num_params))}, 'cost': float(training_runtime), 'info': {'train_accuracy': train_accuracy, @@ -479,7 +482,7 @@ def objective_function_test(self, configuration: Union[CS.Configuration, Dict], ------- Dict - function_value : Dict - negative_accuracy: float + misclassification_rate: float 1 - test accuracy log_model_size: float log10 of the number of parameters @@ -546,7 +549,7 @@ def objective_function_test(self, configuration: Union[CS.Configuration, Dict], elapsed_time = time.time() - time_in - return {'function_value': {'negative_accuracy': 1 - test_accuracy, + return {'function_value': {'misclassification_rate': 1 - test_accuracy, 'log_model_size': float(np.log10(num_params))}, 'cost': training_runtime, 'info': {'train_accuracy': train_accuracy, diff --git a/hpobench/benchmarks/nas/nasbench_101.py b/hpobench/benchmarks/nas/nasbench_101.py index f7ee1b20..c0f80737 100644 --- a/hpobench/benchmarks/nas/nasbench_101.py +++ b/hpobench/benchmarks/nas/nasbench_101.py @@ -42,6 +42,11 @@ Changelog: ========== +0.0.5 +* ADD Multi Objective version. Introduce objectives: + - misclassification_rate (0, 1) - lower is better + - trainable_parameters (0, 10**8) - lower is better + 0.0.4 * New container release due to a general change in the communication between container and HPOBench. 
Works with HPOBench >= v0.0.8 @@ -61,23 +66,22 @@ """ import logging - from pathlib import Path -from typing import Union, Dict, Any, Tuple, List +from typing import Union, Dict, Any, Tuple, List, Type import ConfigSpace as CS import numpy as np -from tabular_benchmarks.nas_cifar10 import NASCifar10 from nasbench import api from nasbench.api import OutOfDomainError from nasbench.lib import graph_util +from tabular_benchmarks.nas_cifar10 import NASCifar10 -from hpobench import config_file import hpobench.util.rng_helper as rng_helper -from hpobench.abstract_benchmark import AbstractBenchmark +from hpobench import config_file +from hpobench.abstract_benchmark import AbstractBenchmark, AbstractMultiObjectiveBenchmark from hpobench.util.data_manager import NASBench_101DataManager -__version__ = '0.0.4' +__version__ = '0.0.5' logger = logging.getLogger('NasBench101') MAX_EDGES = 9 @@ -85,17 +89,19 @@ DEFAULT_API_FILE = config_file.data_dir / "nasbench_101" -class NASCifar10BaseBenchmark(AbstractBenchmark): - def __init__(self, benchmark: NASCifar10, +class _NAS101BaseBenchmark: + def __init__(self, + benchmark_type: Type[NASCifar10], data_path: Union[Path, str, None] = None, - rng: Union[np.random.RandomState, int, None] = None, **kwargs): + rng: Union[np.random.RandomState, int, None] = None, + **kwargs): """ Baseclass for the tabular benchmarks https://github.com/automl/nas_benchmarks/tree/master/tabular_benchmarks. Please install the benchmark first. Place the data under ``data_path``. Parameters ---------- - benchmark : NASCifar10 + benchmark_type : Type[NASCifar10] Type of the benchmark to use. Don't call this class directly. Instantiate via subclasses (see below). data_path : str, Path, None Path to the folder, which contains the downloaded file nasbench_full.tfrecord. 
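Across the multi-objective benchmarks touched in this diff (Adult, CNN, and the NAS-101 MO variants announced in the 0.0.5 changelog above), accuracy-style objectives are replaced by `misclassification_rate`, so every entry of `function_value` is minimised. A small self-contained sketch of what that uniform direction buys a generic optimizer; the numbers are made up:

```python
# With all objectives "lower is better", Pareto logic needs no per-objective sign flips.
def dominates(a: dict, b: dict) -> bool:
    """True if a is at least as good as b everywhere and strictly better somewhere."""
    return all(a[k] <= b[k] for k in a) and any(a[k] < b[k] for k in a)

candidate = {'misclassification_rate': 0.18, 'trainable_parameters': 8_555_530}
other     = {'misclassification_rate': 0.21, 'trainable_parameters': 9_102_874}
print(dominates(candidate, other))   # True
```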
@@ -103,21 +109,76 @@ def __init__(self, benchmark: NASCifar10, Random seed for the benchmarks """ - super(NASCifar10BaseBenchmark, self).__init__(rng=rng) - - self.benchmark = benchmark + data_path = self._try_download_api_file(data_path) self.data_path = data_path + self.rng = rng + self.benchmark: NASCifar10 = benchmark_type(data_dir=str(data_path), multi_fidelity=True) + super(_NAS101BaseBenchmark, self).__init__(rng=rng, **kwargs) def _query_benchmark(self, config: Dict, run_index: int, budget: int = 108) -> Dict: raise NotImplementedError - # pylint: disable=arguments-differ - @AbstractBenchmark.check_parameters - def objective_function(self, configuration: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - run_index: Union[int, Tuple, None] = (0, 1, 2), - rng: Union[np.random.RandomState, int, None] = None, - **kwargs) -> Dict: + @staticmethod + def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: + raise NotImplementedError + + @staticmethod + def get_meta_information() -> Dict: + """ Returns the meta information for the benchmark """ + return {'name': 'Tabular Benchmarks for Hyperparameter Optimization and Neural Architecture Search', + 'references': ['@article{klein2019tabular,' + 'title = {Tabular benchmarks for joint architecture and hyperparameter optimization},' + 'author = {Klein, Aaron and Hutter, Frank},' + 'journal = {arXiv preprint arXiv:1905.04970},' + 'year = {2019}}', + 'https://arxiv.org/abs/1905.04970', + ], + 'code': 'https://github.com/automl/nas_benchmarks', + } + + @staticmethod + def _get_configuration_space(benchmark: Any, seed: Union[int, None] = None) -> CS.ConfigurationSpace: + """ Helper function to pass a seed to the configuration space """ + seed = seed if seed is not None else np.random.randint(1, 100000) + cs = benchmark.get_configuration_space() + cs.seed(seed) + return cs + + @staticmethod + def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: + """ + Creates a ConfigSpace.ConfigurationSpace containing all fidelity parameters for + the NAS Benchmark 101. + + Parameters + ---------- + seed : int, None + Fixing the seed for the ConfigSpace.ConfigurationSpace + + Returns + ------- + ConfigSpace.ConfigurationSpace + """ + seed = seed if seed is not None else np.random.randint(1, 100000) + fidel_space = CS.ConfigurationSpace(seed=seed) + + fidel_space.add_hyperparameters([ + CS.OrdinalHyperparameter('budget', sequence=[4, 12, 36, 108], default_value=108) + ]) + + return fidel_space + + @staticmethod + def _try_download_api_file(save_to: Union[Path, str, None]): + data_manager = NASBench_101DataManager(save_to) + data_manager.download() + return data_manager.save_dir + + def _mo_objective_function(self, configuration: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + run_index: Union[int, Tuple, None] = (0, 1, 2), + rng: Union[np.random.RandomState, int, None] = None, + **kwargs) -> Dict: """ Query the NAS-benchmark using a given configuration and a epoch (=budget). 
@@ -144,7 +205,12 @@ def objective_function(self, configuration: Union[CS.Configuration, Dict], Returns ------- Dict - - function_value : validation error + function_value : + misclassification_rate: float [0,1] (lower is better) + 1-accuracy on validation set + trainable_parameters: int [0, 10**8] (lower is better) + Number of trainable parameters in the network + cost : runtime info : Dict fidelity : used fidelities in this evaluation @@ -176,6 +242,7 @@ def objective_function(self, configuration: Union[CS.Configuration, Dict], test_accuracies = [] training_times = [] additional = {} + failure = False for run_id in run_index: data = self._query_benchmark(config=configuration, budget=fidelity['budget'], run_index=run_id) @@ -186,25 +253,31 @@ def objective_function(self, configuration: Union[CS.Configuration, Dict], training_times.append(data['training_time']) # Since those information are the same for all run ids, just store one of them. - additional = {'trainable_parameters': data['trainable_parameters'], + # Also, if the configuration is invalid, set the number of parameters to its upper limit. + trainable_parameters = data['trainable_parameters'] + failure = trainable_parameters == 0 + trainable_parameters = 10**8 if trainable_parameters == 0 else trainable_parameters + + additional = {'trainable_parameters': trainable_parameters, 'module_operations': data['module_operations']} - return {'function_value': float(1 - np.mean(valid_accuracies)), + return {'function_value': {'misclassification_rate': float(1 - np.mean(valid_accuracies)), + 'trainable_parameters': additional['trainable_parameters']}, 'cost': float(np.sum(training_times)), 'info': {'fidelity': fidelity, 'train_accuracies': train_accuracies, 'valid_accuracies': valid_accuracies, 'test_accuracies': test_accuracies, 'training_times': training_times, + 'failure': 1 if failure else 0, 'data': additional } } - @AbstractBenchmark.check_parameters - def objective_function_test(self, configuration: Union[Dict, CS.Configuration], - fidelity: Union[CS.Configuration, Dict, None] = None, - rng: Union[np.random.RandomState, int, None] = None, - **kwargs) -> Dict: + def _mo_objective_function_test(self, configuration: Union[Dict, CS.Configuration], + fidelity: Union[CS.Configuration, Dict, None] = None, + rng: Union[np.random.RandomState, int, None] = None, + **kwargs) -> Dict: """ Validate a configuration on the maximum available budget. 
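The failure handling added above is worth spelling out: an invalid cell reports zero trainable parameters, which would otherwise look like the best possible second objective, so it is pushed to the upper bound of 10**8 and flagged in `info`. A compact stand-alone sketch of the aggregation over `run_index`, with made-up per-seed numbers:

```python
import numpy as np

# Per-seed results as returned by _query_benchmark (illustrative values).
valid_accuracies = [0.941, 0.938, 0.944]    # averaged over the run indices
training_times = [870.2, 865.9, 872.4]      # summed, not averaged
trainable_parameters = 8_555_530            # a value of 0 would signal an invalid cell

failure = trainable_parameters == 0
if failure:
    trainable_parameters = 10**8            # invalid cells get the worst possible size

result = {
    'function_value': {'misclassification_rate': float(1 - np.mean(valid_accuracies)),
                       'trainable_parameters': trainable_parameters},
    'cost': float(np.sum(training_times)),
    'info': {'failure': int(failure)},
}
print(result)
```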
@@ -222,83 +295,29 @@ def objective_function_test(self, configuration: Union[Dict, CS.Configuration], Returns ------- Dict - - function_value : test error + function_value : + misclassification_rate: float [0,1] (lower is better) + 1-accuracy on test set + trainable_parameters: int [0, 10**8] (lower is better) + Number of trainable parameters in the network + cost : runtime info : Dict fidelity : used fidelities in this evaluation """ - result = self.objective_function(configuration=configuration, fidelity=fidelity, run_index=(0, 1, 2), rng=rng) - result['function_value'] = float(1 - np.mean(result['info']['test_accuracies'])) + result = self._mo_objective_function( + configuration=configuration, fidelity=fidelity, run_index=(0, 1, 2), rng=rng + ) + result['function_value']['misclassification_rate'] = float(1 - np.mean(result['info']['test_accuracies'])) return result - @staticmethod - def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: - raise NotImplementedError - - @staticmethod - def get_meta_information() -> Dict: - """ Returns the meta information for the benchmark """ - return {'name': 'Tabular Benchmarks for Hyperparameter Optimization and Neural Architecture Search', - 'references': ['@article{klein2019tabular,' - 'title = {Tabular benchmarks for joint architecture and hyperparameter optimization},' - 'author = {Klein, Aaron and Hutter, Frank},' - 'journal = {arXiv preprint arXiv:1905.04970},' - 'year = {2019}}', - 'https://arxiv.org/abs/1905.04970', - ], - 'code': 'https://github.com/automl/nas_benchmarks', - } - - @staticmethod - def _get_configuration_space(benchmark: Any, seed: Union[int, None] = None) -> CS.ConfigurationSpace: - """ Helper function to pass a seed to the configuration space """ - seed = seed if seed is not None else np.random.randint(1, 100000) - cs = benchmark.get_configuration_space() - cs.seed(seed) - return cs - - @staticmethod - def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: - """ - Creates a ConfigSpace.ConfigurationSpace containing all fidelity parameters for - the NAS Benchmark 101. 
- - Parameters - ---------- - seed : int, None - Fixing the seed for the ConfigSpace.ConfigurationSpace - - Returns - ------- - ConfigSpace.ConfigurationSpace - """ - seed = seed if seed is not None else np.random.randint(1, 100000) - fidel_space = CS.ConfigurationSpace(seed=seed) - - fidel_space.add_hyperparameters([ - CS.OrdinalHyperparameter('budget', sequence=[4, 12, 36, 108], default_value=108) - ]) - - return fidel_space - - @staticmethod - def _try_download_api_file(save_to: Union[Path, str, None]): - data_manager = NASBench_101DataManager(save_to) - data_manager.download() - return data_manager.save_dir - - -class NASCifar10ABenchmark(NASCifar10BaseBenchmark): - def __init__(self, data_path: Union[Path, str, None] = None, - rng: Union[np.random.RandomState, int, None] = None, **kwargs): - - data_path = self._try_download_api_file(data_path) +class _QueryA(_NAS101BaseBenchmark): + def __init__(self, **kwargs): from tabular_benchmarks.nas_cifar10 import NASCifar10A - benchmark = NASCifar10A(data_dir=str(data_path), multi_fidelity=True) - super(NASCifar10ABenchmark, self).__init__(benchmark=benchmark, data_path=data_path, rng=rng, **kwargs) + super(_QueryA, self).__init__(benchmark_type=NASCifar10A) @staticmethod def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: @@ -315,7 +334,7 @@ def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSp """ from tabular_benchmarks.nas_cifar10 import NASCifar10A - return NASCifar10BBenchmark._get_configuration_space(NASCifar10A, seed) + return _NAS101BaseBenchmark._get_configuration_space(NASCifar10A, seed) def _query_benchmark(self, config: Dict, run_index: int, budget: int = 108) -> Dict: """ @@ -372,15 +391,10 @@ def _query_benchmark(self, config: Dict, run_index: int, budget: int = 108) -> D return data -class NASCifar10BBenchmark(NASCifar10BaseBenchmark): - def __init__(self, data_path: Union[Path, str, None] = None, - rng: Union[np.random.RandomState, int, None] = None, **kwargs): - - data_path = self._try_download_api_file(data_path) - +class _QueryB(_NAS101BaseBenchmark): + def __init__(self, **kwargs): from tabular_benchmarks.nas_cifar10 import NASCifar10B - benchmark = NASCifar10B(data_dir=str(data_path), multi_fidelity=True) - super(NASCifar10BBenchmark, self).__init__(benchmark=benchmark, data_path=data_path, rng=rng, **kwargs) + super(_QueryB, self).__init__(benchmark_type=NASCifar10B, **kwargs) @staticmethod def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: @@ -397,9 +411,10 @@ def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSp """ from tabular_benchmarks.nas_cifar10 import NASCifar10B - return NASCifar10BBenchmark._get_configuration_space(NASCifar10B, seed) + return _NAS101BaseBenchmark._get_configuration_space(NASCifar10B, seed) def _query_benchmark(self, config: Dict, run_index: int, budget: int = 108) -> Dict: + """ Copied from the 'objective_function' from nas_cifar10.py We adapted the file in such a way, that the complete result is returned. The original implementation returns @@ -408,6 +423,8 @@ def _query_benchmark(self, config: Dict, run_index: int, budget: int = 108) -> D Parameters ---------- config : Dict + run_index : int + Specifies the seed to use. Can be one of 0, 1, 2. budget : int The number of epochs. Must be one of: 4 12 36 108. Otherwise a accuracy of 0 is returned. 
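The `_QueryA`/`_QueryB`/`_QueryC` classes above only inject the concrete `NASCifar10` type; the shared base resolves the data path, builds the instance, and forwards the remaining keyword arguments up the MRO, which is what later allows the same query classes to be combined with the single- and multi-objective bases. A self-contained toy version of the pattern, using dummy classes rather than the real `tabular_benchmarks` ones:

```python
from typing import Type

class DummyNASCifar10:
    """Stand-in for tabular_benchmarks.nas_cifar10.NASCifar10 (illustration only)."""
    def __init__(self, data_dir: str, multi_fidelity: bool = True):
        self.data_dir, self.multi_fidelity = data_dir, multi_fidelity

class _Base:
    def __init__(self, benchmark_type: Type[DummyNASCifar10], data_path: str = '/tmp/nb101', **kwargs):
        # the real base additionally triggers the API-file download before this point
        self.benchmark = benchmark_type(data_dir=data_path, multi_fidelity=True)
        super().__init__(**kwargs)

class _QueryA(_Base):
    def __init__(self, **kwargs):
        super().__init__(benchmark_type=DummyNASCifar10, **kwargs)

print(_QueryA(data_path='/data/nasbench_101').benchmark.data_dir)   # /data/nasbench_101
```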
@@ -415,6 +432,7 @@ def _query_benchmark(self, config: Dict, run_index: int, budget: int = 108) -> D ------- Dict """ + failure = {"test_accuracy": 0, "train_accuracy": 0, "validation_accuracy": 0, "training_time": 0, "info": "failure", "trainable_parameters": 0, "module_operations": 0} @@ -439,6 +457,7 @@ def _query_benchmark(self, config: Dict, run_index: int, budget: int = 108) -> D labeling = [config["op_node_%d" % i] for i in range(5)] labeling = ['input'] + list(labeling) + ['output'] model_spec = api.ModelSpec(matrix, labeling) + try: data = modified_query(self.benchmark, run_index=run_index, model_spec=model_spec, epochs=budget) except api.OutOfDomainError: @@ -453,15 +472,10 @@ def _query_benchmark(self, config: Dict, run_index: int, budget: int = 108) -> D return data -class NASCifar10CBenchmark(NASCifar10BaseBenchmark): - def __init__(self, data_path: Union[Path, str, None] = None, - rng: Union[np.random.RandomState, int, None] = None, **kwargs): - - data_path = self._try_download_api_file(data_path) - +class _QueryC(_NAS101BaseBenchmark): + def __init__(self, **kwargs): from tabular_benchmarks.nas_cifar10 import NASCifar10C - benchmark = NASCifar10C(data_dir=str(data_path), multi_fidelity=True) - super(NASCifar10CBenchmark, self).__init__(benchmark=benchmark, data_path=data_path, rng=rng, **kwargs) + super(_QueryC, self).__init__(benchmark_type=NASCifar10C, **kwargs) @staticmethod def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: @@ -478,7 +492,7 @@ def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSp """ from tabular_benchmarks.nas_cifar10 import NASCifar10C - return NASCifar10BBenchmark._get_configuration_space(NASCifar10C, seed) + return _NAS101BaseBenchmark._get_configuration_space(NASCifar10C, seed) def _query_benchmark(self, config: Dict, run_index: int, budget: int = 108) -> Dict: """ @@ -538,6 +552,221 @@ def _query_benchmark(self, config: Dict, run_index: int, budget: int = 108) -> D return data +class _NASCifar10BaseMOBenchmark(_NAS101BaseBenchmark, AbstractMultiObjectiveBenchmark): + + # pylint: disable=arguments-differ + @AbstractMultiObjectiveBenchmark.check_parameters + def objective_function(self, configuration: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + run_index: Union[int, Tuple, None] = (0, 1, 2), + rng: Union[np.random.RandomState, int, None] = None, + **kwargs) -> Dict: + """ + Query the NAS-benchmark using a given configuration and a epoch (=budget). + + Parameters + ---------- + configuration : Dict, CS.Configuration + fidelity: Dict, None + Fidelity parameters, check get_fidelity_space(). Uses default (max) value if None. + run_index : int, Tuple, None + The nas benchmark has for each configuration-budget-pair results from 3 different runs. + - If multiple `run_id`s are given as Tuple, the benchmark returns the mean over the given runs. + - By default (no parameter is specified) all runs are used. A specific run can be chosen by setting the + `run_id` to a value from [0, 3]. While the performance is averaged across the `run_index`, the costs are + the sum of the runtime per `run_index`. + - When this value is explicitly set to `None`, the function will use a random seed. + rng : np.random.RandomState, int, None + Random seed to use in the benchmark. + + To prevent overfitting on a single seed, it is possible to pass a + parameter ``rng`` as 'int' or 'np.random.RandomState' to this function. 
+ If this parameter is not given, the default random state is used. + kwargs + + Returns + ------- + Dict - + function_value : + misclassification_rate: float [0,1] (lower is better) + 1-accuracy on validation set + trainable_parameters: int [0, 10**8] (lower is better) + Number of trainable parameters in the network + + cost : runtime + info : Dict + fidelity : used fidelities in this evaluation + """ + return self._mo_objective_function( + configuration=configuration, fidelity=fidelity, run_index=run_index, rng=rng, **kwargs + ) + + @AbstractMultiObjectiveBenchmark.check_parameters + def objective_function_test(self, configuration: Union[Dict, CS.Configuration], + fidelity: Union[CS.Configuration, Dict, None] = None, + rng: Union[np.random.RandomState, int, None] = None, + **kwargs) -> Dict: + """ + Validate a configuration on the maximum available budget. + + Parameters + ---------- + configuration : Dict, CS.Configuration + fidelity: Dict, None + Fidelity parameters, check get_fidelity_space(). Uses default (max) value if None. + rng : np.random.RandomState, int, None + Random seed to use in the benchmark. To prevent overfitting on a single seed, it is + possible to pass a parameter ``rng`` as 'int' or 'np.random.RandomState' to this + function. If this parameter is not given, the default random state is used. + kwargs + + Returns + ------- + Dict - + function_value : + misclassification_rate: float [0,1] (lower is better) + 1-accuracy on test set + trainable_parameters: int [0, 10**8] (lower is better) + Number of trainable parameters in the network + + cost : runtime + info : Dict + fidelity : used fidelities in this evaluation + """ + + return self._mo_objective_function_test( + configuration=configuration, fidelity=fidelity, rng=rng, **kwargs + ) + + @staticmethod + def get_objective_names() -> List[str]: + return ['misclassification_rate', 'trainable_parameters'] + + +class _NASCifar10BaseSOBenchmark(_NAS101BaseBenchmark, AbstractBenchmark): + + # pylint: disable=arguments-differ + @AbstractBenchmark.check_parameters + def objective_function(self, configuration: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + run_index: Union[int, Tuple, None] = (0, 1, 2), + rng: Union[np.random.RandomState, int, None] = None, + **kwargs) -> Dict: + """ + Query the NAS-benchmark using a given configuration and a epoch (=budget). + + Parameters + ---------- + configuration : Dict, CS.Configuration + fidelity: Dict, None + Fidelity parameters, check get_fidelity_space(). Uses default (max) value if None. + run_index : int, Tuple, None + The nas benchmark has for each configuration-budget-pair results from 3 different runs. + - If multiple `run_id`s are given as Tuple, the benchmark returns the mean over the given runs. + - By default (no parameter is specified) all runs are used. A specific run can be chosen by setting the + `run_id` to a value from [0, 3]. While the performance is averaged across the `run_index`, the costs are + the sum of the runtime per `run_index`. + - When this value is explicitly set to `None`, the function will use a random seed. + rng : np.random.RandomState, int, None + Random seed to use in the benchmark. + + To prevent overfitting on a single seed, it is possible to pass a + parameter ``rng`` as 'int' or 'np.random.RandomState' to this function. + If this parameter is not given, the default random state is used. 
+ kwargs + + Returns + ------- + Dict - + function_value : + misclassification_rate: float [0,1] (lower is better) + 1-accuracy on validation set + cost : runtime + info : Dict + fidelity : used fidelities in this evaluation + """ + result_dict = self._mo_objective_function( + configuration=configuration, fidelity=fidelity, run_index=run_index, rng=rng, **kwargs + ) + + # swap function_value dict to value + result_dict['function_value'] = result_dict['function_value']['misclassification_rate'] + return result_dict + + @AbstractBenchmark.check_parameters + def objective_function_test(self, configuration: Union[Dict, CS.Configuration], + fidelity: Union[CS.Configuration, Dict, None] = None, + rng: Union[np.random.RandomState, int, None] = None, + **kwargs) -> Dict: + """ + Validate a configuration on the maximum available budget. + + Parameters + ---------- + configuration : Dict, CS.Configuration + fidelity: Dict, None + Fidelity parameters, check get_fidelity_space(). Uses default (max) value if None. + rng : np.random.RandomState, int, None + Random seed to use in the benchmark. To prevent overfitting on a single seed, it is + possible to pass a parameter ``rng`` as 'int' or 'np.random.RandomState' to this + function. If this parameter is not given, the default random state is used. + kwargs + + Returns + ------- + Dict - + function_value : + equals misclassification_rate: float [0,1] (lower is better) + 1-accuracy on test set + cost : runtime + info : Dict + fidelity : used fidelities in this evaluation + """ + result_dict = self._mo_objective_function_test( + configuration=configuration, fidelity=fidelity, rng=rng, **kwargs + ) + + # swap function_value dict to value + result_dict['function_value'] = result_dict['function_value']['misclassification_rate'] + return result_dict + + +class NASCifar10ABenchmark(_QueryA, _NASCifar10BaseSOBenchmark): + def __init__(self, data_path: Union[Path, str, None] = None, + rng: Union[np.random.RandomState, int, None] = None, **kwargs): + super(NASCifar10ABenchmark, self).__init__(data_path=data_path, rng=rng, **kwargs) + + +class NASCifar10AMOBenchmark(_QueryA, _NASCifar10BaseMOBenchmark): + def __init__(self, data_path: Union[Path, str, None] = None, + rng: Union[np.random.RandomState, int, None] = None, **kwargs): + super(NASCifar10AMOBenchmark, self).__init__(data_path=data_path, rng=rng, **kwargs) + + +class NASCifar10BBenchmark(_QueryB, _NASCifar10BaseSOBenchmark): + def __init__(self, data_path: Union[Path, str, None] = None, + rng: Union[np.random.RandomState, int, None] = None, **kwargs): + super(NASCifar10BBenchmark, self).__init__(data_path=data_path, rng=rng, **kwargs) + + +class NASCifar10BMOBenchmark(_QueryB, _NASCifar10BaseMOBenchmark): + def __init__(self, data_path: Union[Path, str, None] = None, + rng: Union[np.random.RandomState, int, None] = None, **kwargs): + super(NASCifar10BMOBenchmark, self).__init__(data_path=data_path, rng=rng, **kwargs) + + +class NASCifar10CBenchmark(_QueryC, _NASCifar10BaseSOBenchmark): + def __init__(self, data_path: Union[Path, str, None] = None, + rng: Union[np.random.RandomState, int, None] = None, **kwargs): + super(NASCifar10CBenchmark, self).__init__(data_path=data_path, rng=rng, **kwargs) + + +class NASCifar10CMOBenchmark(_QueryC, _NASCifar10BaseMOBenchmark): + def __init__(self, data_path: Union[Path, str, None] = None, + rng: Union[np.random.RandomState, int, None] = None, **kwargs): + super(NASCifar10CMOBenchmark, self).__init__(data_path=data_path, rng=rng, **kwargs) + + def 
modified_query(benchmark, model_spec, run_index: int, epochs=108, stop_halfway=False): """ NOTE: @@ -607,3 +836,11 @@ def modified_query(benchmark, model_spec, run_index: int, epochs=108, stop_halfw benchmark.dataset.total_epochs_spent += epochs return data + + +__all__ = ["NASCifar10ABenchmark", + "NASCifar10AMOBenchmark", + "NASCifar10BBenchmark", + "NASCifar10BMOBenchmark", + "NASCifar10CBenchmark", + "NASCifar10CMOBenchmark"] diff --git a/hpobench/benchmarks/nas/nasbench_1shot1.py b/hpobench/benchmarks/nas/nasbench_1shot1.py index 4d8231a0..5d94631e 100644 --- a/hpobench/benchmarks/nas/nasbench_1shot1.py +++ b/hpobench/benchmarks/nas/nasbench_1shot1.py @@ -34,7 +34,7 @@ pip install .[nasbench_1shot1] pip install git+https://github.com/google-research/nasbench.git@master -git clone https://github.com/automl/nasbench-1shot1/tree/master/nasbench_analysis/ +git clone https://github.com/automl/nasbench-1shot1 3. Environment setup ==================== @@ -46,6 +46,9 @@ Changelog: ========== +0.0.5 +* Add MO Version + 0.0.4 * New container release due to a general change in the communication between container and HPOBench. Works with HPOBench >= v0.0.8 @@ -62,34 +65,33 @@ """ import logging - +from ast import literal_eval from pathlib import Path from typing import Union, Dict, Any, Tuple, List -from ast import literal_eval import ConfigSpace as CS import numpy as np from nasbench import api from nasbench.api import OutOfDomainError - -from hpobench.abstract_benchmark import AbstractBenchmark -from hpobench.util.data_manager import NASBench_101DataManager -from hpobench.util import rng_helper - from nasbench_analysis.search_spaces.search_space_1 import SearchSpace1 # noqa from nasbench_analysis.search_spaces.search_space_2 import SearchSpace2 # noqa from nasbench_analysis.search_spaces.search_space_3 import SearchSpace3 # noqa from nasbench_analysis.utils import INPUT, OUTPUT, CONV1X1, CONV3X3, MAXPOOL3X3 # noqa -__version__ = '0.0.4' +from hpobench.abstract_benchmark import AbstractSingleObjectiveBenchmark, AbstractMultiObjectiveBenchmark +from hpobench.util import rng_helper +from hpobench.util.data_manager import NASBench_101DataManager + +__version__ = '0.0.5' logger = logging.getLogger('NasBench1shot1') -class NASBench1shot1BaseBenchmark(AbstractBenchmark): +class _NASBench1shot1BaseBenchmark: + def __init__(self, data_path: Union[Path, str, None] = None, rng: Union[np.random.RandomState, int, None] = None): """ - Baseclass for the nasbench 1shot1 benchmarks. + Baseclass for the all nasbench 1shot1 benchmarks. Please install the benchmark first. Place the data under ``data_path``. 
Parameters @@ -99,18 +101,18 @@ def __init__(self, data_path: Union[Path, str, None] = None, rng : np.random.RandomState, int, None Random seed for the benchmarks """ - super(NASBench1shot1BaseBenchmark, self).__init__(rng=rng) + data_manager = NASBench_101DataManager(data_path) self.api = data_manager.load() self.search_space = None - - # pylint: disable=arguments-differ - @AbstractBenchmark.check_parameters - def objective_function(self, configuration: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - run_index: Union[int, Tuple, List, None] = (0, 1, 2), - rng: Union[np.random.RandomState, int, None] = None, - **kwargs) -> Dict: + self.rng = rng + super(_NASBench1shot1BaseBenchmark, self).__init__(rng=rng) + + def _mo_objective_function(self, configuration: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + run_index: Union[int, Tuple, List, None] = (0, 1, 2), + rng: Union[np.random.RandomState, int, None] = None, + **kwargs) -> Dict: """ Query the NAS1shot1-benchmark using a given configuration and an epoch (=budget). Only data for the budgets 4, 12, 36, 108 are available. @@ -171,7 +173,8 @@ def objective_function(self, configuration: Union[CS.Configuration, Dict], 'module_operations': data['module_operations']} failure = failure or ('info' in data and data['info'] == 'failure') - return {'function_value': float(1 - np.mean(valid_accuracies)), + return {'function_value': {'misclassification_rate': float(1 - np.mean(valid_accuracies)), + 'trainable_parameters': additional['trainable_parameters']}, 'cost': float(np.sum(training_times)), 'info': {'fidelity': fidelity, 'train_accuracies': train_accuracies, @@ -179,50 +182,24 @@ def objective_function(self, configuration: Union[CS.Configuration, Dict], 'test_accuracies': test_accuracies, 'training_times': training_times, 'data': additional, - 'failure': 'False' if not failure else 'True' + 'failure': 0 if not failure else 1 } } - @AbstractBenchmark.check_parameters - def objective_function_test(self, configuration: Union[Dict, CS.Configuration], - fidelity: Union[CS.Configuration, Dict, None] = None, - rng: Union[np.random.RandomState, int, None] = None, - **kwargs) -> Dict: - """ - Validate a configuration on the maximum available budget (108) and on all three seeds. - - Parameters - ---------- - configuration : Dict, CS.Configuration - fidelity: Dict, None - Fidelity parameters, check get_fidelity_space(). Uses default (max) value if None. - rng : np.random.RandomState, int, None - Random seed to use in the benchmark. To prevent overfitting on a single seed, it is - possible to pass a parameter ``rng`` as 'int' or 'np.random.RandomState' to this - function. If this parameter is not given, the default random state is used. - kwargs + def _mo_objective_function_test(self, configuration: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + rng: Union[np.random.RandomState, int, None] = None, + **kwargs) -> Dict: - Returns - ------- - Dict - - function_value : test error on largest fidelity. - cost : runtime - info : Dict - train_accuracies - test_accuracies - valid_accuracies - training_times - fidelity : used fidelities in this evaluation - data : additional data such as trainable parameters and used operations - """ assert fidelity['budget'] == 108, 'Only test data for the 108th epoch is available.' 
- result = self.objective_function(configuration=configuration, fidelity=fidelity, run_index=(0, 1, 2), rng=rng) - result['function_value'] = float(1 - np.mean(result['info']['test_accuracies'])) + result = self._mo_objective_function(configuration=configuration, fidelity=fidelity, + run_index=(0, 1, 2), rng=rng, **kwargs) + result['function_value']['misclassification_rate'] = float(1 - np.mean(result['info']['test_accuracies'])) return result @staticmethod def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: - raise NotImplementedError + raise NotImplementedError() @staticmethod def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: @@ -264,7 +241,6 @@ def get_meta_information() -> Dict: } def _check_run_index(self, run_index): - if isinstance(run_index, int): assert 0 <= run_index <= 2, f'run_index must be in [0, 2], not {run_index}' run_index = (run_index, ) @@ -426,7 +402,223 @@ def _get_configuration_space(search_space: Any, seed: Union[int, None] = None) - return cs -class NASBench1shot1SearchSpace1Benchmark(NASBench1shot1BaseBenchmark): +class NASBench1shot1BaseMOBenchmark(_NASBench1shot1BaseBenchmark, AbstractMultiObjectiveBenchmark): + + # pylint: disable=arguments-differ + @AbstractMultiObjectiveBenchmark.check_parameters + def objective_function(self, configuration: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + run_index: Union[int, Tuple, List, None] = (0, 1, 2), + rng: Union[np.random.RandomState, int, None] = None, + **kwargs) -> Dict: + """ + Query the NAS1shot1-benchmark using a given configuration and an epoch (=budget). + Only data for the budgets 4, 12, 36, 108 are available. + + Parameters + ---------- + configuration : Dict, CS.Configuration + fidelity: Dict, None + Fidelity parameters, check get_fidelity_space(). Uses default (max) value if None. + run_index : int, Tuple, None + The nas benchmark has for each configuration-budget-pair results from 3 different runs. + - If multiple `run_id`s are given as Tuple/List, the benchmark returns the mean over the given runs. + - By default (no parameter is specified) all runs are used. A specific run can be chosen by setting the + `run_id` to a value from [0, 3]. While the performance is averaged across the `run_index`, the costs are + the sum of the runtime per `run_index`. + - When this value is explicitly set to `None`, the function will use a random seed. + rng : np.random.RandomState, int, None + Random seed to use in the benchmark. + + To prevent overfitting on a single seed, it is possible to pass a + parameter ``rng`` as 'int' or 'np.random.RandomState' to this function. + If this parameter is not given, the default random state is used. + kwargs + + Returns + ------- + Dict - + function_value : validation error + cost : runtime + info : Dict + train_accuracies + test_accuracies + valid_accuracies + training_times + fidelity : used fidelities in this evaluation + data : additional data such as trainable parameters and used operations + """ + return self._mo_objective_function(configuration=configuration, fidelity=fidelity, + run_index=run_index, rng=rng, **kwargs) + + @AbstractMultiObjectiveBenchmark.check_parameters + def objective_function_test(self, configuration: Union[Dict, CS.Configuration], + fidelity: Union[CS.Configuration, Dict, None] = None, + rng: Union[np.random.RandomState, int, None] = None, + **kwargs) -> Dict: + """ + Validate a configuration on the maximum available budget (108) and on all three seeds. 
+ + Parameters + ---------- + configuration : Dict, CS.Configuration + fidelity: Dict, None + Fidelity parameters, check get_fidelity_space(). Uses default (max) value if None. + rng : np.random.RandomState, int, None + Random seed to use in the benchmark. To prevent overfitting on a single seed, it is + possible to pass a parameter ``rng`` as 'int' or 'np.random.RandomState' to this + function. If this parameter is not given, the default random state is used. + kwargs + + Returns + ------- + Dict - + function_value : test error on largest fidelity. + cost : runtime + info : Dict + train_accuracies + test_accuracies + valid_accuracies + training_times + fidelity : used fidelities in this evaluation + data : additional data such as trainable parameters and used operations + """ + return self._mo_objective_function_test(configuration=configuration, fidelity=fidelity, + rng=rng, **kwargs) + + @staticmethod + def get_objective_names() -> List[str]: + return ['misclassification_rate', 'trainable_parameters'] + + +class NASBench1shot1BaseSOBenchmark(_NASBench1shot1BaseBenchmark, AbstractSingleObjectiveBenchmark): + + # pylint: disable=arguments-differ + @AbstractSingleObjectiveBenchmark.check_parameters + def objective_function(self, configuration: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + run_index: Union[int, Tuple, List, None] = (0, 1, 2), + rng: Union[np.random.RandomState, int, None] = None, + **kwargs) -> Dict: + """ + Query the NAS1shot1-benchmark using a given configuration and an epoch (=budget). + Only data for the budgets 4, 12, 36, 108 are available. + + Parameters + ---------- + configuration : Dict, CS.Configuration + fidelity: Dict, None + Fidelity parameters, check get_fidelity_space(). Uses default (max) value if None. + run_index : int, Tuple, None + The nas benchmark has for each configuration-budget-pair results from 3 different runs. + - If multiple `run_id`s are given as Tuple/List, the benchmark returns the mean over the given runs. + - By default (no parameter is specified) all runs are used. A specific run can be chosen by setting the + `run_id` to a value from [0, 3]. While the performance is averaged across the `run_index`, the costs are + the sum of the runtime per `run_index`. + - When this value is explicitly set to `None`, the function will use a random seed. + rng : np.random.RandomState, int, None + Random seed to use in the benchmark. + + To prevent overfitting on a single seed, it is possible to pass a + parameter ``rng`` as 'int' or 'np.random.RandomState' to this function. + If this parameter is not given, the default random state is used. 
+ kwargs + + Returns + ------- + Dict - + function_value : validation error + cost : runtime + info : Dict + train_accuracies + test_accuracies + valid_accuracies + training_times + fidelity : used fidelities in this evaluation + data : additional data such as trainable parameters and used operations + """ + result = self._mo_objective_function( + configuration=configuration, fidelity=fidelity, rng=rng, run_index=run_index, **kwargs + ) + result['info'].update(result['function_value']) + result['function_value'] = result['function_value']['misclassification_rate'] + return result + + @AbstractSingleObjectiveBenchmark.check_parameters + def objective_function_test(self, configuration: Union[Dict, CS.Configuration], + fidelity: Union[CS.Configuration, Dict, None] = None, + rng: Union[np.random.RandomState, int, None] = None, + **kwargs) -> Dict: + """ + Validate a configuration on the maximum available budget (108) and on all three seeds. + + Parameters + ---------- + configuration : Dict, CS.Configuration + fidelity: Dict, None + Fidelity parameters, check get_fidelity_space(). Uses default (max) value if None. + rng : np.random.RandomState, int, None + Random seed to use in the benchmark. To prevent overfitting on a single seed, it is + possible to pass a parameter ``rng`` as 'int' or 'np.random.RandomState' to this + function. If this parameter is not given, the default random state is used. + kwargs + + Returns + ------- + Dict - + function_value : test error on largest fidelity. + cost : runtime + info : Dict + train_accuracies + test_accuracies + valid_accuracies + training_times + fidelity : used fidelities in this evaluation + data : additional data such as trainable parameters and used operations + """ + + result = self._mo_objective_function_test( + configuration=configuration, fidelity=fidelity, rng=rng, **kwargs + ) + result['info'].update(result['function_value']) + result['function_value'] = result['function_value']['misclassification_rate'] + return result + + +class NASBench1shot1SearchSpace1MOBenchmark(NASBench1shot1BaseMOBenchmark): + def __init__(self, data_path: Union[Path, str, None] = None, + rng: Union[np.random.RandomState, int, None] = None): + super(NASBench1shot1SearchSpace1MOBenchmark, self).__init__(data_path=data_path, rng=rng) + self.search_space = SearchSpace1() + + @staticmethod + def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: + return _NASBench1shot1BaseBenchmark._get_configuration_space(SearchSpace1(), seed) + + +class NASBench1shot1SearchSpace2MOBenchmark(NASBench1shot1BaseMOBenchmark): + def __init__(self, data_path: Union[Path, str, None] = None, + rng: Union[np.random.RandomState, int, None] = None): + super(NASBench1shot1SearchSpace2MOBenchmark, self).__init__(data_path=data_path, rng=rng) + self.search_space = SearchSpace2() + + @staticmethod + def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: + return _NASBench1shot1BaseBenchmark._get_configuration_space(SearchSpace2(), seed) + + +class NASBench1shot1SearchSpace3MOBenchmark(NASBench1shot1BaseMOBenchmark): + def __init__(self, data_path: Union[Path, str, None] = None, + rng: Union[np.random.RandomState, int, None] = None): + super(NASBench1shot1SearchSpace3MOBenchmark, self).__init__(data_path=data_path, rng=rng) + self.search_space = SearchSpace3() + + @staticmethod + def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: + return _NASBench1shot1BaseBenchmark._get_configuration_space(SearchSpace3(), 
seed)
+
+
+class NASBench1shot1SearchSpace1Benchmark(NASBench1shot1BaseSOBenchmark):
     def __init__(self, data_path: Union[Path, str, None] = None,
                  rng: Union[np.random.RandomState, int, None] = None):
         super(NASBench1shot1SearchSpace1Benchmark, self).__init__(data_path=data_path, rng=rng)
@@ -434,10 +626,10 @@ def __init__(self, data_path: Union[Path, str, None] = None,
 
     @staticmethod
     def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace:
-        return NASBench1shot1BaseBenchmark._get_configuration_space(SearchSpace1(), seed)
+        return _NASBench1shot1BaseBenchmark._get_configuration_space(SearchSpace1(), seed)
 
 
-class NASBench1shot1SearchSpace2Benchmark(NASBench1shot1BaseBenchmark):
+class NASBench1shot1SearchSpace2Benchmark(NASBench1shot1BaseSOBenchmark):
     def __init__(self, data_path: Union[Path, str, None] = None,
                  rng: Union[np.random.RandomState, int, None] = None):
         super(NASBench1shot1SearchSpace2Benchmark, self).__init__(data_path=data_path, rng=rng)
@@ -445,10 +637,10 @@ def __init__(self, data_path: Union[Path, str, None] = None,
 
     @staticmethod
     def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace:
-        return NASBench1shot1BaseBenchmark._get_configuration_space(SearchSpace2(), seed)
+        return _NASBench1shot1BaseBenchmark._get_configuration_space(SearchSpace2(), seed)
 
 
-class NASBench1shot1SearchSpace3Benchmark(NASBench1shot1BaseBenchmark):
+class NASBench1shot1SearchSpace3Benchmark(NASBench1shot1BaseSOBenchmark):
     def __init__(self, data_path: Union[Path, str, None] = None,
                  rng: Union[np.random.RandomState, int, None] = None):
         super(NASBench1shot1SearchSpace3Benchmark, self).__init__(data_path=data_path, rng=rng)
@@ -456,4 +648,14 @@ def __init__(self, data_path: Union[Path, str, None] = None,
 
     @staticmethod
     def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace:
-        return NASBench1shot1BaseBenchmark._get_configuration_space(SearchSpace3(), seed)
+        return _NASBench1shot1BaseBenchmark._get_configuration_space(SearchSpace3(), seed)
+
+
+__all__ = [
+    "NASBench1shot1SearchSpace1Benchmark",
+    "NASBench1shot1SearchSpace2Benchmark",
+    "NASBench1shot1SearchSpace3Benchmark",
+    "NASBench1shot1SearchSpace1MOBenchmark",
+    "NASBench1shot1SearchSpace2MOBenchmark",
+    "NASBench1shot1SearchSpace3MOBenchmark",
+]
diff --git a/hpobench/benchmarks/nas/nasbench_201.py b/hpobench/benchmarks/nas/nasbench_201.py
index 0c2324c2..1ca0beb3 100644
--- a/hpobench/benchmarks/nas/nasbench_201.py
+++ b/hpobench/benchmarks/nas/nasbench_201.py
@@ -30,6 +30,8 @@
 0.0.6
 * Add the multiobjective version of this benchmark by returning flops, model size, latency and missclassification rate
 * Integrate #138: Improve the docstrings about the seeds.
+* Scale the returned misclassification rate from range [0, 100] to [0, 1].
+* Improve naming in the result object ("*_precision" -> "*_misclassification_rate")
 0.0.5
 * Add for each benchmark a new one with a different fidelity space.
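The preceding hunks add single- and multi-objective variants of the NAS-Bench-1shot1 benchmarks and export them via `__all__`. As a usage illustration (not part of the patch itself), the sketch below shows how one of the new multi-objective classes might be queried; it assumes the NAS-Bench-101 data file and the `nasbench`/`nasbench-1shot1` dependencies mentioned in the file header are installed, and the result keys follow the `_mo_objective_function` return dict in this patch.

```python
# Minimal sketch: query a multi-objective 1shot1 benchmark and read both objectives.
from hpobench.benchmarks.nas.nasbench_1shot1 import NASBench1shot1SearchSpace1MOBenchmark

benchmark = NASBench1shot1SearchSpace1MOBenchmark(rng=0)
config = benchmark.get_configuration_space(seed=0).sample_configuration()

# Budgets 4, 12, 36 and 108 are available; 108 is the default (max) fidelity.
result = benchmark.objective_function(configuration=config, fidelity={'budget': 108})

# 'function_value' is now a dict with one entry per objective,
# cf. get_objective_names() -> ['misclassification_rate', 'trainable_parameters'].
print(result['function_value']['misclassification_rate'],
      result['function_value']['trainable_parameters'],
      result['cost'])
```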
@@ -51,25 +53,23 @@ * First implementation """ import logging -from typing import Union, Dict, List, Text, Tuple from copy import deepcopy +from typing import Union, Dict, List, Text, Tuple import ConfigSpace as CS import numpy as np import hpobench.util.rng_helper as rng_helper -from hpobench.abstract_benchmark import AbstractBenchmark, AbstractMultiObjectiveBenchmark - +from hpobench.abstract_benchmark import AbstractSingleObjectiveBenchmark, AbstractMultiObjectiveBenchmark from hpobench.util.data_manager import NASBench_201Data - __version__ = '0.0.6' MAX_NODES = 4 logger = logging.getLogger('NASBENCH201') -class NasBench201BaseMOBenchmark(AbstractMultiObjectiveBenchmark): +class _NasBench201BaseBenchmark: def __init__(self, dataset: str, rng: Union[np.random.RandomState, int, None] = None, **kwargs): """ @@ -153,13 +153,12 @@ def __init__(self, dataset: str, Random seed for the benchmark's random state. """ # noqa: E501 - super(NasBench201BaseMOBenchmark, self).__init__(rng=rng) - data_manager = NASBench_201Data(dataset=dataset) self.dataset = dataset self.data = data_manager.load() - self.config_to_structure = NasBench201BaseMOBenchmark.config_to_structure_func(max_nodes=MAX_NODES) + self.config_to_structure = _NasBench201BaseMOBenchmark.config_to_structure_func(max_nodes=MAX_NODES) + super(_NasBench201BaseBenchmark, self).__init__(rng=rng, **kwargs) def dataset_mapping(self, dataset): mapping = {'cifar10-valid': ('x-valid', 'ori-test'), @@ -167,76 +166,115 @@ def dataset_mapping(self, dataset): 'cifar100': ('ori-test', 'x-test')} return mapping[dataset] - # pylint: disable=arguments-differ - @AbstractMultiObjectiveBenchmark.check_parameters - def objective_function(self, configuration: Union[CS.Configuration, Dict], - fidelity: Union[Dict, CS.Configuration, None] = None, - rng: Union[np.random.RandomState, int, None] = None, - data_seed: Union[List, Tuple, int, None] = (777, 888, 999), - **kwargs) -> Dict: + @staticmethod + def config_to_structure_func(max_nodes: int): + """ + From https://github.com/D-X-Y/AutoDL-Projects/blob/master/exps/algos/BOHB.py + Author: https://github.com/D-X-Y [Xuanyi.Dong@student.uts.edu.au] """ - Objective function for the NASBench201 benchmark. - This functions sends a query to NASBench201 and evaluates the configuration. - As already explained in the class definition, different data sets are trained on different splits. - The table above gives a detailed summary over the available splits, epochs, and which identifier are used per - dataset. + def config_to_structure(config): + genotypes = [] + for i in range(1, max_nodes): + x_list = [] + for j in range(i): + node_str = f'{i}<-{j}' + op_name = config[node_str] + x_list.append((op_name, j)) + genotypes.append(tuple(x_list)) + return _NasBench201BaseMOBenchmark._Structure(genotypes) + + return config_to_structure + + @staticmethod + def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: + """ + Return the CS representation of the search space. + From https://github.com/D-X-Y/AutoDL-Projects/blob/master/exps/algos/BOHB.py + Author: https://github.com/D-X-Y [Xuanyi.Dong@student.uts.edu.au] Parameters ---------- - configuration - fidelity: Dict, None - epoch: int - Values: [1, 200] - Number of epochs an architecture was trained. - Note: the number of epoch is 1 indexed! (Results after the first epoch: epoch = 1) + seed : int, None + Random seed for the configuration space. - Fidelity parameters, check get_fidelity_space(). Uses default (max) value if None. 
- rng : np.random.RandomState, int, None - Random seed to use in the benchmark. + Returns + ------- + CS.ConfigurationSpace - + Containing the benchmark's hyperparameter + """ + seed = seed if seed is not None else np.random.randint(1, 100000) + cs = CS.ConfigurationSpace(seed=seed) - To prevent overfitting on a single seed, it is possible to pass a - parameter ``rng`` as 'int' or 'np.random.RandomState' to this function. - If this parameter is not given, the default random state is used. - data_seed : List, Tuple, None, int - The nasbench_201 benchmark include for each run 3 different seeds: 777, 888, 999. - The user can specify which seed to use. If more than one seed is given, the results are averaged - across the seeds but then the training time is the sum of the costs per seed. - When this value is explicitly set to `None`, the function will chose randomly one out of [777, 888, 999]. + search_space = _NasBench201BaseMOBenchmark.get_search_spaces('cell', 'nas-bench-201') + hps = [CS.CategoricalHyperparameter(f'{i}<-{j}', search_space) for i in range(1, MAX_NODES) for j in range(i)] + cs.add_hyperparameters(hps) + return cs - Note: - For some architectures (configurations) no run was available. We've set missing values to an - available value from another seed. Therefore, it is possible that run results are exactly the same for - different seeds. + @staticmethod + def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: + """ + Creates a ConfigSpace.ConfigurationSpace containing all fidelity parameters for + the NAS Benchmark 201. - kwargs + Fidelities + ---------- + epoch: int + The loss / accuracy at `epoch`. Can be from 0 to 199. + + Parameters + ---------- + seed : int, None + Fixing the seed for the ConfigSpace.ConfigurationSpace Returns ------- - Dict - - function_value : Dict - misclassification_rate : float - 1 - validation accuracy - num_flops : float - Number of floating point operations in M - model_size : float - Model size in MB - latency : float - Time to evaluate a configuration in seconds - cost : time to train the network - info : Dict - train_precision : float - train_losses : float - train_cost : float - Time needed to train the network for 'epoch' many epochs. If more than one seed is given, - this field is the sum of the training time per network - eval_precision : float - eval_losses : float - eval_cost : float - Time needed to train the network for 'epoch many epochs plus the time to evaluate the network on the - evaluation split. 
If more than one seed is given, this field is the sum of the eval cost per network - fidelity : Dict - used fidelities in this evaluation + ConfigSpace.ConfigurationSpace + """ + seed = seed if seed is not None else np.random.randint(1, 100000) + fidel_space = CS.ConfigurationSpace(seed=seed) + + fidel_space.add_hyperparameters([ + CS.UniformIntegerHyperparameter('epoch', lower=1, upper=200, default_value=200) + ]) + return fidel_space + + @staticmethod + def get_meta_information() -> Dict: + """ Returns the meta information for the benchmark """ + return {'name': 'NAS-Bench-201: Extending the Scope of Reproducible Neural Architecture Search', + 'references': ['@article{dong2020bench,' + 'title = {Nas-bench-201: Extending the scope of reproducible neural ' + ' architecture search},' + 'author = {Dong, Xuanyi and Yang, Yi},' + 'journal = {arXiv preprint arXiv:2001.00326},' + 'year = {2020}}', + 'https://openreview.net/forum?id=HJxyZkBKDr', + ], + 'code': 'https://github.com/D-X-Y/AutoDL-Projects', + } + + @staticmethod + def get_search_spaces(xtype: str, name: str) -> List[Text]: + """ obtain the search space, i.e., a dict mapping the operation name into a python-function for this op + From https://github.com/D-X-Y/AutoDL-Projects/blob/master/lib/models/__init__.py + Author: https://github.com/D-X-Y [Xuanyi.Dong@student.uts.edu.au] """ + # pylint: disable=no-else-return + if xtype == 'cell': + NAS_BENCH_201 = ['none', 'skip_connect', 'nor_conv_1x1', 'nor_conv_3x3', 'avg_pool_3x3'] + SearchSpaceNames = {'nas-bench-201': NAS_BENCH_201} + assert name in SearchSpaceNames, 'invalid name [{:}] in {:}'.format(name, SearchSpaceNames.keys()) + return SearchSpaceNames[name] + else: + raise ValueError('invalid search-space type is {:}'.format(xtype)) + + def _mo_objective_function(self, configuration: Union[CS.Configuration, Dict], + fidelity: Union[Dict, CS.Configuration, None] = None, + rng: Union[np.random.RandomState, int, None] = None, + data_seed: Union[List, Tuple, int, None] = (777, 888, 999), + **kwargs) -> Dict: + self.rng = rng_helper.get_rng(rng) if isinstance(data_seed, (List, Tuple)): @@ -245,7 +283,7 @@ def objective_function(self, configuration: Union[CS.Configuration, Dict], logger.debug('There are some values more than once in the run_index. We remove the redundant entries.') data_seed = tuple(set(data_seed)) elif isinstance(data_seed, int): - data_seed = (data_seed, ) + data_seed = (data_seed,) elif data_seed is None: logger.debug('The data seed is explicitly set to None! A random seed will be selected.') data_seed = tuple(self.rng.choice((777, 888, 999), size=1)) @@ -254,7 +292,7 @@ def objective_function(self, configuration: Union[CS.Configuration, Dict], raise ValueError(f'data seed has unknown data type {type(data_seed)}, ' f'but should be tuple or int (777,888,999)') - assert len(set(data_seed) - {777, 888, 999}) == 0,\ + assert len(set(data_seed) - {777, 888, 999}) == 0, \ f'data seed can only contain the elements 777, 888, 999, but was {data_seed}' structure = self.config_to_structure(configuration) @@ -291,44 +329,112 @@ def objective_function(self, configuration: Union[CS.Configuration, Dict], return { 'function_value': { - 'misclassification_rate': float(100 - np.mean(valid_accuracies)), + # The original benchmark returned the accuracy with range [0, 100]. + # We cast it to a minimization problem with range [0-1] to have a more standardized return value. 
+ 'misclassification_rate': 0.01 * float(100 - np.mean(valid_accuracies)), 'num_flops': float(np.mean(num_flops)), 'model_size': float(np.mean(model_size)), 'latency': float(np.mean(latency)), }, 'cost': float(np.sum(valid_times) + np.sum(train_times)), 'info': { - 'train_precision': float(100 - np.mean(train_accuracies)), + 'train_misclassification_rate': 0.01 * float(100 - np.mean(train_accuracies)), 'train_losses': float(np.mean(train_losses)), 'train_cost': float(np.sum(train_times)), - 'valid_precision': float(100 - np.mean(valid_accuracies)), + 'valid_misclassification_rate': 0.01 * float(100 - np.mean(valid_accuracies)), 'valid_losses': float(np.mean(valid_losses)), 'valid_cost': float(np.sum(valid_times) + np.sum(train_times)), - 'test_precision': float(100 - np.mean(test_accuracies)), + 'test_misclassification_rate': 0.01 * float(100 - np.mean(test_accuracies)), 'test_losses': float(np.mean(test_losses)), 'test_cost': float(np.sum(train_times)) + float(np.sum(test_times)), 'fidelity': fidelity } } + def _mo_objective_function_test(self, configuration: Union[CS.Configuration, Dict], + fidelity: Union[Dict, CS.Configuration, None] = None, + rng: Union[np.random.RandomState, int, None] = None, + **kwargs) -> Dict: + # The result dict should contain already all necessary information -> Just swap the function value from valid + # to test and the corresponding time cost + assert fidelity['epoch'] == 200, 'Only test data for the 200. epoch is available. ' + + if 'data_seed' in kwargs: + all_seeds_available = all([seed in kwargs['data_seed'] for seed in (777, 888, 999)]) + if not all_seeds_available: + logger.warning('You have not specified all available seeds for the ' + '`objective_function_test`. However, we are going to ignore them, ' + ' because we report test values only as mean across all seeds.' 
+ f' Your given seeds: {kwargs["seed"]}') + del kwargs['data_seed'] + + result = self._mo_objective_function(configuration=configuration, fidelity=fidelity, + data_seed=(777, 888, 999), + rng=rng, **kwargs) + result['function_value']['misclassification_rate'] = result['info']['test_misclassification_rate'] + result['cost'] = result['info']['test_cost'] + return result + + class _Structure: + def __init__(self, genotype): + assert isinstance(genotype, (list, tuple)), 'invalid class of genotype : {:}'.format(type(genotype)) + self.node_num = len(genotype) + 1 + self.nodes = [] + self.node_N = [] + for idx, node_info in enumerate(genotype): + assert isinstance(node_info, (list, tuple)), 'invalid class of node_info : {:}'.format(type(node_info)) + assert len(node_info) >= 1, 'invalid length : {:}'.format(len(node_info)) + for node_in in node_info: + assert isinstance(node_in, (list, tuple)), 'invalid class of in-node : {:}'.format(type(node_in)) + assert len(node_in) == 2 and node_in[1] <= idx, 'invalid in-node : {:}'.format(node_in) + self.node_N.append(len(node_info)) + self.nodes.append(tuple(deepcopy(node_info))) + + def tostr(self): + """ Helper function: Create a string representation of the configuration """ + strings = [] + for node_info in self.nodes: + string = '|'.join([x[0] + '~{:}'.format(x[1]) for x in node_info]) + string = '|{:}|'.format(string) + strings.append(string) + return '+'.join(strings) + + def __repr__(self): + return ( + '{name}({node_num} nodes with {node_info})'.format(name=self.__class__.__name__, node_info=self.tostr(), + **self.__dict__)) + + def __len__(self): + return len(self.nodes) + 1 + + def __getitem__(self, index): + return self.nodes[index] + + +class _NasBench201BaseMOBenchmark(_NasBench201BaseBenchmark, AbstractMultiObjectiveBenchmark): + # pylint: disable=arguments-differ @AbstractMultiObjectiveBenchmark.check_parameters - def objective_function_test(self, configuration: Union[CS.Configuration, Dict], - fidelity: Union[Dict, None] = None, - rng: Union[np.random.RandomState, int, None] = None, - **kwargs) -> Dict: + def objective_function(self, configuration: Union[CS.Configuration, Dict], + fidelity: Union[Dict, CS.Configuration, None] = None, + rng: Union[np.random.RandomState, int, None] = None, + data_seed: Union[List, Tuple, int, None] = (777, 888, 999), + **kwargs) -> Dict: """ - Get the validated results from the NASBench201. Runs a given configuration on the largest budget (here: 200). - The test function uses all data set seeds (777, 888, 999). + Objective function for the NASBench201 benchmark. + This functions sends a query to NASBench201 and evaluates the configuration. + As already explained in the class definition, different data sets are trained on different splits. - See also :py:meth:`~hpobench.benchmarks.nas.nasbench_201.objective_function` + The table above gives a detailed summary over the available splits, epochs, and which identifier are used per + dataset. Parameters ---------- configuration fidelity: Dict, None - epoch: int - Values: [200] + epoch: int - Values: [1, 200] Number of epochs an architecture was trained. - Note: We only have test performance on the last epoch. + Note: the number of epoch is 1 indexed! (Results after the first epoch: epoch = 1) + Fidelity parameters, check get_fidelity_space(). Uses default (max) value if None. rng : np.random.RandomState, int, None Random seed to use in the benchmark. 
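The surrounding hunks document the multi-objective NAS-Bench-201 `objective_function` (epoch fidelity, averaging over the stored seeds 777, 888, 999). As an illustration, here is a hedged sketch of one plausible call; it assumes the per-dataset NAS-Bench-201 data files are available locally and uses `Cifar10ValidNasBench201MOBenchmark`, which is defined later in this patch.

```python
# Minimal sketch: multi-objective NAS-Bench-201 query on the cifar10-valid split.
from hpobench.benchmarks.nas.nasbench_201 import Cifar10ValidNasBench201MOBenchmark

benchmark = Cifar10ValidNasBench201MOBenchmark(rng=1)
config = benchmark.get_configuration_space(seed=1).sample_configuration()

# Evaluate after 200 epochs, averaged over the three stored seeds (777, 888, 999).
result = benchmark.objective_function(configuration=config,
                                      fidelity={'epoch': 200},
                                      data_seed=(777, 888, 999))

# The four objectives returned by this benchmark, cf. get_objective_names().
for name in ('misclassification_rate', 'num_flops', 'model_size', 'latency'):
    print(name, result['function_value'][name])
```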
@@ -336,6 +442,16 @@ def objective_function_test(self, configuration: Union[CS.Configuration, Dict], To prevent overfitting on a single seed, it is possible to pass a parameter ``rng`` as 'int' or 'np.random.RandomState' to this function. If this parameter is not given, the default random state is used. + data_seed : List, Tuple, None, int + The nasbench_201 benchmark include for each run 3 different seeds: 777, 888, 999. + The user can specify which seed to use. If more than one seed is given, the results are averaged + across the seeds but then the training time is the sum of the costs per seed. + When this value is explicitly set to `None`, the function will chose randomly one out of [777, 888, 999]. + + Note: + For some architectures (configurations) no run was available. We've set missing values to an + available value from another seed. Therefore, it is possible that run results are exactly the same for + different seeds. kwargs @@ -344,292 +460,110 @@ def objective_function_test(self, configuration: Union[CS.Configuration, Dict], Dict - function_value : Dict misclassification_rate : float - 1 - test accuracy + 1 - validation accuracy num_flops : float Number of floating point operations in M model_size : float Model size in MB latency : float Time to evaluate a configuration in seconds - cost : time to the network + time to validate + cost : time to train the network info : Dict - train_precision - train_losses - train_cost - eval_precision - eval_losses - eval_cost - fidelity : used fidelities in this evaluation + train_misclassification_rate : float + train_losses : float + train_cost : float + Time needed to train the network for 'epoch' many epochs. If more than one seed is given, + this field is the sum of the training time per network + eval_misclassification_rate : float + eval_losses : float + eval_cost : float + Time needed to train the network for 'epoch many epochs plus the time to evaluate the network on the + evaluation split. If more than one seed is given, this field is the sum of the eval cost per network + fidelity : Dict + used fidelities in this evaluation """ + return self._mo_objective_function(configuration=configuration, fidelity=fidelity, rng=rng, data_seed=data_seed, + **kwargs) - # The result dict should contain already all necessary information -> Just swap the function value from valid - # to test and the corresponding time cost - assert fidelity['epoch'] == 200, 'Only test data for the 200. epoch is available. ' - - if 'data_seed' in kwargs: - all_seeds_available = all([seed in kwargs['data_seed'] for seed in (777, 888, 999)]) - if not all_seeds_available: - logger.warning('You have not specified all available seeds for the ' - '`objective_function_test`. However, we are going to ignore them, ' - ' because we report test values only as mean across all seeds.' 
- f' Your given seeds: {kwargs["seed"]}') - del kwargs['data_seed'] - - result = self.objective_function(configuration=configuration, fidelity=fidelity, - data_seed=(777, 888, 999), - rng=rng, **kwargs) - result['function_value']['misclassification_rate'] = result['info']['test_precision'] - result['cost'] = result['info']['test_cost'] - return result - - @staticmethod - def config_to_structure_func(max_nodes: int): - """ - From https://github.com/D-X-Y/AutoDL-Projects/blob/master/exps/algos/BOHB.py - Author: https://github.com/D-X-Y [Xuanyi.Dong@student.uts.edu.au] - """ - def config_to_structure(config): - genotypes = [] - for i in range(1, max_nodes): - x_list = [] - for j in range(i): - node_str = f'{i}<-{j}' - op_name = config[node_str] - x_list.append((op_name, j)) - genotypes.append(tuple(x_list)) - return NasBench201BaseMOBenchmark._Structure(genotypes) - return config_to_structure - - @staticmethod - def get_search_spaces(xtype: str, name: str) -> List[Text]: - """ obtain the search space, i.e., a dict mapping the operation name into a python-function for this op - From https://github.com/D-X-Y/AutoDL-Projects/blob/master/lib/models/__init__.py - Author: https://github.com/D-X-Y [Xuanyi.Dong@student.uts.edu.au] + @AbstractMultiObjectiveBenchmark.check_parameters + def objective_function_test(self, configuration: Union[CS.Configuration, Dict], + fidelity: Union[Dict, None] = None, + rng: Union[np.random.RandomState, int, None] = None, + **kwargs) -> Dict: """ - # pylint: disable=no-else-return - if xtype == 'cell': - NAS_BENCH_201 = ['none', 'skip_connect', 'nor_conv_1x1', 'nor_conv_3x3', 'avg_pool_3x3'] - SearchSpaceNames = {'nas-bench-201': NAS_BENCH_201} - assert name in SearchSpaceNames, 'invalid name [{:}] in {:}'.format(name, SearchSpaceNames.keys()) - return SearchSpaceNames[name] - else: - raise ValueError('invalid search-space type is {:}'.format(xtype)) + Get the validated results from the NASBench201. Runs a given configuration on the largest budget (here: 200). + The test function uses all data set seeds (777, 888, 999). - @staticmethod - def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: - """ - Return the CS representation of the search space. - From https://github.com/D-X-Y/AutoDL-Projects/blob/master/exps/algos/BOHB.py - Author: https://github.com/D-X-Y [Xuanyi.Dong@student.uts.edu.au] + See also :py:meth:`~hpobench.benchmarks.nas.nasbench_201.objective_function` Parameters ---------- - seed : int, None - Random seed for the configuration space. - - Returns - ------- - CS.ConfigurationSpace - - Containing the benchmark's hyperparameter - """ - seed = seed if seed is not None else np.random.randint(1, 100000) - cs = CS.ConfigurationSpace(seed=seed) - - search_space = NasBench201BaseMOBenchmark.get_search_spaces('cell', 'nas-bench-201') - hps = [CS.CategoricalHyperparameter(f'{i}<-{j}', search_space) for i in range(1, MAX_NODES) for j in range(i)] - cs.add_hyperparameters(hps) - return cs - - @staticmethod - def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: - """ - Creates a ConfigSpace.ConfigurationSpace containing all fidelity parameters for - the NAS Benchmark 201. + configuration + fidelity: Dict, None + epoch: int - Values: [200] + Number of epochs an architecture was trained. + Note: We only have test performance on the last epoch. + Fidelity parameters, check get_fidelity_space(). Uses default (max) value if None. + rng : np.random.RandomState, int, None + Random seed to use in the benchmark. 
- Fidelities: - - epoch: int - The loss / accuracy at `epoch`. Can be from 0 to 199. + To prevent overfitting on a single seed, it is possible to pass a + parameter ``rng`` as 'int' or 'np.random.RandomState' to this function. + If this parameter is not given, the default random state is used. - Parameters - ---------- - seed : int, None - Fixing the seed for the ConfigSpace.ConfigurationSpace + kwargs Returns ------- - ConfigSpace.ConfigurationSpace + Dict - + function_value : Dict + misclassification_rate : float + 1 - test accuracy + num_flops : float + Number of floating point operations in M + model_size : float + Model size in MB + latency : float + Time to evaluate a configuration in seconds + cost : time to the network + time to validate + info : Dict + train_misclassification_rate + train_losses + train_cost + eval_misclassification_rate + eval_losses + eval_cost + fidelity : used fidelities in this evaluation """ - seed = seed if seed is not None else np.random.randint(1, 100000) - fidel_space = CS.ConfigurationSpace(seed=seed) - - fidel_space.add_hyperparameters([ - CS.UniformIntegerHyperparameter('epoch', lower=1, upper=200, default_value=200) - ]) - - return fidel_space + return self._mo_objective_function_test(configuration=configuration, fidelity=fidelity, rng=rng, **kwargs) @staticmethod def get_objective_names() -> List[str]: return ['misclassification_rate', 'num_flops', 'model_size', 'latency'] - @staticmethod - def get_meta_information() -> Dict: - """ Returns the meta information for the benchmark """ - return {'name': 'NAS-Bench-201: Extending the Scope of Reproducible Neural Architecture Search', - 'references': ['@article{dong2020bench,' - 'title = {Nas-bench-201: Extending the scope of reproducible neural ' - ' architecture search},' - 'author = {Dong, Xuanyi and Yang, Yi},' - 'journal = {arXiv preprint arXiv:2001.00326},' - 'year = {2020}}', - 'https://openreview.net/forum?id=HJxyZkBKDr', - ], - 'code': 'https://github.com/D-X-Y/AutoDL-Projects', - } - class _Structure: - def __init__(self, genotype): - assert isinstance(genotype, (list, tuple)), 'invalid class of genotype : {:}'.format(type(genotype)) - self.node_num = len(genotype) + 1 - self.nodes = [] - self.node_N = [] - for idx, node_info in enumerate(genotype): - assert isinstance(node_info, (list, tuple)), 'invalid class of node_info : {:}'.format(type(node_info)) - assert len(node_info) >= 1, 'invalid length : {:}'.format(len(node_info)) - for node_in in node_info: - assert isinstance(node_in, (list, tuple)), 'invalid class of in-node : {:}'.format(type(node_in)) - assert len(node_in) == 2 and node_in[1] <= idx, 'invalid in-node : {:}'.format(node_in) - self.node_N.append(len(node_info)) - self.nodes.append(tuple(deepcopy(node_info))) - - def tostr(self): - """ Helper function: Create a string representation of the configuration """ - strings = [] - for node_info in self.nodes: - string = '|'.join([x[0] + '~{:}'.format(x[1]) for x in node_info]) - string = '|{:}|'.format(string) - strings.append(string) - return '+'.join(strings) - - def __repr__(self): - return ( - '{name}({node_num} nodes with {node_info})'.format(name=self.__class__.__name__, node_info=self.tostr(), - **self.__dict__)) - - def __len__(self): - return len(self.nodes) + 1 - - def __getitem__(self, index): - return self.nodes[index] - - -class Cifar10ValidNasBench201MOBenchmark(NasBench201BaseMOBenchmark): +class Cifar10ValidNasBench201MOBenchmark(_NasBench201BaseMOBenchmark): def __init__(self, rng: Union[np.random.RandomState, int, 
None] = None, **kwargs): super(Cifar10ValidNasBench201MOBenchmark, self).__init__(dataset='cifar10-valid', rng=rng, **kwargs) -class Cifar100NasBench201MOBenchmark(NasBench201BaseMOBenchmark): +class Cifar100NasBench201MOBenchmark(_NasBench201BaseMOBenchmark): def __init__(self, rng: Union[np.random.RandomState, int, None] = None, **kwargs): super(Cifar100NasBench201MOBenchmark, self).__init__(dataset='cifar100', rng=rng, **kwargs) -class ImageNetNasBench201MOBenchmark(NasBench201BaseMOBenchmark): +class ImageNetNasBench201MOBenchmark(_NasBench201BaseMOBenchmark): def __init__(self, rng: Union[np.random.RandomState, int, None] = None, **kwargs): super(ImageNetNasBench201MOBenchmark, self).__init__(dataset='ImageNet16-120', rng=rng, **kwargs) -class NasBench201SOBenchmark(AbstractBenchmark): - def __init__(self, dataset: str, - rng: Union[np.random.RandomState, int, None] = None, **kwargs): - """ - Benchmark interface to the NASBench201 Benchmarks. The NASBench201 contains - results for architectures on 4 different data sets. - - We have split the "api" file from NASBench201 in separate files per data set. - The original "api" file contains all data sets, but loading this single file took too much RAM. - - We recommend to not call this base class directly but using the correct subclass below. - - The parameter ``dataset`` indicates which data set was used for training. - - For each data set the metrics - 'train_acc1es', 'train_losses', 'train_times', 'eval_acc1es', 'eval_times', 'eval_losses' are available. - However, the data sets report them on different data splits (train, train + valid, test, valid or test+valid). - - We summarize all information about the data sets in the following tables. - - Datastet Metric Avail.Epochs Explanation returned by HPOBENCH - ---------------------------------------------------------------------------------------- - cifar10-valid train [0-199] training set - cifar10-valid x-valid [0-199] validation set objective function - cifar10-valid x-test - cifar10-valid ori-test 199 test set objective function test - - cifar100 train [0-199] training set - cifar100 x-valid 199 validation set - cifar100 x-test 199 test set objective function test - cifar100 ori-test [0-199] validation + test set objective function - - ImageNet16-120 train [0-199] training set - ImageNet16-120 x-valid 199 validation set - ImageNet16-120 x-test 199 test set objective function test - ImageNet16-120 ori-test [0-199] validation + test set objective function - - - We have also extracted the incumbents per split. We report the incumbent accuracy and loss performance - i) by taking the maximum value across all seeds and configurations - ii) averaged across the three available seeds - - i) The best possible incumbents (NO AVG!) 
ii) The "average" incumbent - Datastet Metric (Index of Arch, Accuracy) (Index, Loss) (Index of Arch, Accuracy) (Index, Loss) - ---------------------------------------------------------------------------------------------------------------------------------------------------------- - cifar10-valid train (258, 100.0) (2778, 0.001179278278425336) (10154, 100) (2778, 0.0013082386429297428) - cifar10-valid x-valid (6111, 91.71999999023437) (14443, 0.3837750501537323) (6111, 91.60666665039064) (3888, 0.3894046771335602) - cifar10-valid x-test - cifar10-valid ori-test (14174, 91.65) (3385, 0.3850496160507202) (1459, 91.52333333333333) (3385, 0.3995230517864227) - - cifar100 train (9930, 99.948) (9930, 0.012630240231156348) (9930, 99.93733333333334) (9930, 0.012843489621082942) - cifar100 x-valid (13714, 73.71999998779297) (13934, 1.1490126512527465) (9930, 73.4933333577474) (7361, 1.1600867895126343) - cifar100 x-test (1459, 74.28000004882813) (15383, 1.1427113876342774) (9930, 73.51333332112631) (7337, 1.1747569534301758) - cifar100 ori-test (9930, 73.88) (13706, 1.1610547459602356) (9930, 73.50333333333333) (7361, 1.1696554500579834) - - ImageNet16-120 train (9930, 73.2524719841793) (9930, 0.9490517352046979) (9930, 73.22918040138735) (9930, 0.9524298415108582) - ImageNet16-120 x-valid (13778, 47.39999985758463) (10721, 2.0826991437276203) (10676, 46.73333327229818) (10721, 2.0915397168795264) - ImageNet16-120 x-test (857, 48.03333317057292) (12887, 2.0940088628133138) (857, 47.31111100599501) (11882, 2.106453532218933) - ImageNet16-120 ori-test (857, 47.083333353678384) (11882, 2.0950548852284747) (857, 46.8444444647895) (11882, 2.1028235816955565) - - - Note: - - The parameter epoch is 0 indexed! - - In the original data, the training splits are always marked with the key 'train' but they use different - identifiers to refer to the available evaluation splits. We report them also in the table below. - - We exclude the data set cifar10 from this benchmark. - - In NasBench201, not all architectures have values for the three seeds. To increase robustness, we have patched - missing values with the values from an available seed. - - Some further remarks: - - cifar10-valid is trained on the train split and tested on the validation split. - - The train metrics are dictionaries with epochs (e.g. 0, 1, 2) as key and the metric as value. - The evaluation metrics, however, have as key the identifiers, e.g. ori-test@0, with 0 indicating the epoch. - Also, each data set reports values for all 200 epochs for a metric on the specified split - and a single value on the 200th epoch for the other splits. - - Parameters - ---------- - dataset : str - One of cifar10-valid, cifar10, cifar100, ImageNet16-120. - rng : np.random.RandomState, int, None - Random seed for the benchmark's random state. 
- """ # noqa: E501 - - super(NasBench201SOBenchmark, self).__init__(rng=rng, **kwargs) - self.mo_benchmark = NasBench201BaseMOBenchmark(rng=rng, dataset=dataset, **kwargs) +class _NasBench201SOBenchmark(_NasBench201BaseBenchmark, AbstractSingleObjectiveBenchmark): # pylint: disable=arguments-differ - @AbstractBenchmark.check_parameters + @AbstractSingleObjectiveBenchmark.check_parameters def objective_function(self, configuration: Union[CS.Configuration, Dict], fidelity: Union[Dict, CS.Configuration, None] = None, rng: Union[np.random.RandomState, int, None] = None, @@ -674,15 +608,15 @@ def objective_function(self, configuration: Union[CS.Configuration, Dict], Returns ------- Dict - - function_value : training precision + function_value : training misclassification_rate cost : time to train the network info : Dict - train_precision : float + train_misclassification_rate : float train_losses : float train_cost : float Time needed to train the network for 'epoch' many epochs. If more than one seed is given, this field is the sum of the training time per network - eval_precision : float + eval_misclassification_rate : float eval_losses : float eval_cost : float Time needed to train the network for 'epoch many epochs plus the time to evaluate the network on the @@ -690,14 +624,13 @@ def objective_function(self, configuration: Union[CS.Configuration, Dict], fidelity : Dict used fidelities in this evaluation """ - results = self.mo_benchmark.objective_function( + results = self._mo_objective_function( configuration=configuration, fidelity=fidelity, rng=rng, data_seed=data_seed, **kwargs ) - results['function_value'] = results['function_value']['misclassification_rate'] return results - @AbstractBenchmark.check_parameters + @AbstractSingleObjectiveBenchmark.check_parameters def objective_function_test(self, configuration: Union[CS.Configuration, Dict], fidelity: Union[Dict, None] = None, rng: Union[np.random.RandomState, int, None] = None, @@ -729,90 +662,44 @@ def objective_function_test(self, configuration: Union[CS.Configuration, Dict], Returns ------- Dict - - function_value : evaluation precision + function_value : evaluation misclassification_rate cost : time to the network + time to validate info : Dict - train_precision + train_misclassification_rate train_losses train_cost - eval_precision + eval_misclassification_rate eval_losses eval_cost fidelity : used fidelities in this evaluation """ - results = self.mo_benchmark.objective_function_test( + results = self._mo_objective_function_test( configuration=configuration, fidelity=fidelity, rng=rng, **kwargs ) - results['function_value'] = results['function_value']['misclassification_rate'] return results - @staticmethod - def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: - """ - Return the CS representation of the search space. - From https://github.com/D-X-Y/AutoDL-Projects/blob/master/exps/algos/BOHB.py - Author: https://github.com/D-X-Y [Xuanyi.Dong@student.uts.edu.au] - - Parameters - ---------- - seed : int, None - Random seed for the configuration space. - - Returns - ------- - CS.ConfigurationSpace - - Containing the benchmark's hyperparameter - """ - return NasBench201BaseMOBenchmark.get_configuration_space(seed=seed) - - @staticmethod - def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: - """ - Creates a ConfigSpace.ConfigurationSpace containing all fidelity parameters for - the NAS Benchmark 201. - - Fidelities: - - epoch: int - The loss / accuracy at `epoch`. 
Can be from 0 to 199. - - Parameters - ---------- - seed : int, None - Fixing the seed for the ConfigSpace.ConfigurationSpace - - Returns - ------- - ConfigSpace.ConfigurationSpace - """ - return NasBench201BaseMOBenchmark.get_fidelity_space(seed=seed) - - @staticmethod - def get_meta_information() -> Dict: - """ Returns the meta information for the benchmark """ - return NasBench201BaseMOBenchmark.get_meta_information() - -class Cifar10ValidNasBench201Benchmark(NasBench201SOBenchmark): +class Cifar10ValidNasBench201Benchmark(_NasBench201SOBenchmark): def __init__(self, rng: Union[np.random.RandomState, int, None] = None, **kwargs): super(Cifar10ValidNasBench201Benchmark, self).__init__(dataset='cifar10-valid', rng=rng, **kwargs) -class Cifar100NasBench201Benchmark(NasBench201SOBenchmark): +class Cifar100NasBench201Benchmark(_NasBench201SOBenchmark): def __init__(self, rng: Union[np.random.RandomState, int, None] = None, **kwargs): super(Cifar100NasBench201Benchmark, self).__init__(dataset='cifar100', rng=rng, **kwargs) -class ImageNetNasBench201Benchmark(NasBench201SOBenchmark): +class ImageNetNasBench201Benchmark(_NasBench201SOBenchmark): def __init__(self, rng: Union[np.random.RandomState, int, None] = None, **kwargs): super(ImageNetNasBench201Benchmark, self).__init__(dataset='ImageNet16-120', rng=rng, **kwargs) -class _NasBench201BaseBenchmarkOriginal(NasBench201SOBenchmark): +class _NasBench201SOBenchmarkOriginal(_NasBench201SOBenchmark): @staticmethod def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: @@ -825,7 +712,8 @@ def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: experiments from DEHB [DEHB](https://github.com/automl/DEHB/tree/937dd5cf48e79f6d587ea2ff408cb5ad9a8dce46/dehb/examples) - Fidelities: + Fidelities + ---------- epoch: int The loss / accuracy at `epoch`. @@ -851,26 +739,26 @@ def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: @staticmethod def get_meta_information() -> Dict: """ Returns the meta information for the benchmark """ - meta_information = NasBench201SOBenchmark.get_meta_information() + meta_information = _NasBench201SOBenchmark.get_meta_information() meta_information['note'] = \ 'This version of the benchmark implements the fidelity space defined in the DEHB paper.' 
\ 'See [DEHB](https://github.com/automl/DEHB/tree/937dd5cf48e79f6d587ea2ff408cb5ad9a8dce46/dehb/examples)' return meta_information -class Cifar10ValidNasBench201BenchmarkOriginal(_NasBench201BaseBenchmarkOriginal): +class Cifar10ValidNasBench201BenchmarkOriginal(_NasBench201SOBenchmarkOriginal): def __init__(self, rng: Union[np.random.RandomState, int, None] = None, **kwargs): super(Cifar10ValidNasBench201BenchmarkOriginal, self).__init__(dataset='cifar10-valid', rng=rng, **kwargs) -class Cifar100NasBench201BenchmarkOriginal(_NasBench201BaseBenchmarkOriginal): +class Cifar100NasBench201BenchmarkOriginal(_NasBench201SOBenchmarkOriginal): def __init__(self, rng: Union[np.random.RandomState, int, None] = None, **kwargs): super(Cifar100NasBench201BenchmarkOriginal, self).__init__(dataset='cifar100', rng=rng, **kwargs) -class ImageNetNasBench201BenchmarkOriginal(_NasBench201BaseBenchmarkOriginal): +class ImageNetNasBench201BenchmarkOriginal(_NasBench201SOBenchmarkOriginal): def __init__(self, rng: Union[np.random.RandomState, int, None] = None, **kwargs): super(ImageNetNasBench201BenchmarkOriginal, self).__init__(dataset='ImageNet16-120', rng=rng, **kwargs) diff --git a/hpobench/benchmarks/nas/tabular_benchmarks.py b/hpobench/benchmarks/nas/tabular_benchmarks.py index fd7404a0..5db34f2f 100644 --- a/hpobench/benchmarks/nas/tabular_benchmarks.py +++ b/hpobench/benchmarks/nas/tabular_benchmarks.py @@ -50,7 +50,6 @@ * First implementation """ import logging - from pathlib import Path from typing import Union, Dict, Tuple, List diff --git a/hpobench/benchmarks/rl/cartpole.py b/hpobench/benchmarks/rl/cartpole.py index 3bcaeab4..ea9ef053 100644 --- a/hpobench/benchmarks/rl/cartpole.py +++ b/hpobench/benchmarks/rl/cartpole.py @@ -20,12 +20,13 @@ """ import logging +import os import time from typing import Union, Dict import ConfigSpace as CS import numpy as np -import os + os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" import tensorflow as tf # noqa: E402 diff --git a/hpobench/benchmarks/surrogates/paramnet_benchmark.py b/hpobench/benchmarks/surrogates/paramnet_benchmark.py index 2e809b7b..35c7f80d 100644 --- a/hpobench/benchmarks/surrogates/paramnet_benchmark.py +++ b/hpobench/benchmarks/surrogates/paramnet_benchmark.py @@ -61,8 +61,8 @@ 0.0.1: * First implementation """ -import warnings import logging +import warnings from typing import Union, Dict import ConfigSpace as CS diff --git a/hpobench/benchmarks/surrogates/yahpo_gym.py b/hpobench/benchmarks/surrogates/yahpo_gym.py index 19522700..ad552acd 100644 --- a/hpobench/benchmarks/surrogates/yahpo_gym.py +++ b/hpobench/benchmarks/surrogates/yahpo_gym.py @@ -29,54 +29,104 @@ Changelog: ========== +0.0.2: + +* Add support for multi-objective benchmarks +* Add support for fairness benchmarks and interpretability benchmarks. +For these new benchmarks (fairness and interpretability), we recommend the following benchmarks and objectives: +For the entire list of available benchmarks, please take a look in the yahpo benchmark documentation. 
+ +Benchmark Name | Scenario | Objectives +--------------------|---------------|-------------- +fair_fgrrm | 7592 | mmce, feo + | 14965 | mmce, feo +--------------------|---------------|-------------- +fair_rpart | 317599 | mmce, ffomr + | 7592 | mmce, feo +--------------------|---------------|-------------- +fair_ranger | 317599 | mmce, fpredp + | 14965 | mmce, fpredp +--------------------|---------------|-------------- +fair_xgboost | 317599 | mmce, ffomr + | 7592 | mmce, ffnr +--------------------|---------------|-------------- +fair_super | 14965 | mmce, feo + | 317599 | mmce, ffnr +--------------------|---------------|-------------- + + +Benchmark Name | Scenario | Objectives +--------------------|---------------|-------------- +iaml_glmnet | 1489 | mmce, nf + | 40981 | mmce, nf +--------------------|---------------|-------------- +iaml_rpart | 1489 | mmce, nf + | 41146 | mmce, nf +--------------------|---------------|-------------- +iaml_ranger | 40981 | mmce, nf + | 41146 | mmce, nf +--------------------|---------------|-------------- +iaml_xgboost | 40981 | mmce, nf + | 41146 | mmce, nf +--------------------|---------------|-------------- +iaml_super | 40981 | mmce, nf + | 41146 | mmce, nf +--------------------|---------------|-------------- + 0.0.1: * First implementation """ -import os import logging +from pathlib import Path from typing import Union, Dict, List import ConfigSpace as CS import numpy as np - from yahpo_gym.benchmark_set import BenchmarkSet -from hpobench.abstract_benchmark import AbstractMultiObjectiveBenchmark, AbstractBenchmark -__version__ = '0.0.1' +from hpobench.abstract_benchmark import AbstractMultiObjectiveBenchmark, AbstractSingleObjectiveBenchmark +from hpobench.util.data_manager import YAHPODataManager -logger = logging.getLogger('YAHPOGym') +__version__ = '0.0.2' +logger = logging.getLogger('YAHPOGym') -class YAHPOGymMOBenchmark(AbstractMultiObjectiveBenchmark): +class YAHPOGymBaseBenchmark: def __init__(self, scenario: str, instance: str, + data_dir: Union[Path, str, None] = None, + multi_thread: bool = True, rng: Union[np.random.RandomState, int, None] = None): """ - For a list of available scenarios and instances see - 'https://slds-lmu.github.io/yahpo_gym/scenarios.html' + Base Benchmark for all single and multi objective yahpo surrogate benchmarks. Parameters ---------- scenario : str - Name for the surrogate data. Must be one of ["lcbench", "fcnet", "nb301", "rbv2_svm", - "rbv2_ranger", "rbv2_rpart", "rbv2_glmnet", "rbv2_aknn", "rbv2_xgboost", "rbv2_super"] + Name for the surrogate data. Must be one of + ["lcbench", "fcnet", "nb301", "rbv2_svm", + "rbv2_ranger", "rbv2_rpart", "rbv2_glmnet", "rbv2_aknn", "rbv2_xgboost", "rbv2_super", + "fair_ranger", "fair_rpart", "fair_fgrrm", "fair_xgboost", "fair_super", + "iaml_ranger", "iaml_rpart", "iaml_glmnet", "iaml_xgboost", "iaml_super"] instance : str A valid instance for the scenario. See `self.benchset.instances`. + data_dir: Optional, str, Path + Directory, where the yahpo data is stored. + Download automatically from https://github.com/slds-lmu/yahpo_data/tree/fair + multi_thread: bool + Flag to run ONNX runtime with a single thread. Might be important on compute clusters. + Defaults to True rng : np.random.RandomState, int, None """ - - # When in the containerized version, redirect to the data inside the container. 
- if 'YAHPO_CONTAINER' in os.environ: - from yahpo_gym.local_config import LocalConfiguration - local_config = LocalConfiguration() - local_config.init_config(data_path='/home/data/yahpo_data') + self.data_manager = YAHPODataManager(data_dir=data_dir) + self.data_manager.load() self.scenario = scenario self.instance = instance - self.benchset = BenchmarkSet(scenario, active_session=True) + self.benchset = BenchmarkSet(scenario, active_session=True, multithread=multi_thread) self.benchset.set_instance(instance) logger.info(f'Start Benchmark for scenario {scenario} and instance {instance}') - super(YAHPOGymMOBenchmark, self).__init__(rng=rng) + super(YAHPOGymBaseBenchmark, self).__init__(rng=rng) # pylint: disable=arguments-differ def get_configuration_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: @@ -86,8 +136,7 @@ def get_configuration_space(self, seed: Union[int, None] = None) -> CS.Configura def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: return self.benchset.get_fidelity_space(seed=seed) - @AbstractMultiObjectiveBenchmark.check_parameters - def objective_function(self, configuration: Union[CS.Configuration, Dict], + def _mo_objective_function(self, configuration: Union[CS.Configuration, Dict], fidelity: Union[CS.Configuration, Dict, None] = None, rng: Union[np.random.RandomState, int, None] = None, **kwargs) -> Dict: @@ -103,17 +152,6 @@ def objective_function(self, configuration: Union[CS.Configuration, Dict], "cost": cost, 'info': {'fidelity': fidelity}} - @AbstractMultiObjectiveBenchmark.check_parameters - def objective_function_test(self, configuration: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - rng: Union[np.random.RandomState, int, None] = None, **kwargs) \ - -> Dict: - return self.objective_function(configuration, fidelity=fidelity, rng=rng) - - # pylint: disable=arguments-differ - def get_objective_names(self) -> List[str]: - return self.benchset.config.y_names - @staticmethod def get_meta_information(): """ Returns the meta information for the benchmark """ @@ -130,9 +168,11 @@ def get_meta_information(): 'code': 'https://github.com/pfistfl/yahpo_gym/yahpo_gym'} -class YAHPOGymBenchmark(AbstractBenchmark): +class YAHPOGymMOBenchmark(YAHPOGymBaseBenchmark, AbstractMultiObjectiveBenchmark): def __init__(self, scenario: str, instance: str, objective: str = None, + data_dir: Union[Path, str, None] = None, + multi_thread: bool = True, rng: Union[np.random.RandomState, int, None] = None): """ For a list of available scenarios and instances see @@ -140,33 +180,88 @@ def __init__(self, scenario: str, instance: str, objective: str = None, Parameters ---------- scenario : str - Name for the surrogate data. Must be one of ["lcbench", "fcnet", "nb301", "rbv2_svm", - "rbv2_ranger", "rbv2_rpart", "rbv2_glmnet", "rbv2_aknn", "rbv2_xgboost", "rbv2_super"] + Name for the surrogate data. Must be one of + ["lcbench", "fcnet", "nb301", "rbv2_svm", + "rbv2_ranger", "rbv2_rpart", "rbv2_glmnet", "rbv2_aknn", "rbv2_xgboost", "rbv2_super", + "fair_ranger", "fair_rpart", "fair_fgrrm", "fair_xgboost", "fair_super", + "iaml_ranger", "iaml_rpart", "iaml_glmnet", "iaml_xgboost", "iaml_super"] + instance : str + A valid instance for the scenario. See `self.benchset.instances`. + data_dir: Optional, str, Path + Directory, where the yahpo data is stored. + Download automatically from https://github.com/slds-lmu/yahpo_data/tree/fair + multi_thread: bool + Flag to run ONNX runtime with a single thread. 
Might be important on compute clusters. + Defaults to True + rng : np.random.RandomState, int, None + """ + self.objective = objective + super(YAHPOGymMOBenchmark, self).__init__(scenario=scenario, instance=instance, rng=rng, data_dir=data_dir, multi_thread=multi_thread) + + @AbstractMultiObjectiveBenchmark.check_parameters + def objective_function(self, configuration: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + rng: Union[np.random.RandomState, int, None] = None, **kwargs) -> Dict: + + return self._mo_objective_function(configuration, fidelity, rng, **kwargs) + + @AbstractMultiObjectiveBenchmark.check_parameters + def objective_function_test(self, configuration: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + rng: Union[np.random.RandomState, int, None] = None, **kwargs) \ + -> Dict: + return self.objective_function(configuration, fidelity=fidelity, rng=rng) + + # pylint: disable=arguments-differ + def get_objective_names(self) -> List[str]: + return self.benchset.config.y_names + + +class YAHPOGymBenchmark(YAHPOGymBaseBenchmark, AbstractSingleObjectiveBenchmark): + + def __init__(self, scenario: str, instance: str, objective: str = None, + data_dir: Union[Path, str, None] = None, + multi_thread: bool = True, + rng: Union[np.random.RandomState, int, None] = None): + """ + For a list of available scenarios and instances see + 'https://slds-lmu.github.io/yahpo_gym/scenarios.html' + Parameters + ---------- + scenario : str + Name for the surrogate data. Must be one of + ["lcbench", "fcnet", "nb301", "rbv2_svm", + "rbv2_ranger", "rbv2_rpart", "rbv2_glmnet", "rbv2_aknn", "rbv2_xgboost", "rbv2_super", + "fair_ranger", "fair_rpart", "fair_fgrrm", "fair_xgboost", "fair_super", + "iaml_ranger", "iaml_rpart", "iaml_glmnet", "iaml_xgboost", "iaml_super"] instance : str A valid instance for the scenario. See `self.benchset.instances`. objective : str Name of the (single-crit) objective. See `self.benchset.config.y_names`. Initialized to None, picks the first element in y_names. + data_dir: Optional, str, Path + Directory, where the yahpo data is stored. + Download automatically from https://github.com/slds-lmu/yahpo_data/tree/fair + multi_thread: bool + Flag to run ONNX runtime with a single thread. Might be important on compute clusters. + Defaults to True rng : np.random.RandomState, int, None """ - - self.backbone = YAHPOGymMOBenchmark(scenario=scenario, instance=instance, rng=rng) self.objective = objective + super(YAHPOGymBenchmark, self).__init__(scenario=scenario, instance=instance, rng=rng, data_dir=data_dir, multi_thread=multi_thread) - super(YAHPOGymBenchmark, self).__init__(rng=rng) - - @AbstractBenchmark.check_parameters + @AbstractSingleObjectiveBenchmark.check_parameters def objective_function(self, configuration: Union[CS.Configuration, Dict], fidelity: Union[Dict, CS.Configuration, None] = None, rng: Union[np.random.RandomState, int, None] = None, **kwargs) -> Dict: - mo_results = self.backbone.objective_function(configuration=configuration, - fidelity=fidelity, - **kwargs) + mo_results = self._mo_objective_function(configuration=configuration, + fidelity=fidelity, + **kwargs) # If not objective is set, we just grab the first returned entry. 
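To make the new YAHPO constructor arguments concrete, a hedged sketch follows (the `fair_fgrrm` benchmark on instance 7592 is taken from the fairness table in the changelog above; the call pattern mirrors the code in this patch but the snippet itself is illustrative only):

```python
# Hedged sketch, not part of the patch.
from hpobench.benchmarks.surrogates.yahpo_gym import YAHPOGymMOBenchmark

bench = YAHPOGymMOBenchmark(scenario='fair_fgrrm', instance='7592', multi_thread=False)
config = bench.get_configuration_space(seed=0).sample_configuration()
result = bench.objective_function(configuration=config)
print(bench.get_objective_names())   # e.g. ['mmce', 'feo', ...]
print(result['function_value'])      # dict mapping objective name -> value
```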
if self.objective is None: - self.objective = self.backbone.benchset.config.y_names[0] + self.objective = self.benchset.config.y_names[0] obj_value = mo_results['function_value'][self.objective] @@ -174,20 +269,8 @@ def objective_function(self, configuration: Union[CS.Configuration, Dict], "cost": mo_results['cost'], 'info': {'fidelity': fidelity, 'objectives': mo_results['function_value']}} - @AbstractBenchmark.check_parameters + @AbstractSingleObjectiveBenchmark.check_parameters def objective_function_test(self, configuration: Union[CS.Configuration, Dict], fidelity: Union[Dict, CS.Configuration, None] = None, rng: Union[np.random.RandomState, int, None] = None, **kwargs) -> Dict: return self.objective_function(configuration, fidelity=fidelity, rng=rng) - - # pylint: disable=arguments-differ - def get_configuration_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: - return self.backbone.get_configuration_space(seed=seed) - - # pylint: disable=arguments-differ - def get_fidelity_space(self, seed: Union[int, None] = None) -> CS.ConfigurationSpace: - return self.backbone.get_fidelity_space(seed=seed) - - @staticmethod - def get_meta_information() -> Dict: - return YAHPOGymMOBenchmark.get_meta_information() diff --git a/hpobench/config.py b/hpobench/config.py index 9d7964e0..cd46c6e5 100644 --- a/hpobench/config.py +++ b/hpobench/config.py @@ -64,7 +64,16 @@ def __init__(self): # Options for the singularity container self.socket_dir = Path(self.socket_dir).expanduser().absolute() - self.container_dir = self.cache_dir / f'hpobench-{os.getuid()}' + + # os.getuid is only for posix os. Make it compatible with windows + # https://stackoverflow.com/questions/842059/is-there-a-portable-way-to-get-the-current-username-in-python + if os.name == 'nt': + import getpass + user_name = getpass.getuser() + else: + user_name = os.getuid() + + self.container_dir = self.cache_dir / f'hpobench-{user_name}' self.container_source = 'oras://gitlab.tf.uni-freiburg.de:5050/muelleph/hpobench-registry' self.pyro_connect_max_wait = 400 diff --git a/hpobench/container/benchmarks/ml/__init__.py b/hpobench/container/benchmarks/ml/__init__.py index ed2ce40f..f342f5f8 100644 --- a/hpobench/container/benchmarks/ml/__init__.py +++ b/hpobench/container/benchmarks/ml/__init__.py @@ -6,7 +6,7 @@ from hpobench.container.benchmarks.ml.svm_benchmark import SVMBenchmark, SVMBenchmarkBB, SVMBenchmarkMF from hpobench.container.benchmarks.ml.tabular_benchmark import TabularBenchmark from hpobench.container.benchmarks.ml.xgboost_benchmark import XGBoostBenchmark, XGBoostBenchmarkBB, XGBoostBenchmarkMF - +from hpobench.container.benchmarks.ml.yahpo_benchmark import YAHPOGymRawBenchmark, YAHPOGymMORawBenchmark __all__ = ['HistGBBenchmark', 'HistGBBenchmarkBB', 'HistGBBenchmarkMF', 'LRBenchmark', 'LRBenchmarkBB', 'LRBenchmarkMF', @@ -14,4 +14,5 @@ 'RandomForestBenchmark', 'RandomForestBenchmarkBB', 'RandomForestBenchmarkMF', 'SVMBenchmark', 'SVMBenchmarkBB', 'SVMBenchmarkMF', 'TabularBenchmark', - 'XGBoostBenchmark', 'XGBoostBenchmarkBB', 'XGBoostBenchmarkMF'] + 'XGBoostBenchmark', 'XGBoostBenchmarkBB', 'XGBoostBenchmarkMF', + 'YAHPOGymRawBenchmark', 'YAHPOGymMORawBenchmark'] diff --git a/hpobench/container/benchmarks/ml/lr_benchmark.py b/hpobench/container/benchmarks/ml/lr_benchmark.py index 979cda3e..61b80a13 100644 --- a/hpobench/container/benchmarks/ml/lr_benchmark.py +++ b/hpobench/container/benchmarks/ml/lr_benchmark.py @@ -6,27 +6,31 @@ from hpobench.container.client_abstract_benchmark import 
AbstractBenchmarkClient +container_name = "ml_mmfb" +container_version = "0.0.4" + + class LRBenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'LRBenchmark') - kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(LRBenchmark, self).__init__(**kwargs) class LRBenchmarkBB(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'LRBenchmarkBB') - kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(LRBenchmarkBB, self).__init__(**kwargs) class LRBenchmarkMF(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'LRBenchmarkMF') - kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(LRBenchmarkMF, self).__init__(**kwargs) diff --git a/hpobench/container/benchmarks/ml/nn_benchmark.py b/hpobench/container/benchmarks/ml/nn_benchmark.py index 04955e82..d4b0f52a 100644 --- a/hpobench/container/benchmarks/ml/nn_benchmark.py +++ b/hpobench/container/benchmarks/ml/nn_benchmark.py @@ -6,27 +6,31 @@ from hpobench.container.client_abstract_benchmark import AbstractBenchmarkClient +container_name = "ml_mmfb" +container_version = "0.0.4" + + class NNBenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'NNBenchmark') - kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(NNBenchmark, self).__init__(**kwargs) class NNBenchmarkBB(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'NNBenchmarkBB') - kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(NNBenchmarkBB, self).__init__(**kwargs) class NNBenchmarkMF(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'NNBenchmarkMF') - kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(NNBenchmarkMF, self).__init__(**kwargs) diff --git a/hpobench/container/benchmarks/ml/rf_benchmark.py b/hpobench/container/benchmarks/ml/rf_benchmark.py index a414349d..13e9bb47 100644 --- a/hpobench/container/benchmarks/ml/rf_benchmark.py +++ b/hpobench/container/benchmarks/ml/rf_benchmark.py @@ -6,27 +6,31 @@ from 
hpobench.container.client_abstract_benchmark import AbstractBenchmarkClient +container_name = "ml_mmfb" +container_version = "0.0.4" + + class RandomForestBenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'RandomForestBenchmark') - kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(RandomForestBenchmark, self).__init__(**kwargs) class RandomForestBenchmarkBB(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'RandomForestBenchmarkBB') - kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(RandomForestBenchmarkBB, self).__init__(**kwargs) class RandomForestBenchmarkMF(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'RandomForestBenchmarkMF') - kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(RandomForestBenchmarkMF, self).__init__(**kwargs) diff --git a/hpobench/container/benchmarks/ml/svm_benchmark.py b/hpobench/container/benchmarks/ml/svm_benchmark.py index 7547a81a..7a20f40b 100644 --- a/hpobench/container/benchmarks/ml/svm_benchmark.py +++ b/hpobench/container/benchmarks/ml/svm_benchmark.py @@ -6,27 +6,31 @@ from hpobench.container.client_abstract_benchmark import AbstractBenchmarkClient +container_name = "ml_mmfb" +container_version = "0.0.4" + + class SVMBenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'SVMBenchmark') - kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(SVMBenchmark, self).__init__(**kwargs) class SVMBenchmarkMF(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'SVMBenchmarkMF') - kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(SVMBenchmarkMF, self).__init__(**kwargs) class SVMBenchmarkBB(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'SVMBenchmarkBB') - kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(SVMBenchmarkBB, self).__init__(**kwargs) diff --git a/hpobench/container/benchmarks/ml/svm_benchmark_old.py b/hpobench/container/benchmarks/ml/svm_benchmark_old.py 
deleted file mode 100644 index 4955f057..00000000 --- a/hpobench/container/benchmarks/ml/svm_benchmark_old.py +++ /dev/null @@ -1,15 +0,0 @@ -#!/usr/bin/python3 -# -*- coding: utf-8 -*- - -""" Benchmark for the XGBoost Benchmark from hpobench/benchmarks/ml/xgboost_benchmark """ - -from hpobench.container.client_abstract_benchmark import AbstractBenchmarkClient - - -class SupportVectorMachine(AbstractBenchmarkClient): - def __init__(self, task_id: int, **kwargs): - kwargs['task_id'] = task_id - kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'SupportVectorMachine') - kwargs['container_name'] = kwargs.get('container_name', 'svm_benchmark') - kwargs['latest'] = kwargs.get('container_tag', '0.0.3') - super(SupportVectorMachine, self).__init__(**kwargs) diff --git a/hpobench/container/benchmarks/ml/tabular_benchmark.py b/hpobench/container/benchmarks/ml/tabular_benchmark.py index 6d19953b..5c8a22ef 100644 --- a/hpobench/container/benchmarks/ml/tabular_benchmark.py +++ b/hpobench/container/benchmarks/ml/tabular_benchmark.py @@ -6,11 +6,15 @@ from hpobench.container.client_abstract_benchmark import AbstractBenchmarkClient +container_name = "ml_tabular_benchmarks" +container_version = "0.0.4" + + class TabularBenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'TabularBenchmark') - kwargs['container_name'] = kwargs.get('container_name', 'ml_tabular_benchmarks') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(TabularBenchmark, self).__init__(**kwargs) diff --git a/hpobench/container/benchmarks/ml/xgboost_benchmark.py b/hpobench/container/benchmarks/ml/xgboost_benchmark.py index c82ea606..726d6f45 100644 --- a/hpobench/container/benchmarks/ml/xgboost_benchmark.py +++ b/hpobench/container/benchmarks/ml/xgboost_benchmark.py @@ -6,36 +6,42 @@ from hpobench.container.client_abstract_benchmark import AbstractBenchmarkClient +container_name = "ml_mmfb" +container_version = "0.0.4" + + class XGBoostBenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'XGBoostBenchmark') - kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(XGBoostBenchmark, self).__init__(**kwargs) class XGBoostBenchmarkBB(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'XGBoostBenchmarkBB') - kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(XGBoostBenchmarkBB, self).__init__(**kwargs) class XGBoostBenchmarkMF(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'XGBoostBenchmarkMF') - kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(XGBoostBenchmarkMF, 
self).__init__(**kwargs) class XGBoostSearchSpace3Benchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'XGBoostSearchSpace3Benchmark') - kwargs['container_name'] = kwargs.get('container_name', 'ml_mmfb') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + kwargs['container_name'] = kwargs.get('container_name', container_name) + kwargs['latest'] = kwargs.get('container_tag', container_version) super(XGBoostSearchSpace3Benchmark, self).__init__(**kwargs) -__all__ = ['XGBoostBenchmark', 'XGBoostBenchmarkBB', 'XGBoostBenchmarkMF'] +__all__ = [ + 'XGBoostBenchmark', 'XGBoostBenchmarkBB', 'XGBoostBenchmarkMF', 'XGBoostSearchSpace3Benchmark' +] diff --git a/hpobench/container/benchmarks/ml/xgboost_benchmark_old.py b/hpobench/container/benchmarks/ml/xgboost_benchmark_old.py deleted file mode 100644 index df475748..00000000 --- a/hpobench/container/benchmarks/ml/xgboost_benchmark_old.py +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/python3 -# -*- coding: utf-8 -*- - -""" Benchmark for the XGBoost Benchmark from hpobench/benchmarks/ml/xgboost_benchmark """ - -from hpobench.container.client_abstract_benchmark import AbstractBenchmarkClient - - -class XGBoostBenchmark(AbstractBenchmarkClient): - def __init__(self, task_id: int, **kwargs): - kwargs['task_id'] = task_id - kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'XGBoostBenchmark') - kwargs['container_name'] = kwargs.get('container_name', 'xgboost_benchmark') - kwargs['latest'] = kwargs.get('container_tag', '0.0.3') - super(XGBoostBenchmark, self).__init__(**kwargs) - - -class XGBoostExtendedBenchmark(AbstractBenchmarkClient): - def __init__(self, task_id: int, **kwargs): - kwargs['task_id'] = task_id - kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'XGBoostExtendedBenchmark') - kwargs['container_name'] = kwargs.get('container_name', 'xgboost_benchmark') - kwargs['latest'] = kwargs.get('container_tag', '0.0.3') - super(XGBoostExtendedBenchmark, self).__init__(**kwargs) diff --git a/hpobench/container/benchmarks/ml/yahpo_benchmark.py b/hpobench/container/benchmarks/ml/yahpo_benchmark.py new file mode 100644 index 00000000..e4d9cf0c --- /dev/null +++ b/hpobench/container/benchmarks/ml/yahpo_benchmark.py @@ -0,0 +1,21 @@ +#!/usr/bin/python3 +# -*- coding: utf-8 -*- + +from hpobench.container.client_abstract_benchmark import AbstractMOBenchmarkClient, \ + AbstractBenchmarkClient + + +class YAHPOGymMORawBenchmark(AbstractMOBenchmarkClient): + def __init__(self, **kwargs): + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'YAHPOGymMORawBenchmark') + kwargs['container_name'] = kwargs.get('container_name', 'yahpo_raw') + kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + super(YAHPOGymMORawBenchmark, self).__init__(**kwargs) + + +class YAHPOGymRawBenchmark(AbstractBenchmarkClient): + def __init__(self, **kwargs): + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'YAHPOGymRawBenchmark') + kwargs['container_name'] = kwargs.get('container_name', 'yahpo_raw') + kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + super(YAHPOGymRawBenchmark, self).__init__(**kwargs) diff --git a/hpobench/container/benchmarks/mo/adult_benchmark.py b/hpobench/container/benchmarks/mo/adult_benchmark.py index dbdcaf4d..34baf1b9 100644 --- a/hpobench/container/benchmarks/mo/adult_benchmark.py +++ b/hpobench/container/benchmarks/mo/adult_benchmark.py @@ -8,5 +8,5 @@ class AdultBenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): 
kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'AdultBenchmark') kwargs['container_name'] = kwargs.get('container_name', 'fair_adult') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + kwargs['latest'] = kwargs.get('container_tag', '0.0.2') super(AdultBenchmark, self).__init__(**kwargs) diff --git a/hpobench/container/benchmarks/mo/cnn_benchmark.py b/hpobench/container/benchmarks/mo/cnn_benchmark.py index c9a1d009..9e5cfe6f 100644 --- a/hpobench/container/benchmarks/mo/cnn_benchmark.py +++ b/hpobench/container/benchmarks/mo/cnn_benchmark.py @@ -8,7 +8,7 @@ class FlowerCNNBenchmark(AbstractMOBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'FlowerCNNBenchmark') kwargs['container_name'] = kwargs.get('container_name', 'mo_cnn') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + kwargs['latest'] = kwargs.get('container_tag', '0.0.2') kwargs['gpu'] = kwargs.get('gpu', True) super(FlowerCNNBenchmark, self).__init__(**kwargs) @@ -17,6 +17,6 @@ class FashionCNNBenchmark(AbstractMOBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'FashionCNNBenchmark') kwargs['container_name'] = kwargs.get('container_name', 'mo_cnn') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + kwargs['latest'] = kwargs.get('container_tag', '0.0.2') kwargs['gpu'] = kwargs.get('gpu', True) super(FashionCNNBenchmark, self).__init__(**kwargs) diff --git a/hpobench/container/benchmarks/nas/nasbench_101.py b/hpobench/container/benchmarks/nas/nasbench_101.py index 7984d786..a47e96a2 100644 --- a/hpobench/container/benchmarks/nas/nasbench_101.py +++ b/hpobench/container/benchmarks/nas/nasbench_101.py @@ -3,14 +3,14 @@ """ Benchmark for the Tabular Benchmark from hpobench/benchmarks/nas/nasbench_101.py """ -from hpobench.container.client_abstract_benchmark import AbstractBenchmarkClient +from hpobench.container.client_abstract_benchmark import AbstractBenchmarkClient, AbstractMOBenchmarkClient class NASCifar10ABenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'NASCifar10ABenchmark') kwargs['container_name'] = kwargs.get('container_name', 'nasbench_101') - kwargs['latest'] = kwargs.get('container_tag', '0.0.4') + kwargs['latest'] = kwargs.get('container_tag', '0.0.5') super(NASCifar10ABenchmark, self).__init__(**kwargs) @@ -18,7 +18,7 @@ class NASCifar10BBenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'NASCifar10BBenchmark') kwargs['container_name'] = kwargs.get('container_name', 'nasbench_101') - kwargs['latest'] = kwargs.get('container_tag', '0.0.4') + kwargs['latest'] = kwargs.get('container_tag', '0.0.5') super(NASCifar10BBenchmark, self).__init__(**kwargs) @@ -26,5 +26,29 @@ class NASCifar10CBenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'NASCifar10CBenchmark') kwargs['container_name'] = kwargs.get('container_name', 'nasbench_101') - kwargs['latest'] = kwargs.get('container_tag', '0.0.4') + kwargs['latest'] = kwargs.get('container_tag', '0.0.5') super(NASCifar10CBenchmark, self).__init__(**kwargs) + + +class NASCifar10AMOBenchmark(AbstractMOBenchmarkClient): + def __init__(self, **kwargs): + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'NASCifar10AMOBenchmark') + kwargs['container_name'] = kwargs.get('container_name', 'nasbench_101') + kwargs['latest'] = 
kwargs.get('container_tag', '0.0.5') + super(NASCifar10AMOBenchmark, self).__init__(**kwargs) + + +class NASCifar10BMOBenchmark(AbstractMOBenchmarkClient): + def __init__(self, **kwargs): + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'NASCifar10BMOBenchmark') + kwargs['container_name'] = kwargs.get('container_name', 'nasbench_101') + kwargs['latest'] = kwargs.get('container_tag', '0.0.5') + super(NASCifar10BMOBenchmark, self).__init__(**kwargs) + + +class NASCifar10CMOBenchmark(AbstractMOBenchmarkClient): + def __init__(self, **kwargs): + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'NASCifar10CMOBenchmark') + kwargs['container_name'] = kwargs.get('container_name', 'nasbench_101') + kwargs['latest'] = kwargs.get('container_tag', '0.0.5') + super(NASCifar10CMOBenchmark, self).__init__(**kwargs) diff --git a/hpobench/container/benchmarks/nas/nasbench_1shot1.py b/hpobench/container/benchmarks/nas/nasbench_1shot1.py index a88dcf9a..bef0bf16 100644 --- a/hpobench/container/benchmarks/nas/nasbench_1shot1.py +++ b/hpobench/container/benchmarks/nas/nasbench_1shot1.py @@ -3,14 +3,14 @@ """ Benchmark for the nasbench 1shot1 benchmarks from hpobench/benchmarks/nas/nasbench_1shot1.py """ -from hpobench.container.client_abstract_benchmark import AbstractBenchmarkClient +from hpobench.container.client_abstract_benchmark import AbstractBenchmarkClient, AbstractMOBenchmarkClient class NASBench1shot1SearchSpace1Benchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'NASBench1shot1SearchSpace1Benchmark') kwargs['container_name'] = kwargs.get('container_name', 'nasbench_1shot1') - kwargs['latest'] = kwargs.get('container_tag', '0.0.4') + kwargs['latest'] = kwargs.get('container_tag', '0.0.5') super(NASBench1shot1SearchSpace1Benchmark, self).__init__(**kwargs) @@ -18,7 +18,7 @@ class NASBench1shot1SearchSpace2Benchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'NASBench1shot1SearchSpace2Benchmark') kwargs['container_name'] = kwargs.get('container_name', 'nasbench_1shot1') - kwargs['latest'] = kwargs.get('container_tag', '0.0.4') + kwargs['latest'] = kwargs.get('container_tag', '0.0.5') super(NASBench1shot1SearchSpace2Benchmark, self).__init__(**kwargs) @@ -26,5 +26,29 @@ class NASBench1shot1SearchSpace3Benchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'NASBench1shot1SearchSpace3Benchmark') kwargs['container_name'] = kwargs.get('container_name', 'nasbench_1shot1') - kwargs['latest'] = kwargs.get('container_tag', '0.0.4') + kwargs['latest'] = kwargs.get('container_tag', '0.0.5') super(NASBench1shot1SearchSpace3Benchmark, self).__init__(**kwargs) + + +class NASBench1shot1SearchSpace1MOBenchmark(AbstractMOBenchmarkClient): + def __init__(self, **kwargs): + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'NASBench1shot1SearchSpace1MOBenchmark') + kwargs['container_name'] = kwargs.get('container_name', 'nasbench_1shot1') + kwargs['latest'] = kwargs.get('container_tag', '0.0.5') + super(NASBench1shot1SearchSpace1MOBenchmark, self).__init__(**kwargs) + + +class NASBench1shot1SearchSpace2MOBenchmark(AbstractMOBenchmarkClient): + def __init__(self, **kwargs): + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'NASBench1shot1SearchSpace2MOBenchmark') + kwargs['container_name'] = kwargs.get('container_name', 'nasbench_1shot1') + kwargs['latest'] = kwargs.get('container_tag', 
'0.0.5') + super(NASBench1shot1SearchSpace2MOBenchmark, self).__init__(**kwargs) + + +class NASBench1shot1SearchSpace3MOBenchmark(AbstractMOBenchmarkClient): + def __init__(self, **kwargs): + kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'NASBench1shot1SearchSpace3MOBenchmark') + kwargs['container_name'] = kwargs.get('container_name', 'nasbench_1shot1') + kwargs['latest'] = kwargs.get('container_tag', '0.0.5') + super(NASBench1shot1SearchSpace3MOBenchmark, self).__init__(**kwargs) diff --git a/hpobench/container/benchmarks/nas/nasbench_201.py b/hpobench/container/benchmarks/nas/nasbench_201.py index 2a948c6b..83b6f488 100644 --- a/hpobench/container/benchmarks/nas/nasbench_201.py +++ b/hpobench/container/benchmarks/nas/nasbench_201.py @@ -10,7 +10,7 @@ class Cifar10ValidNasBench201Benchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'Cifar10ValidNasBench201Benchmark') kwargs['container_name'] = kwargs.get('container_name', 'nasbench_201') - kwargs['latest'] = kwargs.get('container_tag', '0.0.5') + kwargs['latest'] = kwargs.get('container_tag', '0.0.6') super(Cifar10ValidNasBench201Benchmark, self).__init__(**kwargs) @@ -18,7 +18,7 @@ class Cifar100NasBench201Benchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'Cifar100NasBench201Benchmark') kwargs['container_name'] = kwargs.get('container_name', 'nasbench_201') - kwargs['latest'] = kwargs.get('container_tag', '0.0.5') + kwargs['latest'] = kwargs.get('container_tag', '0.0.6') super(Cifar100NasBench201Benchmark, self).__init__(**kwargs) @@ -26,7 +26,7 @@ class ImageNetNasBench201Benchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'ImageNetNasBench201Benchmark') kwargs['container_name'] = kwargs.get('container_name', 'nasbench_201') - kwargs['latest'] = kwargs.get('container_tag', '0.0.5') + kwargs['latest'] = kwargs.get('container_tag', '0.0.6') super(ImageNetNasBench201Benchmark, self).__init__(**kwargs) @@ -34,7 +34,7 @@ class Cifar10ValidNasBench201BenchmarkOriginal(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'Cifar10ValidNasBench201BenchmarkOriginal') kwargs['container_name'] = kwargs.get('container_name', 'nasbench_201') - kwargs['latest'] = kwargs.get('container_tag', '0.0.5') + kwargs['latest'] = kwargs.get('container_tag', '0.0.6') super(Cifar10ValidNasBench201BenchmarkOriginal, self).__init__(**kwargs) @@ -42,7 +42,7 @@ class Cifar100NasBench201BenchmarkOriginal(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'Cifar100NasBench201BenchmarkOriginal') kwargs['container_name'] = kwargs.get('container_name', 'nasbench_201') - kwargs['latest'] = kwargs.get('container_tag', '0.0.5') + kwargs['latest'] = kwargs.get('container_tag', '0.0.6') super(Cifar100NasBench201BenchmarkOriginal, self).__init__(**kwargs) @@ -50,7 +50,7 @@ class ImageNetNasBench201BenchmarkOriginal(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'ImageNetNasBench201BenchmarkOriginal') kwargs['container_name'] = kwargs.get('container_name', 'nasbench_201') - kwargs['latest'] = kwargs.get('container_tag', '0.0.5') + kwargs['latest'] = kwargs.get('container_tag', '0.0.6') super(ImageNetNasBench201BenchmarkOriginal, self).__init__(**kwargs) diff --git 
a/hpobench/container/benchmarks/surrogates/yahpo_gym.py b/hpobench/container/benchmarks/surrogates/yahpo_gym.py index 9774975d..64cee463 100644 --- a/hpobench/container/benchmarks/surrogates/yahpo_gym.py +++ b/hpobench/container/benchmarks/surrogates/yahpo_gym.py @@ -8,7 +8,7 @@ class YAHPOGymBenchmark(AbstractBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'YAHPOGymBenchmark') kwargs['container_name'] = kwargs.get('container_name', 'yahpo_gym') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + kwargs['latest'] = kwargs.get('container_tag', '0.0.2') super(YAHPOGymBenchmark, self).__init__(**kwargs) @@ -16,5 +16,5 @@ class YAHPOGymMOBenchmark(AbstractMOBenchmarkClient): def __init__(self, **kwargs): kwargs['benchmark_name'] = kwargs.get('benchmark_name', 'YAHPOGymMOBenchmark') kwargs['container_name'] = kwargs.get('container_name', 'yahpo_gym') - kwargs['latest'] = kwargs.get('container_tag', '0.0.1') + kwargs['latest'] = kwargs.get('container_tag', '0.0.2') super(YAHPOGymMOBenchmark, self).__init__(**kwargs) diff --git a/hpobench/container/client_abstract_benchmark.py b/hpobench/container/client_abstract_benchmark.py index 6bbc3489..d2963c00 100644 --- a/hpobench/container/client_abstract_benchmark.py +++ b/hpobench/container/client_abstract_benchmark.py @@ -14,12 +14,12 @@ The name of the container (``container_name``) is defined either in its belonging container-benchmark definition. (hpobench/container// or via ``container_name``. """ -import os import abc -import sys import json import logging +import os import subprocess +import sys import time from pathlib import Path from typing import Optional, Union, Dict, List, Tuple @@ -27,8 +27,8 @@ import ConfigSpace as CS import Pyro4 -import Pyro4.util import Pyro4.errors +import Pyro4.util import numpy as np from ConfigSpace.read_and_write import json as csjson from oslo_concurrency import lockutils diff --git a/hpobench/container/recipes/ml/Singularity.YahpoRawBenchmark b/hpobench/container/recipes/ml/Singularity.YahpoRawBenchmark new file mode 100644 index 00000000..e79dab4b --- /dev/null +++ b/hpobench/container/recipes/ml/Singularity.YahpoRawBenchmark @@ -0,0 +1,82 @@ +Bootstrap: docker +From: rpy2/rpy2:latest + + +%labels +MAINTAINER pfistererf@googlemail.com +VERSION v0.0.1 + +%help + This is the recipe for the Raw YAHPO Benchmarks. 
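For orientation, a hedged sketch of how the new raw YAHPO container clients introduced above might be used once an image has been built from the recipe below; whether `scenario` and `instance` keyword arguments are forwarded to the containerized benchmark in exactly this way is an assumption, and the concrete values are placeholders:

```python
# Hedged sketch, not part of the patch; argument forwarding and values are assumptions.
from hpobench.container.benchmarks.ml import YAHPOGymMORawBenchmark

bench = YAHPOGymMORawBenchmark(scenario='rbv2_svm', instance='40981', container_tag='0.0.1')
config = bench.get_configuration_space(seed=0).sample_configuration()
result = bench.objective_function(configuration=config)
```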
+ + +%post + cd /home + + ####################### INSTALL THE R + BASE DEPENDENCIES ################# + FILE="libssl1.1_1.1.1f-1ubuntu2_amd64.deb" + wget http://archive.ubuntu.com/ubuntu/pool/main/o/openssl/${FILE} + sudo dpkg -i ${FILE} + + FILE="libssl-dev_1.1.1f-1ubuntu2_amd64.deb" + wget http://archive.ubuntu.com/ubuntu/pool/main/o/openssl/${FILE} + sudo dpkg -i ${FILE} + + sudo apt-get install openssl + sudo apt-get install libcurl4-openssl-dev git + + # Instal R-Packages + cd /home \ + && Rscript -e 'install.packages("remotes", repos = "http://cran.r-project.org")' + + # Install OpenML dependencies + Rscript -e 'install.packages("curl", repos = "http://cran.r-project.org")' \ + && Rscript -e 'install.packages("openssl", repos = "http://cran.r-project.org")' \ + && Rscript -e 'install.packages("httr", repos = "http://cran.r-project.org")' \ + && Rscript -e 'install.packages("farff", repos = "http://cran.r-project.org")' \ + && Rscript -e 'install.packages("OpenML", repos = "http://cran.r-project.org")' + + # Install rbv2 dependencies + Rscript -e 'remotes::install_version("BBmisc", version = "1.11", upgrade = "never", repos = "http://cran.r-project.org")' \ + && Rscript -e 'remotes::install_version("rpart", version = "4.1-13", upgrade = "never", repos = "http://cran.r-project.org")' \ + && Rscript -e 'remotes::install_version("e1071", version = "1.7-0.1", upgrade = "never", repos = "http://cran.r-project.org")' \ + && Rscript -e 'remotes::install_version("xgboost", version = "0.82.1", upgrade = "never", repos = "http://cran.r-project.org")' \ + && Rscript -e 'remotes::install_version("ranger", version = "0.11.2", upgrade = "never", repos = "http://cran.r-project.org")' \ + && Rscript -e 'remotes::install_version("RcppHNSW", version = "0.1.0", upgrade = "never", repos = "http://cran.r-project.org")' \ + && Rscript -e 'remotes::install_version("mlr", version = "2.14", upgrade = "never", repos = "http://cran.r-project.org")' \ + && Rscript -e 'remotes::install_github("mlr-org/mlr3misc", upgrade = "never", repos = "http://cran.r-project.org")' \ + && Rscript -e 'remotes::install_version("mlrCPO", version = "0.3.6", upgrade = "never", repos = "http://cran.r-project.org")' \ + && Rscript -e 'remotes::install_version("testthat", version = "3.1.4", upgrade = "never", repos = "http://cran.r-project.org")' \ + && Rscript -e 'remotes::install_version("glmnet", version = "4.1-3", upgrade = "never", repos = "http://cran.r-project.org")' + # ################################ BASE DEPENDENCIES ################################ + + Rscript -e 'remotes::install_github("pfistfl/rbv2", upgrade = "never", dependencies = True)' \ + && Rscript -e 'remotes::install_github("sumny/iaml", upgrade = "never", dependencies = True)' \ + && Rscript -e 'remotes::install_github("sumny/fair", upgrade = "never", dependencies = True)' + + cd /home \ + && mkdir data && cd data \ + && git clone --depth 1 -b main https://github.com/pfistfl/yahpo_data.git \ + + # Upgrade pip + python3 -m pip install --upgrade pip + + # Install HPOBench + cd /home \ + && git clone https://github.com/automl/HPOBench.git \ + && cd HPOBench \ + && git checkout development \ + && echo "Please never push a recipe that checks out any other branch than development or master" \ + && pip uninstall -y rpy2 \ + && pip install .[yahpo_gym_raw] + # && git checkout development \ + + # Clean Up. 
+ echo "Please don't touch the following lines" \ + && mkdir /var/lib/hpobench/ \ + && chmod -R 777 /var/lib/hpobench/ \ + && rm -rf /var/lib/apt/lists/* \ + && pip cache purge \ + +%runscript + python3 -s /home/HPOBench/hpobench/container/server_abstract_benchmark.py ml.yahpo_benchmark $@ \ No newline at end of file diff --git a/hpobench/container/recipes/ml/Singularity.ml_tabular_benchmark b/hpobench/container/recipes/ml/Singularity.ml_tabular_benchmarks similarity index 100% rename from hpobench/container/recipes/ml/Singularity.ml_tabular_benchmark rename to hpobench/container/recipes/ml/Singularity.ml_tabular_benchmarks diff --git a/hpobench/container/recipes/ml/Singularity.rbv2Benchmark b/hpobench/container/recipes/ml/Singularity.rbv2Benchmark new file mode 100644 index 00000000..e69de29b diff --git a/hpobench/container/recipes/surrogates/Singularity.YAHPOGymBenchmark b/hpobench/container/recipes/surrogates/Singularity.YAHPOGymBenchmark index 66ee63b1..98914ed1 100644 --- a/hpobench/container/recipes/surrogates/Singularity.YAHPOGymBenchmark +++ b/hpobench/container/recipes/surrogates/Singularity.YAHPOGymBenchmark @@ -3,7 +3,7 @@ From: python:3.7-slim %labels MAINTAINER pfistererf@googlemail.com -VERSION v0.0.1 +VERSION v0.0.2 %help This is a template for a Singularity recipe @@ -20,10 +20,10 @@ VERSION v0.0.1 cd /home \ && mkdir data && cd data \ - && git clone --depth 1 -b main https://github.com/pfistfl/yahpo_data.git\ + && git clone --depth 1 -b fair https://github.com/slds-lmu/yahpo_data.git cd /home \ - && git clone https://github.com/pfistfl/HPOBench.git \ + && git clone https://github.com/automl/HPOBench.git \ && cd HPOBench \ && echo "Please never push a recipe that checks out any other branch than development or master" \ && git checkout master \ diff --git a/hpobench/dependencies/ml/data_manager.py b/hpobench/dependencies/ml/data_manager.py index 526c6756..ebc48c95 100644 --- a/hpobench/dependencies/ml/data_manager.py +++ b/hpobench/dependencies/ml/data_manager.py @@ -1,20 +1,20 @@ -import openml -import numpy as np -import pandas as pd -from typing import Union from pathlib import Path +from typing import Union +import numpy as np +import openml +import pandas as pd +from oslo_concurrency import lockutils +from sklearn.compose import ColumnTransformer from sklearn.impute import SimpleImputer +from sklearn.model_selection import train_test_split from sklearn.pipeline import make_pipeline -from sklearn.utils import check_random_state -from sklearn.compose import ColumnTransformer from sklearn.preprocessing import OneHotEncoder from sklearn.preprocessing import StandardScaler -from sklearn.model_selection import train_test_split -from oslo_concurrency import lockutils +from sklearn.utils import check_random_state -from hpobench.util.data_manager import DataManager from hpobench import config_file +from hpobench.util.data_manager import DataManager class OpenMLDataManager(DataManager): diff --git a/hpobench/dependencies/ml/ml_benchmark_template.py b/hpobench/dependencies/ml/ml_benchmark_template.py index 7cef515f..1ffe7b9e 100644 --- a/hpobench/dependencies/ml/ml_benchmark_template.py +++ b/hpobench/dependencies/ml/ml_benchmark_template.py @@ -33,25 +33,34 @@ class MLBenchmark(AbstractBenchmark): def __init__( self, task_id: int, - rng: Union[np.random.RandomState, int, None] = None, valid_size: float = 0.33, + rng: Union[np.random.RandomState, int, None] = None, data_path: Union[str, Path, None] = None, global_seed: int = 1 ): + """ Base template for the ML multi-fidelity 
benchmarks. + + Parameters + ---------- + task_id : int + A valid OpenML Task ID. + valid_size : float + The fraction of training set to be used as validation split. + rng : np.random.RandomState, int (optional) + The random seed that will be passed to the ML model if not explicitly passed. + data_path : str, Path (optional) + The path from where the training-validation-testing splits may be loaded. + global_seed : int + The fixed global seed that is used for creating validation splits if not available. + """ super(MLBenchmark, self).__init__(rng=rng) - if isinstance(rng, int): - self.seed = rng - else: - self.seed = self.rng.randint(1, 10**6) - self.global_seed = global_seed # used for fixed training-validation splits self.task_id = task_id self.valid_size = valid_size - self.scorers = dict() - for k, v in metrics.items(): - self.scorers[k] = make_scorer(v, **metrics_kwargs[k]) + self.scorers = metrics + self.scorer_args = metrics_kwargs if data_path is None: from hpobench import config_file @@ -59,7 +68,7 @@ def __init__( self.data_path = Path(data_path) - dm = OpenMLDataManager(task_id, valid_size, data_path, global_seed) + dm = OpenMLDataManager(self.task_id, self.valid_size, self.data_path, self.global_seed) dm.load() # Data variables @@ -77,10 +86,6 @@ def __init__( self.lower_bound_train_size = dm.lower_bound_train_size self.n_classes = dm.n_classes - # Observation and fidelity spaces - self.fidelity_space = self.get_fidelity_space(self.seed) - self.configuration_space = self.get_configuration_space(self.seed) - @staticmethod def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: """Parameter space to be optimized --- contains the hyperparameters @@ -90,33 +95,33 @@ def get_configuration_space(seed: Union[int, None] = None) -> CS.ConfigurationSp @staticmethod def get_fidelity_space(seed: Union[int, None] = None) -> CS.ConfigurationSpace: """Fidelity space available --- specifies the fidelity dimensions - - If fidelity_choice is 0 - Fidelity space is the maximal fidelity, akin to a black-box function - If fidelity_choice is 1 - Fidelity space is a single fidelity, in this case the number of trees (n_estimators) - If fidelity_choice is 2 - Fidelity space is a single fidelity, in this case the fraction of dataset (subsample) - If fidelity_choice is >2 - Fidelity space is multi-multi fidelity, all possible fidelities """ raise NotImplementedError() # pylint: disable=arguments-differ def get_meta_information(self): - """ Returns the meta information for the benchmark """ + """ Returns the meta information for the benchmark + """ return { 'name': 'Support Vector Machine', 'shape of train data': self.train_X.shape, 'shape of test data': self.test_X.shape, 'shape of valid data': self.valid_X.shape, - 'initial random seed': self.seed, + 'initial random seed': self.rng, 'task_id': self.task_id } - def init_model(self, config: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - rng: Union[int, np.random.RandomState, None] = None): + def get_model_size(self, model): + """ Returns a custom model size specific to the ML model, if applicable + """ + raise NotImplementedError + + def init_model( + self, + config: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + rng: Union[int, np.random.RandomState, None] = None + ): """ Function that returns the model initialized based on the configuration and fidelity """ raise NotImplementedError() @@ -135,18 +140,74 @@ def get_fidelity(self, size: 
Union[int, None] = None): return self.fidelity_space.sample_configuration() return [self.fidelity_space.sample_configuration() for i in range(size)] - def shuffle_data_idx(self, train_idx: Iterable = None, rng: Union[np.random.RandomState, None] = None) -> Iterable: + def shuffle_data_idx( + self, train_idx: Iterable = None, rng: Union[np.random.RandomState, None] = None + ) -> Iterable: rng = self.rng if rng is None else rng train_idx = self.train_idx if train_idx is None else train_idx rng.shuffle(train_idx) return train_idx - def _train_objective(self, - config: Dict, - fidelity: Dict, - shuffle: bool, - rng: Union[np.random.RandomState, int, None] = None, - evaluation: Union[str, None] = "valid"): + def _get_lc_spacing(self, max_iter, k): + """ Creates an integer sequence to record Learning Curves for every k iteration. + + Designed to include the maximum iteration. A k-spaced iteration sequence may not include + the endpoint implicitly. + """ + assert k > 0, "Spacing needs to be at >=1" + assert k < max_iter, "Spacing should be in {1, 2, ..., max_iter-1}" + spacing = np.arange(0, max_iter + 1, step=k).tolist() + spacing = spacing[1:] # eliminating 0 + if spacing[-1] != max_iter: + spacing.append(max_iter) + return spacing + + def _train_objective( + self, + config: Dict, + fidelity: Dict, + shuffle: bool, + rng: Union[np.random.RandomState, int, None] = None, + evaluation: Union[str, None] = "valid", + record_stats: bool = False, + get_learning_curve: bool = False, + lc_every_k: int = 1, + **kwargs + ): + """Function that instantiates a 'config' on a 'fidelity' and trains it + + The ML model is instantiated and trained on the training split. Optionally, the model is + evaluated on the training set. + + Parameters + ---------- + config : CS.Configuration, Dict + The hyperparameter configuration. + fidelity : CS.Configuration, Dict + The fidelity configuration. + shuffle : bool (optional) + If True, shuffles the training split before fitting the ML model. + rng : np.random.RandomState, int (optional) + The random seed passed to the ML model and if applicable, used for shuffling the data + and subsampling the dataset fraction. + evaluation : str (optional) + If "valid", the ML model is trained on the training set alone. + If "test", the ML model is trained on the training + validation sets. + record_stats : bool (optional) + If True, records the evaluation metrics of the trained ML model on the training set. + This is set to False by default to reduce overall compute time. + get_learning_curve : bool (optional) + If True, records the learning curve using partial_fit or warm starting, if applicable. + This is set to False by default to reduce overall compute time. + lc_every_k : int (optional) + If True, records the learning curve after every k iterations. 
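Because the endpoint handling in `_get_lc_spacing` is easy to get subtly wrong, here is a standalone sketch of the same spacing logic with a worked example (it mirrors the helper added above; the free-standing function name is only for illustration):

```python
# Standalone sketch of the learning-curve spacing logic added above.
import numpy as np

def lc_spacing(max_iter: int, k: int) -> list:
    """Every k-th iteration, always including max_iter as the last entry."""
    assert 0 < k < max_iter, "k must be in {1, ..., max_iter - 1}"
    spacing = np.arange(0, max_iter + 1, step=k).tolist()[1:]  # drop the leading 0
    if spacing[-1] != max_iter:
        spacing.append(max_iter)
    return spacing

print(lc_spacing(10, 3))  # -> [3, 6, 9, 10]
```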
+ """ + if get_learning_curve: + raise NotImplementedError( + "Need to implement partial or intermediate training to record Learning curves" + ) + learning_curves = None + lc_time = None if rng is not None: rng = get_rng(rng, self.rng) @@ -155,26 +216,32 @@ def _train_objective(self, model = self.init_model(config, fidelity, rng) # preparing data - if eval == "valid": + if evaluation == "valid": train_X = self.train_X train_y = self.train_y - train_idx = self.train_idx - else: + elif evaluation == "test": train_X = np.vstack((self.train_X, self.valid_X)) train_y = pd.concat((self.train_y, self.valid_y)) - train_idx = np.arange(len(train_X)) + else: + raise ValueError("{} not in ['valid', 'test']".format(evaluation)) + train_idx = np.arange(len(train_X)) if self.train_idx is None else self.train_idx # shuffling data if shuffle: train_idx = self.shuffle_data_idx(train_idx, rng) - train_X = train_X.iloc[train_idx] + if isinstance(train_idx, np.ndarray): + train_X = train_X[train_idx] + else: + train_X = train_X.iloc[train_idx] train_y = train_y.iloc[train_idx] # subsample here: # application of the other fidelity to the dataset that the model interfaces + # carried over from previous HPOBench code that borrowed from FABOLAS' SVM + lower_bound_lim = 1.0 / 512.0 if self.lower_bound_train_size is None: self.lower_bound_train_size = (10 * self.n_classes) / self.train_X.shape[0] - self.lower_bound_train_size = np.max((1 / 512, self.lower_bound_train_size)) + self.lower_bound_train_size = np.max((lower_bound_lim, self.lower_bound_train_size)) subsample = np.max((fidelity['subsample'], self.lower_bound_train_size)) train_idx = self.rng.choice( np.arange(len(train_X)), size=int( @@ -185,102 +252,209 @@ def _train_objective(self, start = time.time() model.fit(train_X[train_idx], train_y.iloc[train_idx]) model_fit_time = time.time() - start + # model inference + inference_time = 0.0 + # can optionally not record evaluation metrics on training set to save compute + if record_stats: + start = time.time() + pred_train = model.predict(train_X) + inference_time = time.time() - start # computing statistics on training data scores = dict() score_cost = dict() for k, v in self.scorers.items(): scores[k] = 0.0 score_cost[k] = 0.0 - if evaluation == "test": - _start = time.time() - scores[k] = v(model, train_X, train_y) - score_cost[k] = time.time() - _start + _start = time.time() + if record_stats: + scores[k] = v(train_y, pred_train, **self.scorer_args[k]) + score_cost[k] = time.time() - _start + inference_time train_loss = 1 - scores["acc"] - return model, model_fit_time, train_loss, scores, score_cost + return model, model_fit_time, train_loss, scores, score_cost, learning_curves, lc_time # pylint: disable=arguments-differ @AbstractBenchmark.check_parameters - def objective_function(self, - configuration: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - shuffle: bool = False, - rng: Union[np.random.RandomState, int, None] = None, - **kwargs) -> Dict: + def objective_function( + self, + configuration: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + shuffle: bool = False, + rng: Union[np.random.RandomState, int, None] = None, + record_train: bool = False, + get_learning_curve: bool = False, + lc_every_k: int = 1, + **kwargs + ) -> Dict: """Function that evaluates a 'config' on a 'fidelity' on the validation set + + The ML model is trained on the training split, and evaluated on the valid and test splits. 
+ + Parameters + ---------- + configuration : CS.Configuration, Dict + The hyperparameter configuration. + fidelity : CS.Configuration, Dict + The fidelity configuration. + shuffle : bool (optional) + If True, shuffles the training split before fitting the ML model. + rng : np.random.RandomState, int (optional) + The random seed passed to the ML model and if applicable, used for shuffling the data + and subsampling the dataset fraction. + record_train : bool (optional) + If True, records the evaluation metrics of the trained ML model on the training set. + This is set to False by default to reduce overall compute time. + get_learning_curve : bool (optional) + If True, records the learning curve using partial_fit or warm starting, if applicable. + This is set to False by default to reduce overall compute time. + lc_every_k : int (optional) + Records the learning curve after every k iterations; only used when get_learning_curve is True. """ - model, model_fit_time, train_loss, train_scores, train_score_cost = self._train_objective( - configuration, fidelity, shuffle, rng, evaluation="val" - ) + # obtaining model and training statistics + model, model_fit_time, train_loss, train_scores, train_score_cost, lcs, lc_time = \ + self._train_objective( + configuration, fidelity, shuffle, rng, + evaluation="valid", record_stats=record_train, + get_learning_curve=get_learning_curve, lc_every_k=lc_every_k + ) + model_size = self.get_model_size(model) + + # model inference on validation set + start = time.time() + pred_val = model.predict(self.valid_X) + val_inference_time = time.time() - start val_scores = dict() val_score_cost = dict() for k, v in self.scorers.items(): + val_scores[k] = 0.0 + val_score_cost[k] = 0.0 _start = time.time() - val_scores[k] = v(model, self.valid_X, self.valid_y) - val_score_cost[k] = time.time() - _start + val_scores[k] = v(self.valid_y, pred_val, **self.scorer_args[k]) + val_score_cost[k] = time.time() - _start + val_inference_time val_loss = 1 - val_scores["acc"] + # model inference on test set + start = time.time() + pred_test = model.predict(self.test_X) + test_inference_time = time.time() - start test_scores = dict() test_score_cost = dict() for k, v in self.scorers.items(): + test_scores[k] = 0.0 + test_score_cost[k] = 0.0 _start = time.time() - test_scores[k] = v(model, self.test_X, self.test_y) - test_score_cost[k] = time.time() - _start + test_scores[k] = v(self.test_y, pred_test, **self.scorer_args[k]) + test_score_cost[k] = time.time() - _start + test_inference_time test_loss = 1 - test_scores["acc"] + fidelity = fidelity.get_dictionary() if isinstance(fidelity, CS.Configuration) else fidelity + configuration = configuration.get_dictionary() \ + if isinstance(configuration, CS.Configuration) else configuration + info = { 'train_loss': train_loss, 'val_loss': val_loss, 'test_loss': test_loss, 'model_cost': model_fit_time, + 'model_size': model_size, 'train_scores': train_scores, 'train_costs': train_score_cost, 'val_scores': val_scores, 'val_costs': val_score_cost, 'test_scores': test_scores, 'test_costs': test_score_cost, + 'learning_curves': lcs, + 'learning_curves_cost': lc_time, + 'learning_curves_spacing': lc_every_k, # storing as dictionary and not ConfigSpace saves tremendous memory 'fidelity': fidelity, 'config': configuration, } return { - 'function_value': info['val_loss'], - 'cost': model_fit_time + info['val_costs']['acc'], + 'function_value': float(info['val_loss']), + 'cost': float(model_fit_time + info['val_costs']['acc']), 'info': info } # pylint: disable=arguments-differ
@AbstractBenchmark.check_parameters - def objective_function_test(self, - configuration: Union[CS.Configuration, Dict], - fidelity: Union[CS.Configuration, Dict, None] = None, - shuffle: bool = False, - rng: Union[np.random.RandomState, int, None] = None, - **kwargs) -> Dict: + def objective_function_test( + self, + configuration: Union[CS.Configuration, Dict], + fidelity: Union[CS.Configuration, Dict, None] = None, + shuffle: bool = False, + rng: Union[np.random.RandomState, int, None] = None, + record_train: bool = False, + get_learning_curve: bool = False, + lc_every_k: int = 1, + **kwargs + ) -> Dict: """Function that evaluates a 'config' on a 'fidelity' on the test set + + The ML model is trained on the training+valid split, and evaluated on the test split. + + Parameters + ---------- + configuration : CS.Configuration, Dict + The hyperparameter configuration. + fidelity : CS.Configuration, Dict + The fidelity configuration. + shuffle : bool (optional) + If True, shuffles the training split before fitting the ML model. + rng : np.random.RandomState, int (optional) + The random seed passed to the ML model and if applicable, used for shuffling the data + and subsampling the dataset fraction. + record_train : bool (optional) + If True, records the evaluation metrics of the trained ML model on the training set. + This is set to False by default to reduce overall compute time. + get_learning_curve : bool (optional) + If True, records the learning curve using partial_fit or warm starting, if applicable. + This is set to False by default to reduce overall compute time. + lc_every_k : int (optional) + Records the learning curve after every k iterations; only used when get_learning_curve is True. """ - model, model_fit_time, train_loss, train_scores, train_score_cost = self._train_objective( - configuration, fidelity, shuffle, rng, evaluation="test" - ) + # obtaining model and training statistics + model, model_fit_time, train_loss, train_scores, train_score_cost, lcs, lc_time = \ + self._train_objective( + configuration, fidelity, shuffle, rng, + evaluation="test", record_stats=record_train, + get_learning_curve=get_learning_curve, lc_every_k=lc_every_k + ) + model_size = self.get_model_size(model) + + # model inference on test set + start = time.time() + pred_test = model.predict(self.test_X) + test_inference_time = time.time() - start test_scores = dict() test_score_cost = dict() for k, v in self.scorers.items(): + test_scores[k] = 0.0 + test_score_cost[k] = 0.0 _start = time.time() - test_scores[k] = v(model, self.test_X, self.test_y) - test_score_cost[k] = time.time() - _start + test_scores[k] = v(self.test_y, pred_test, **self.scorer_args[k]) + test_score_cost[k] = time.time() - _start + test_inference_time test_loss = 1 - test_scores["acc"] + fidelity = fidelity.get_dictionary() if isinstance(fidelity, CS.Configuration) else fidelity + configuration = configuration.get_dictionary() \ + if isinstance(configuration, CS.Configuration) else configuration + info = { 'train_loss': train_loss, 'val_loss': None, 'test_loss': test_loss, 'model_cost': model_fit_time, + 'model_size': model_size, 'train_scores': train_scores, 'train_costs': train_score_cost, - 'val_scores': dict(), - 'val_costs': dict(), + 'val_scores': None, + 'val_costs': None, 'test_scores': test_scores, 'test_costs': test_score_cost, + 'learning_curves': lcs, + 'learning_curves_cost': lc_time, + 'learning_curves_spacing': lc_every_k, # storing as dictionary and not ConfigSpace saves tremendous memory 'fidelity': fidelity, 'config': configuration, diff --git
a/hpobench/dependencies/mo/scalar.py b/hpobench/dependencies/mo/scalar.py index 3f434fde..185c2730 100644 --- a/hpobench/dependencies/mo/scalar.py +++ b/hpobench/dependencies/mo/scalar.py @@ -1,6 +1,7 @@ -import numpy as np from typing import Union +import numpy as np + try: from sklearn.preprocessing import MinMaxScaler, StandardScaler except ImportError: diff --git a/hpobench/util/clean_up_script.py b/hpobench/util/clean_up_script.py index 5fe9fd0c..771ab80f 100644 --- a/hpobench/util/clean_up_script.py +++ b/hpobench/util/clean_up_script.py @@ -1,7 +1,8 @@ +import logging +import shutil + from hpobench import config_file -import shutil -import logging logger = logging.getLogger('Clean-up') logger.setLevel(logging.INFO) diff --git a/hpobench/util/container_utils.py b/hpobench/util/container_utils.py index 7fee19e9..bb7221c3 100644 --- a/hpobench/util/container_utils.py +++ b/hpobench/util/container_utils.py @@ -1,11 +1,11 @@ -import os +import enum import importlib import json -import numpy as np -import enum - +import os from typing import Any, Union +import numpy as np + from hpobench.util.rng_helper import serialize_random_state, deserialize_random_state diff --git a/hpobench/util/data_manager.py b/hpobench/util/data_manager.py index c72305e1..914d651c 100644 --- a/hpobench/util/data_manager.py +++ b/hpobench/util/data_manager.py @@ -15,6 +15,7 @@ import gzip import json import logging +import os import pickle import tarfile from io import BytesIO @@ -40,6 +41,15 @@ import hpobench + +tabular_multi_fidelity_urls = dict( + xgb="https://figshare.com/ndownloader/files/35414756", + svm="https://figshare.com/ndownloader/files/35414447", + lr="https://figshare.com/ndownloader/files/35412425", + rf="https://figshare.com/ndownloader/files/35414801", + nn="https://figshare.com/ndownloader/files/35414996" +) + class DataManager(abc.ABC, metaclass=abc.ABCMeta): """ Base Class for loading and managing the data. 
@@ -1174,21 +1184,14 @@ def get_workclass(x): class TabularDataManager(DataManager): def __init__(self, model: str, task_id: [int, str], data_dir: [str, Path, None] = None): super(TabularDataManager, self).__init__() + + self.model = model + self.task_id = str(task_id) - url_dict = dict( - xgb="https://ndownloader.figshare.com/files/30469920", - svm="https://ndownloader.figshare.com/files/30379359", - lr="https://ndownloader.figshare.com/files/30379038", - rf="https://ndownloader.figshare.com/files/30469089", - nn="https://ndownloader.figshare.com/files/30379005" - ) - + url_dict = tabular_multi_fidelity_urls assert model in url_dict.keys(), \ f'Model has to be one of {list(url_dict.keys())} but was {model}' - self.model = model - self.task_id = str(task_id) - self.url_to_use = url_dict.get(model) if data_dir is None: @@ -1225,3 +1228,42 @@ def _load_json(path): with open(path, "r") as f: data = json.load(f) return data + + +class YAHPODataManager(DataManager): + def __init__(self, data_dir: Union[Path, str, None]): + super(YAHPODataManager, self).__init__() + + if data_dir is None: + data_dir = hpobench.config_file.data_dir / "yahpo_data" + self.data_dir = Path(data_dir) + self.logger.info(f'Read data from data directory: {data_dir}') + + @lockutils.synchronized('not_thread_process_safe', external=True, + lock_path=f'{hpobench.config_file.cache_dir}/lock_yahpo_raw', delay=0.5) + def _try_download(self): + """Clone the data repository.""" + if not self.data_dir.exists(): + self.logger.info( + 'Try to download data from https://github.com/slds-lmu/yahpo_data/tree/fair' + ) + # Create the data directory if not existing + self.create_save_directory(self.data_dir.parent) + + import git + git.Repo.clone_from(url='https://github.com/slds-lmu/yahpo_data.git', + to_path=str(self.data_dir), + branch='fair', + multi_options=['--depth 1']) + self.logger.info(f'Successfully cloned data from repo to {self.data_dir}') + + def load(self): + from yahpo_gym.local_config import LocalConfiguration + local_config = LocalConfiguration() + + # When in the containerized version, redirect to the data inside the container. + if 'YAHPO_CONTAINER' in os.environ: + local_config.init_config(data_path='/home/data/yahpo_data') + else: + self._try_download() + local_config.init_config(data_path=str(self.data_dir)) diff --git a/hpobench/util/test_utils.py b/hpobench/util/test_utils.py new file mode 100644 index 00000000..b2683135 --- /dev/null +++ b/hpobench/util/test_utils.py @@ -0,0 +1,24 @@ +import os + +CONST_RUN_ALL_TESTS_ENV_VAR = 'HPOBENCH_RUN_EXPENSIVE_TESTS' +DEFAULT_SKIP_MSG = 'Skip this test due to time limitations' + + +def check_run_all_tests(): + """ Helper function: Check if all tests should run. """ + return os.environ.get(CONST_RUN_ALL_TESTS_ENV_VAR, 'false').lower() == 'true' + + +def enable_all_tests(): + """ + Some tests are quite expensive. We control if all runs should be executed by this + environment variable. + """ + os.environ[CONST_RUN_ALL_TESTS_ENV_VAR] = 'true' + + +def disable_all_tests(): + """ + This function disables the evaluation of all test functions. 
+ """ + os.environ[CONST_RUN_ALL_TESTS_ENV_VAR] = 'false' diff --git a/requirements.txt b/requirements.txt index aad54f85..b5db0198 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,8 @@ numpy>=1.18.1 ConfigSpace>=0.4.12 Pyro4==4.80 -oslo.concurrency>=4.2.0 \ No newline at end of file +oslo.concurrency>=4.2.0 +pandas>=1.2.4 +scikit-learn>=0.24.1 +openml>=0.12.2 +tqdm>=4.64.0 \ No newline at end of file diff --git a/setup.py b/setup.py index 4c53ecb0..ef1f292c 100644 --- a/setup.py +++ b/setup.py @@ -36,7 +36,7 @@ def read_file(file_name): version=read_file('hpobench/__version__.py').split()[-1].strip('\''), packages=setuptools.find_packages(exclude=['*.tests', '*.tests.*', 'tests.*', 'tests'],), - python_requires='>=3.6, <=3.10', + python_requires='>=3.6', install_requires=read_file('./requirements.txt').split('\n'), extras_require=get_extra_requirements(), test_suite='pytest', diff --git a/tests/test_adult.py b/tests/test_adult.py index d7a030b7..b52c37ed 100644 --- a/tests/test_adult.py +++ b/tests/test_adult.py @@ -28,10 +28,10 @@ def test_adult_benchmark(): result_2 = benchmark.objective_function(test_config, rng=1, fidelity={'budget': 3}) assert result_1['info']['valid_accuracy'] == pytest.approx(0.7539, rel=0.001) - assert result_1['info']['valid_accuracy'] == result_1['function_value']['accuracy'] + assert 1 - result_1['info']['valid_accuracy'] == result_1['function_value']['misclassification_rate'] assert result_1['info']['train_accuracy'] == pytest.approx(0.76145, rel=0.001) assert result_1['info']['train_accuracy'] == result_2['info']['train_accuracy'] result_1 = benchmark.objective_function_test(test_config, rng=1, fidelity={'budget': 3}) - assert result_1['function_value']['accuracy'] == pytest.approx(0.76377, rel=0.001) - assert result_1['function_value']['accuracy'] == result_1['info']['test_accuracy'] + assert 1 - result_1['function_value']['misclassification_rate'] == pytest.approx(0.76377, rel=0.001) + assert 1 - result_1['function_value']['misclassification_rate'] == result_1['info']['test_accuracy'] diff --git a/tests/test_data_manager.py b/tests/test_data_manager.py index 7e32ce84..cee56ccc 100644 --- a/tests/test_data_manager.py +++ b/tests/test_data_manager.py @@ -1,14 +1,13 @@ import shutil -from multiprocessing import Pool - import pytest +from multiprocessing import Pool import hpobench from hpobench.util.data_manager import NASBench_201Data, YearPredictionMSDData, ProteinStructureData, BostonHousingData -skip_message = 'We currently skip this test because it takes too much time.' 
+from hpobench.util.test_utils import DEFAULT_SKIP_MSG, check_run_all_tests -@pytest.mark.skip(reason=skip_message) +@pytest.mark.skipif(not check_run_all_tests(), reason=DEFAULT_SKIP_MSG) def test_nasbench_201_load_thread_safe(): shutil.rmtree(hpobench.config_file.data_dir / "nasbench_201", ignore_errors=True) function = lambda: NASBench_201Data(dataset='cifar100').load() @@ -16,7 +15,7 @@ def test_nasbench_201_load_thread_safe(): pool.map(function, []) -@pytest.mark.skip(reason=skip_message) +@pytest.mark.skipif(not check_run_all_tests(), reason=DEFAULT_SKIP_MSG) def test_nasbench_201_init(): data_manager = NASBench_201Data(dataset='cifar100') @@ -30,7 +29,7 @@ def test_nasbench_201_init(): assert data_manager._save_dir.exists() -@pytest.mark.skip(reason=skip_message) +@pytest.mark.skipif(not check_run_all_tests(), reason=DEFAULT_SKIP_MSG) def test_nasbench_201_load(): shutil.rmtree(hpobench.config_file.data_dir / "nasbench_201", ignore_errors=True) diff --git a/tests/test_mo_cnn.py b/tests/test_mo_cnn.py index 308c59ad..cded9444 100644 --- a/tests/test_mo_cnn.py +++ b/tests/test_mo_cnn.py @@ -1,6 +1,8 @@ import pytest +from hpobench.util.test_utils import DEFAULT_SKIP_MSG, check_run_all_tests +@pytest.mark.skipif(not check_run_all_tests(), reason=DEFAULT_SKIP_MSG) def test_mo_cnn_seeding(): from hpobench.container.benchmarks.mo.cnn_benchmark import FlowerCNNBenchmark b1 = FlowerCNNBenchmark(rng=0) @@ -18,6 +20,7 @@ def test_mo_cnn_seeding(): assert result_1['function_value'][metric] == pytest.approx(result_2['function_value'][metric], abs=0.001) +@pytest.mark.skipif(not check_run_all_tests(), reason=DEFAULT_SKIP_MSG) def test_mo_cnn_benchmark(): from hpobench.container.benchmarks.mo.cnn_benchmark import FlowerCNNBenchmark diff --git a/tests/test_nasbench_101.py b/tests/test_nasbench_101.py new file mode 100644 index 00000000..67ac7f65 --- /dev/null +++ b/tests/test_nasbench_101.py @@ -0,0 +1,82 @@ +import pytest +import numpy as np + +from hpobench.container.benchmarks.nas.nasbench_101 import ( + NASCifar10ABenchmark, NASCifar10BBenchmark, NASCifar10CBenchmark, + NASCifar10AMOBenchmark, NASCifar10BMOBenchmark, NASCifar10CMOBenchmark, +) + +from hpobench.util.container_utils import disable_container_debug, enable_container_debug +from hpobench.util.test_utils import DEFAULT_SKIP_MSG, check_run_all_tests + +# from hpobench.util.test_utils import enable_all_tests +# enable_all_tests() + + +@pytest.fixture(scope='module') +def enable_debug(): + enable_container_debug() + yield + disable_container_debug() + + +@pytest.mark.skipif(not check_run_all_tests(), reason=DEFAULT_SKIP_MSG) +def test_nasbench101_A_SO(enable_debug): + + b = NASCifar10ABenchmark(rng=0) + cs_1 = b.get_configuration_space(seed=0) + config_1 = cs_1.sample_configuration() + cs_2 = b.get_configuration_space(seed=0) + config_2 = cs_2.sample_configuration() + assert config_1 == config_2 + + assert len(b.get_fidelity_space()) == 1 + + config = { + 'edge_0': 0, 'edge_1': 0, 'edge_10': 0, 'edge_11': 1, 'edge_12': 1, 'edge_13': 0, 'edge_14': 1, 'edge_15': 0, + 'edge_16': 0, 'edge_17': 1, 'edge_18': 1, 'edge_19': 0, 'edge_2': 0, 'edge_20': 1, 'edge_3': 0, 'edge_4': 0, + 'edge_5': 1, 'edge_6': 1, 'edge_7': 0, 'edge_8': 0, 'edge_9': 0, 'op_node_0': 'maxpool3x3', + 'op_node_1': 'conv1x1-bn-relu', 'op_node_2': 'conv3x3-bn-relu', 'op_node_3': 'conv3x3-bn-relu', + 'op_node_4': 'conv3x3-bn-relu' + } + + result = b.objective_function(configuration=config, fidelity={'budget': 108}, run_index=(0, 1, 2)) + assert result['function_value'] == 
pytest.approx(0.1659655372301737, abs=0.1) + assert result['cost'] == pytest.approx(853.5010070800781, abs=0.1) + assert 1 - np.mean(result['info']['valid_accuracies']) == result['function_value'] + + with pytest.raises(AssertionError): + result = b.objective_function_test(configuration=config, fidelity={'epoch': 109}) + + +@pytest.mark.skipif(not check_run_all_tests(), reason=DEFAULT_SKIP_MSG) +def test_nasbench101_C_MO(enable_debug): + b = NASCifar10CMOBenchmark(rng=0) + cs_1 = b.get_configuration_space(seed=0) + config_1 = cs_1.sample_configuration() + cs_2 = b.get_configuration_space(seed=0) + config_2 = cs_2.sample_configuration() + assert config_1 == config_2 + + assert len(b.get_fidelity_space()) == 1 + + config = { + 'edge_0': 0.9446689170495839, 'edge_1': 0.1289262976548533, 'edge_10': 0.09710127579306127, + 'edge_11': 0.09394051075844168, 'edge_12': 0.5722519057908734, 'edge_13': 0.30157481667454933, + 'edge_14': 0.9194826137446735, 'edge_15': 0.3599780644783639, 'edge_16': 0.589909976354571, + 'edge_17': 0.4536968445560453, 'edge_18': 0.21550767711355845, 'edge_19': 0.18327983621407862, + 'edge_2': 0.5864101661863267, 'edge_20': 0.47837030703998806, 'edge_3': 0.05342718178682526, + 'edge_4': 0.6956254456388572, 'edge_5': 0.3068100995451961, 'edge_6': 0.399025321703102, + 'edge_7': 0.15941446344895593, 'edge_8': 0.23274412927905685, 'edge_9': 0.0653042071517802, 'num_edges': 9, + 'op_node_0': 'conv1x1-bn-relu', 'op_node_1': 'maxpool3x3', 'op_node_2': 'conv1x1-bn-relu', + 'op_node_3': 'maxpool3x3', 'op_node_4': 'maxpool3x3' + } + + result = b.objective_function(configuration=config, fidelity={'budget': 108}, run_index=(0, 1, 2)) + assert result['function_value']['misclassification_rate'] == pytest.approx(0.11985842386881507, abs=0.1) + assert result['function_value']['trainable_parameters'] == 1115277 + assert result['cost'] == pytest.approx(3175.9591064453125, abs=0.1) + assert 1 - np.mean(result['info']['valid_accuracies']) == result['function_value']['misclassification_rate'] + + with pytest.raises(AssertionError): + result = b.objective_function_test(configuration=config, fidelity={'epoch': 109}) diff --git a/tests/test_nasbench_201.py b/tests/test_nasbench_201.py index 70e46de9..29ef18ec 100644 --- a/tests/test_nasbench_201.py +++ b/tests/test_nasbench_201.py @@ -1,5 +1,3 @@ -import logging -logging.basicConfig(level=logging.DEBUG) import pytest from hpobench.container.benchmarks.nas.nasbench_201 import ImageNetNasBench201Benchmark, Cifar100NasBench201Benchmark, \ @@ -7,8 +5,7 @@ from hpobench.benchmarks.nas.nasbench_201 import \ Cifar10ValidNasBench201MOBenchmark as LocalCifar10ValidNasBench201MOBenchmark from hpobench.util.container_utils import disable_container_debug, enable_container_debug - -skip_message = 'We currently skip this test because it takes too much time.' 
+from hpobench.util.test_utils import DEFAULT_SKIP_MSG, check_run_all_tests @pytest.fixture(scope='module') @@ -18,7 +15,7 @@ def enable_debug(): disable_container_debug() -@pytest.mark.skip(reason=skip_message) +@pytest.mark.skipif(not check_run_all_tests(), reason=DEFAULT_SKIP_MSG) def test_nasbench201_cifar10valid(enable_debug): b = Cifar10ValidNasBench201Benchmark(rng=0) @@ -38,22 +35,22 @@ def test_nasbench201_cifar10valid(enable_debug): '3<-2': 'nor_conv_3x3' } result = b.objective_function(configuration=config, fidelity={'epoch': 199}, data_seed=(777, 888, 999)) - assert result['function_value'] == pytest.approx(9.78, abs=0.1) + assert result['function_value'] == pytest.approx(0.0978, abs=0.1) assert result['cost'] == pytest.approx(11973.20, abs=0.1) - assert result['info']['valid_precision'] == result['function_value'] + assert result['info']['valid_misclassification_rate'] == result['function_value'] assert result['info']['valid_cost'] == result['cost'] result = b.objective_function_test(configuration=config, fidelity={'epoch': 200}) - assert result['function_value'] == pytest.approx(9.70, abs=0.1) + assert result['function_value'] == pytest.approx(0.0970, abs=0.1) assert result['cost'] == pytest.approx(10426.33, abs=0.2) - assert result['info']['test_precision'] == result['function_value'] + assert result['info']['test_misclassification_rate'] == result['function_value'] assert result['info']['test_cost'] == result['cost'] - with pytest.raises(ValueError): + with pytest.raises(AssertionError): result = b.objective_function_test(configuration=config, fidelity={'epoch': 10}) -@pytest.mark.skip(reason=skip_message) +@pytest.mark.skipif(not check_run_all_tests(), reason=DEFAULT_SKIP_MSG) def test_nasbench201_cifar100(enable_debug): b = Cifar100NasBench201Benchmark(rng=0) @@ -67,13 +64,13 @@ def test_nasbench201_cifar100(enable_debug): result = b.objective_function(configuration=config, fidelity=fidelity, data_seed=(777, 888, 999)) assert result is not None - assert result['function_value'] == pytest.approx(29.5233, abs=0.1) + assert result['function_value'] == pytest.approx(0.295233, abs=0.1) assert result['cost'] == pytest.approx(19681.70, abs=0.1) - assert result['info']['valid_precision'] == result['function_value'] + assert result['info']['valid_misclassification_rate'] == result['function_value'] assert result['info']['valid_cost'] == result['cost'] -@pytest.mark.skip(reason=skip_message) +@pytest.mark.skipif(not check_run_all_tests(), reason=DEFAULT_SKIP_MSG) def test_nasbench201_Image(enable_debug): b = ImageNetNasBench201Benchmark(rng=0) config = {'1<-0': 'nor_conv_1x1', @@ -86,9 +83,9 @@ def test_nasbench201_Image(enable_debug): result = b.objective_function(configuration=config, fidelity=fidelity, data_seed=(777, 888, 999)) assert result is not None - assert result['function_value'] == pytest.approx(55.2167, abs=0.1) + assert result['function_value'] == pytest.approx(0.552167, abs=0.1) assert result['cost'] == pytest.approx(57119.22, abs=0.1) - assert result['info']['valid_precision'] == result['function_value'] + assert result['info']['valid_misclassification_rate'] == result['function_value'] assert result['info']['valid_cost'] == result['cost'] diff --git a/tests/test_paramnet.py b/tests/test_paramnet.py index 52d55f94..076f4b38 100644 --- a/tests/test_paramnet.py +++ b/tests/test_paramnet.py @@ -1,11 +1,13 @@ import pytest +import sys -# import logging -# logging.basicConfig(level=logging.DEBUG) -# from hpobench.util.container_utils import enable_container_debug -# 
enable_container_debug() +MSG = 'Skip this test for newer (>3.9) Python versions. ' \ + 'The paramnet benchmarks require a specific old scikit-learn version. This version, however, does not work under ' \ + 'Python 3.10. Therefore we skip this test. The containerized version still works under 3.10.' + + +@pytest.mark.skipif(sys.version_info > (3, 9), reason=MSG) def test_load_data(): from hpobench.util.data_manager import ParamNetDataManager diff --git a/tests/test_pybnn.py b/tests/test_pybnn.py index 0e749457..f1c6b5fc 100644 --- a/tests/test_pybnn.py +++ b/tests/test_pybnn.py @@ -1,14 +1,19 @@ +import sys import pytest from hpobench.container.benchmarks.ml.pybnn import BNNOnToyFunction, BNNOnBostonHousing, BNNOnProteinStructure, \ BNNOnYearPrediction -import logging -logging.basicConfig(level=logging.DEBUG) from hpobench.util.container_utils import enable_container_debug +from hpobench.util.test_utils import check_run_all_tests, DEFAULT_SKIP_MSG + enable_container_debug() +MSG = 'Skip this test for newer (>3.9) Python versions. ' \ + 'The paramnet benchmarks require a specific old scikit-learn version. This version, however, does not work under ' \ + 'Python 3.10. Therefore we skip this test. The containerized version still works under 3.10.' + +@pytest.mark.skipif(sys.version_info > (3, 9), reason=MSG) def test_bnn_init(): benchmark = BNNOnToyFunction(rng=1) @@ -58,6 +63,7 @@ def test_bnn_boston_housing(): assert test_result['info']['fidelity']['budget'] == 1000 +@pytest.mark.skipif(not check_run_all_tests(), reason=DEFAULT_SKIP_MSG) def test_bnn_protein(): benchmark = BNNOnProteinStructure(rng=1) test_result = simple_call(benchmark) @@ -66,6 +72,7 @@ def test_year_pred(): benchmark = BNNOnYearPrediction(rng=1) test_result = simple_call(benchmark) diff --git a/tests/test_utils.py b/tests/test_utils.py index 9bc5ff3b..e570dbd7 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -105,3 +105,15 @@ def test_debug_level(): disable_container_debug() assert os.environ['HPOBENCH_DEBUG'] == 'false' + + +def test_test_utils(): + from hpobench.util.test_utils import DEFAULT_SKIP_MSG, enable_all_tests, disable_all_tests, check_run_all_tests + + assert isinstance(DEFAULT_SKIP_MSG, str) + + enable_all_tests() + assert check_run_all_tests() + + disable_all_tests() + assert not check_run_all_tests() \ No newline at end of file diff --git a/tests/test_whitebox.py b/tests/test_whitebox.py index 35a9a940..585f9867 100644 --- a/tests/test_whitebox.py +++ b/tests/test_whitebox.py @@ -63,6 +63,7 @@ def test_whitebox_with_container(): assert np.isclose(test_loss, 0.43636, atol=0.001) +@pytest.mark.skipif(skip_container_test, reason="Requires singularity and flask") def test_cartpole(): from hpobench.container.benchmarks.rl.cartpole import CartpoleReduced as Benchmark b = Benchmark(container_name='cartpole', diff --git a/tests/test_yahpo_raw.py b/tests/test_yahpo_raw.py new file mode 100644 index 00000000..65694603 --- /dev/null +++ b/tests/test_yahpo_raw.py @@ -0,0 +1,12 @@ +from hpobench.container.benchmarks.ml.yahpo_benchmark import YAHPOGymMORawBenchmark + + +def test_mo_benchmark(): + + b = YAHPOGymMORawBenchmark(scenario="iaml_xgboost", instance="40981",) + cfg = b.get_configuration_space().get_default_configuration() + b.objective_function(cfg) + + +if __name__ == '__main__': + test_mo_benchmark() \ No newline at end of file
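
A minimal usage sketch of the extended objective_function interface introduced in this patch (record_train, get_learning_curve, lc_every_k, and the model_size/learning_curves entries in the returned info dict). The local XGBoostBenchmark import path and the OpenML task id below are illustrative assumptions, not part of the patch itself.

# Sketch only: assumes the local (non-containerized) XGBoost benchmark from the ml suite
# and an arbitrary OpenML task id; adjust both to your setup.
from hpobench.benchmarks.ml.xgboost_benchmark import XGBoostBenchmark

benchmark = XGBoostBenchmark(task_id=167119, rng=1)

config = benchmark.get_configuration_space(seed=1).sample_configuration()
fidelity = benchmark.get_fidelity_space(seed=1).sample_configuration()

# record_train=True additionally scores the fitted model on the training split;
# get_learning_curve stays False because this patch still raises NotImplementedError for it.
result = benchmark.objective_function(
    configuration=config,
    fidelity=fidelity,
    rng=1,
    record_train=True,
)

print(result['function_value'])      # 1 - validation accuracy
print(result['cost'])                # model fit time + validation scoring time
print(result['info']['model_size'])  # new field populated via get_model_size()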
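
The expensive-test switch added in hpobench/util/test_utils.py can be driven either from Python or through the HPOBENCH_RUN_EXPENSIVE_TESTS environment variable; the sketch below mirrors how the updated tests use it (the test body is a placeholder).

# Gating an expensive test with the helpers added in hpobench/util/test_utils.py.
import pytest

from hpobench.util.test_utils import DEFAULT_SKIP_MSG, check_run_all_tests, enable_all_tests


@pytest.mark.skipif(not check_run_all_tests(), reason=DEFAULT_SKIP_MSG)
def test_expensive_path():
    # placeholder body; runs only when the expensive tests are enabled
    assert 1 + 1 == 2


if __name__ == '__main__':
    # Equivalent to exporting HPOBENCH_RUN_EXPENSIVE_TESTS=true before invoking pytest.
    enable_all_tests()
    assert check_run_all_tests()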