CLIMADA-project
diff --git a/‎climada/test/test_util_calibrate.py‎
Lines changed: 6 additions & 8 deletions b/‎climada/test/test_util_calibrate.py‎
Lines changed: 6 additions & 8 deletions
diff --git a/‎climada/util/calibrate/__init__.py‎
Lines changed: 3 additions & 6 deletions b/‎climada/util/calibrate/__init__.py‎
Lines changed: 3 additions & 6 deletions
diff --git a/‎climada/util/calibrate/base.py‎
Lines changed: 184 additions & 0 deletions b/‎climada/util/calibrate/base.py‎
Lines changed: 184 additions & 0 deletions
diff --git a/‎climada/util/calibrate/bayesian_optimizer.py‎
Lines changed: 160 additions & 0 deletions b/‎climada/util/calibrate/bayesian_optimizer.py‎
Lines changed: 160 additions & 0 deletions
@@ -9,11 +9,9 @@
 
 from climada.entity import ImpactFuncSet, ImpactFunc
 
-from climada.util.calibrate.impact_func import (
-    Input,
-    ScipyMinimizeOptimizer,
-    cost_func_rmse,
-)
+from climada.util.calibrate import Input, ScipyMinimizeOptimizer
+from climada.util.calibrate.impact_func import cost_func_rmse
+
 from climada.util.calibrate.test.test_calibrate import hazard, exposure
 
 
@@ -54,7 +52,7 @@ def test_single(self):
         output = optimizer.run(params_init={"slope": 0.1})
 
         # Result should be nearly exact
-        self.assertTrue(output.success)
+        self.assertTrue(output.result.success)
         self.assertAlmostEqual(output.params["slope"], 1.0)
         self.assertAlmostEqual(output.target, 0.0)
 
@@ -65,7 +63,7 @@ def test_bound(self):
         output = optimizer.run(params_init={"slope": 0.1})
 
         # Result should be very close to the bound
-        self.assertTrue(output.success)
+        self.assertTrue(output.result.success)
         self.assertGreater(output.params["slope"], 0.89)
         self.assertAlmostEqual(output.params["slope"], 0.91, places=2)
 
@@ -96,7 +94,7 @@ def test_multiple_constrained(self):
         )
 
         # Check results (low accuracy)
-        self.assertTrue(output.success)
+        self.assertTrue(output.result.success)
         self.assertAlmostEqual(output.params["intensity_1"], 1.0, places=3)
         self.assertAlmostEqual(output.params["intensity_2"], 3.0, places=3)
         self.assertAlmostEqual(output.target, 0.0, places=3)
@@ -1,8 +1,5 @@
 """Impact function calibration module"""
 
-from .impact_func import (
-    Input,
-    ScipyMinimizeOptimizer,
-    BayesianOptimizer,
-    cost_func_rmse,
-)
+from .base import Input
+from .bayesian_optimizer import BayesianOptimizer
+from .scipy_optimizer import ScipyMinimizeOptimizer
@@ -0,0 +1,184 @@
+"""Calibration Base Classes and Interfaces"""
+
+from abc import ABC, abstractmethod
+from dataclasses import dataclass, field, InitVar
+from typing import Callable, Mapping, Optional, Tuple, Union, Any, Dict
+from numbers import Number
+
+import numpy as np
+import pandas as pd
+from scipy.optimize import Bounds, LinearConstraint, NonlinearConstraint
+
+from climada.hazard import Hazard
+from climada.entity import Exposures, ImpactFuncSet
+from climada.engine import Impact, ImpactCalc
+
+ConstraintType = Union[LinearConstraint, NonlinearConstraint, Mapping]
+
+
+@dataclass
+class Input:
+    """Static input of a calibration task
+
+    An instance bundles everything that stays fixed while an optimizer varies the
+    parameters passed to ``impact_func_gen``.
+
+    Attributes
+    ----------
+    hazard : climada.Hazard
+        Hazard object to compute impacts from
+    exposure : climada.Exposures
+        Exposures object to compute impacts from
+    data : pandas.DataFrame
+        The data to compare computed impacts to. Index: Event IDs matching the IDs of
+        ``hazard``. Columns: Arbitrary columns.
+    cost_func : Callable
+        Function that takes an ``Impact`` object and a ``pandas.DataFrame`` as
+        arguments and returns a single number. The optimization algorithm will try to
+        minimize this number. See this module for a suggestion of cost functions.
+    impact_func_gen : Callable
+        Function that takes the parameters as keyword arguments and returns an impact
+        function set. This will be called each time the optimization algorithm updates
+        the parameters.
+    bounds : Mapping (str, {Bounds, tuple(float, float)}), optional
+        The bounds for the parameters. Keys: parameter names. Values:
+        ``scipy.optimize.Bounds`` instance or tuple of minimum and maximum value.
+        Unbounded parameters need not be specified here. See the documentation for
+        the selected optimization algorithm on which data types are supported.
+    constraints : Constraint or list of Constraint, optional
+        One or multiple instances of ``scipy.optimize.LinearConstraint``,
+        ``scipy.optimize.NonlinearConstraint``, or a mapping. See the documentation
+        for the selected optimization algorithm on which data types are supported.
+    impact_calc_kwds : Mapping (str, Any), optional
+        Keyword arguments to :py:meth:`climada.engine.impact_calc.ImpactCalc.impact`.
+        Defaults to ``{"assign_centroids": False}`` (by default, centroids are assigned
+        here via the ``align`` parameter, to avoid assigning them each time the impact is
+        calculated).
+    align : bool, optional
+        Match event IDs from ``hazard`` and ``data``, and assign the centroids from
+        ``hazard`` to ``exposure``. Defaults to ``True``. This is an init-only
+        parameter and is not stored on the instance.
+    """
+
+    hazard: Hazard
+    exposure: Exposures
+    data: pd.DataFrame
+    cost_func: Callable[[Impact, pd.DataFrame], Number]
+    impact_func_gen: Callable[..., ImpactFuncSet]
+    bounds: Optional[Mapping[str, Union[Bounds, Tuple[Number, Number]]]] = None
+    constraints: Optional[Union[ConstraintType, list[ConstraintType]]] = None
+    impact_calc_kwds: Mapping[str, Any] = field(
+        default_factory=lambda: {"assign_centroids": False}
+    )
+    align: InitVar[bool] = True
+
+    def __post_init__(self, align):
+        """Align hazard, data, and exposure if requested
+
+        Parameters
+        ----------
+        align : bool
+            If ``False``, nothing is done. If ``True``, ``hazard`` is replaced by its
+            selection of the events listed in the index of ``data``, and
+            ``exposure.assign_centroids`` is called with that selection.
+
+        Raises
+        ------
+        RuntimeError
+            If the index of ``data`` contains event IDs that are not present in
+            ``hazard.event_id``.
+        """
+        if not align:
+            return
+
+        # Only IDs present in 'data' but missing from 'hazard' are an error; hazard
+        # events without data are dropped by the selection below
+        unmatched_ids = np.setdiff1d(self.data.index, self.hazard.event_id)
+        if unmatched_ids.size > 0:
+            raise RuntimeError(
+                "Event IDs in 'data' do not match event IDs in 'hazard': \n"
+                f"{unmatched_ids}"
+            )
+
+        data_event_ids = self.data.index.tolist()
+        self.hazard = self.hazard.select(event_id=data_event_ids)
+        self.exposure.assign_centroids(self.hazard)
+
+
+@dataclass
+class Output:
+    """Generic output of a calibration task
+
+    This is the return type declared by :py:meth:`Optimizer.run`. Optimizer-specific
+    output classes can derive from it to carry additional information.
+
+    Attributes
+    ----------
+    params : Mapping (str, Number)
+        The optimal parameters. Keys: parameter names. Values: parameter values.
+    target : Number
+        The target function value for the optimal parameters. How the target relates
+        to the cost function depends on the optimizer, see
+        :py:meth:`Optimizer._target_func`.
+    """
+
+    params: Mapping[str, Number]
+    target: Number
+
+
+@dataclass
+class Optimizer(ABC):
+    """Abstract base class (interface) for an optimization
+
+    Concrete optimizers in CLIMADA derive from this class. At the very least, they
+    have to override the abstract :py:meth:`run` method. The remaining methods provide
+    default behavior that may be overridden where the optimizer requires it.
+
+    Attributes
+    ----------
+    input : Input
+        The input object for the optimization task. See :py:class:`Input`.
+    """
+
+    input: Input
+
+    def _target_func(self, impact: Impact, data: pd.DataFrame) -> Number:
+        """Compute the value the optimizer works on
+
+        By default, this is the value of :py:attr:`Input.cost_func` evaluated on the
+        arguments, without any modification.
+
+        Parameters
+        ----------
+        impact : climada.engine.Impact
+            The impact object returned by the impact calculation.
+        data : pandas.DataFrame
+            The data used for calibration. See :py:attr:`Input.data`.
+
+        Returns
+        -------
+        The value of the target function for the optimizer.
+        """
+        cost_func = self.input.cost_func
+        return cost_func(impact, data)
+
+    def _kwargs_to_impact_func_gen(self, *_, **kwargs) -> Dict[str, Any]:
+        """Translate the arguments of :py:meth:`_opt_func` into generator parameters
+
+        Optimizers differ in how they hand over parameters (e.g., key-value pairs,
+        arrays, etc.). Whatever the representation, the result of this method must be
+        usable as keyword arguments of the impact function generator, see
+        :py:attr:`Input.impact_func_gen`.
+
+        This default version ignores all positional arguments and returns the keyword
+        arguments unchanged. Override this method if the optimizer used *does not*
+        represent parameters as key-value pairs.
+
+        Parameters
+        ----------
+        kwargs
+            The parameters as key-value pairs.
+
+        Returns
+        -------
+        The parameters as key-value pairs.
+        """
+        return kwargs
+
+    def _opt_func(self, *args, **kwargs) -> Number:
+        """The function evaluated by the optimizer in each iteration
+
+        The steps are: transform the optimizer arguments into parameters, create a set
+        of impact functions from these parameters, compute the impact, and evaluate
+        the target function on the impact and the calibration data.
+
+        Parameters
+        ----------
+        args, kwargs
+            Arbitrary arguments from the optimizer, including parameters
+
+        Returns
+        -------
+        Target function value for the given arguments
+        """
+        inp = self.input
+        generator_params = self._kwargs_to_impact_func_gen(*args, **kwargs)
+        impact_funcs = inp.impact_func_gen(**generator_params)
+
+        impact_calc = ImpactCalc(
+            exposures=inp.exposure, impfset=impact_funcs, hazard=inp.hazard
+        )
+        computed_impact = impact_calc.impact(**inp.impact_calc_kwds)
+        return self._target_func(computed_impact, inp.data)
+
+    @abstractmethod
+    def run(self, **opt_kwargs) -> Output:
+        """Execute the optimization"""
@@ -0,0 +1,160 @@
+"""Calibration with Bayesian Optimization"""
+
+from dataclasses import dataclass, InitVar
+from typing import Mapping, Optional, Any
+from numbers import Number
+
+import pandas as pd
+from bayes_opt import BayesianOptimization
+from bayes_opt.target_space import TargetSpace
+
+from climada.engine import Impact
+from .base import Output, Optimizer
+
+
+@dataclass
+class BayesianOptimizer(Optimizer):
+    """An optimization using ``bayes_opt.BayesianOptimization``
+
+    This optimizer reports the target function value for each parameter set and
+    *maximizes* that value. Therefore, a higher target function value is better.
+    The cost function, however, is still minimized: The target function is defined as
+    the negative of the cost function.
+
+    For details on the underlying optimizer, see
+    https://github.com/bayesian-optimization/BayesianOptimization.
+
+    Parameters
+    ----------
+    input : Input
+        The input data for this optimizer. See the Notes below for input requirements.
+    verbose : int, optional
+        Verbosity of the optimizer output. Defaults to 1.
+    random_state : int, optional
+        Seed for initializing the random number generator. Defaults to 1.
+    allow_duplicate_points : bool, optional
+        Allow the optimizer to sample the same points in parameter space multiple times.
+        This may happen if the parameter space is tightly bound or constrained. Defaults
+        to ``True``.
+    bayes_opt_kwds : dict
+        Additional keyword arguments passed to the ``BayesianOptimization`` constructor.
+
+    Raises
+    ------
+    ValueError
+        If ``input.bounds`` is ``None``.
+
+    Notes
+    -----
+    The following requirements apply to the parameters of :py:class:`Input` when using
+    this class:
+
+    bounds
+        Setting ``bounds`` in the ``Input`` is required because the optimizer first
+        "explores" the bound parameter space and then narrows its search to regions
+        where the cost function is low.
+    constraints
+        Must be an instance of ``scipy.optimize.LinearConstraint`` or
+        ``scipy.optimize.NonlinearConstraint``. See
+        https://github.com/bayesian-optimization/BayesianOptimization/blob/master/examples/constraints.ipynb
+        for further information. Supplying constraints is optional.
+
+    Attributes
+    ----------
+    optimizer : bayes_opt.BayesianOptimization
+        The optimizer instance of this class.
+    """
+
+    verbose: InitVar[int] = 1
+    random_state: InitVar[int] = 1
+    allow_duplicate_points: InitVar[bool] = True
+    bayes_opt_kwds: InitVar[Optional[Mapping[str, Any]]] = None
+
+    def __post_init__(
+        self, verbose, random_state, allow_duplicate_points, bayes_opt_kwds
+    ):
+        """Create optimizer"""
+        if bayes_opt_kwds is None:
+            bayes_opt_kwds = {}
+
+        if self.input.bounds is None:
+            raise ValueError("Input.bounds is required for this optimizer")
+
+        self.optimizer = BayesianOptimization(
+            f=self._opt_func,
+            pbounds=self.input.bounds,
+            verbose=verbose,
+            random_state=random_state,
+            allow_duplicate_points=allow_duplicate_points,
+            **bayes_opt_kwds,
+        )
+
+    def _target_func(self, impact: Impact, data: pd.DataFrame) -> Number:
+        """Negate the cost function because BayesianOptimization maximizes the target
+
+        The negative is used instead of the reciprocal: it is finite for a cost of
+        zero (perfect fit) and reverses the ordering for *all* real cost values,
+        including negative ones. The reciprocal ``1 / cost`` does neither.
+
+        Parameters
+        ----------
+        impact : climada.engine.Impact
+            The impact object returned by the impact calculation.
+        data : pandas.DataFrame
+            The data used for calibration. See :py:attr:`Input.data`.
+
+        Returns
+        -------
+        The negative value of the cost function.
+        """
+        return -self.input.cost_func(impact, data)
+
+    def run(self, **opt_kwargs) -> "BayesianOptimizerOutput":
+        """Execute the optimization
+
+        ``BayesianOptimization`` *maximizes* a target function. Therefore, this class
+        negates the cost function and uses that as target function. The cost function
+        is still minimized.
+
+        Parameters
+        ----------
+        init_points : int, optional
+            Number of initial samples taken from the parameter space. Defaults to 10^N,
+            where N is the number of parameters.
+        n_iter : int, optional
+            Number of iteration steps after initial sampling. Defaults to 10^N, where N
+            is the number of parameters.
+        opt_kwargs
+            Further keyword arguments passed to ``BayesianOptimization.maximize``.
+
+        Returns
+        -------
+        output : BayesianOptimizerOutput
+            Optimization output. ``target`` is the maximum target function value, i.e.,
+            the negative of the minimum cost found.
+            :py:attr:`BayesianOptimizerOutput.p_space` stores data on the sampled
+            parameter space.
+        """
+        # Retrieve parameters
+        num_params = len(self.input.bounds)
+        init_points = opt_kwargs.pop("init_points", 10**num_params)
+        n_iter = opt_kwargs.pop("n_iter", 10**num_params)
+
+        # Run optimizer
+        self.optimizer.maximize(init_points=init_points, n_iter=n_iter, **opt_kwargs)
+
+        # Return output
+        opt = self.optimizer.max
+        return BayesianOptimizerOutput(
+            params=opt["params"],
+            target=opt["target"],
+            p_space=self.optimizer.space,
+        )
+
+
+@dataclass
+class BayesianOptimizerOutput(Output):
+    """Output of a calibration with :py:class:`BayesianOptimizer`
+
+    Extends :py:class:`Output` by the parameter space the optimizer operated on.
+
+    Attributes
+    ----------
+    p_space : bayes_opt.target_space.TargetSpace
+        The parameter space sampled by the optimizer.
+    """
+
+    p_space: TargetSpace
+
+    def p_space_to_dataframe(self):
+        """Convert the sampled parameter space into a pandas.DataFrame
+
+        Returns
+        -------
+        pandas.DataFrame
+            Data frame with one column per parameter plus a column ``target`` holding
+            the associated target function value. Each row is one optimizer
+            iteration; the index is named ``Iteration``.
+        """
+        # One column per parameter: the i-th key labels the i-th entry along the last
+        # axis of the sampled parameter array
+        columns = {}
+        for param_idx in range(self.p_space.dim):
+            param_name = self.p_space.keys[param_idx]
+            columns[param_name] = self.p_space.params[..., param_idx]
+        columns["target"] = self.p_space.target
+
+        frame = pd.DataFrame.from_dict(columns)
+        frame.index.rename("Iteration", inplace=True)
+        return frame