Skip to content

Commit e8abb1a

Browse files
committed
Restructure calibration module
1 parent 37c65d9 commit e8abb1a

File tree

7 files changed

+472
-433
lines changed

7 files changed

+472
-433
lines changed

climada/test/test_util_calibrate.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,9 @@
99

1010
from climada.entity import ImpactFuncSet, ImpactFunc
1111

12-
from climada.util.calibrate.impact_func import (
13-
Input,
14-
ScipyMinimizeOptimizer,
15-
cost_func_rmse,
16-
)
12+
from climada.util.calibrate import Input, ScipyMinimizeOptimizer
13+
from climada.util.calibrate.impact_func import cost_func_rmse
14+
1715
from climada.util.calibrate.test.test_calibrate import hazard, exposure
1816

1917

@@ -54,7 +52,7 @@ def test_single(self):
5452
output = optimizer.run(params_init={"slope": 0.1})
5553

5654
# Result should be nearly exact
57-
self.assertTrue(output.success)
55+
self.assertTrue(output.result.success)
5856
self.assertAlmostEqual(output.params["slope"], 1.0)
5957
self.assertAlmostEqual(output.target, 0.0)
6058

@@ -65,7 +63,7 @@ def test_bound(self):
6563
output = optimizer.run(params_init={"slope": 0.1})
6664

6765
# Result should be very close to the bound
68-
self.assertTrue(output.success)
66+
self.assertTrue(output.result.success)
6967
self.assertGreater(output.params["slope"], 0.89)
7068
self.assertAlmostEqual(output.params["slope"], 0.91, places=2)
7169

@@ -96,7 +94,7 @@ def test_multiple_constrained(self):
9694
)
9795

9896
# Check results (low accuracy)
99-
self.assertTrue(output.success)
97+
self.assertTrue(output.result.success)
10098
self.assertAlmostEqual(output.params["intensity_1"], 1.0, places=3)
10199
self.assertAlmostEqual(output.params["intensity_2"], 3.0, places=3)
102100
self.assertAlmostEqual(output.target, 0.0, places=3)

climada/util/calibrate/__init__.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,5 @@
11
"""Impact function calibration module"""
22

3-
from .impact_func import (
4-
Input,
5-
ScipyMinimizeOptimizer,
6-
BayesianOptimizer,
7-
cost_func_rmse,
8-
)
3+
from .base import Input
4+
from .bayesian_optimizer import BayesianOptimizer
5+
from .scipy_optimizer import ScipyMinimizeOptimizer

climada/util/calibrate/base.py

Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
"""Calibration Base Classes and Interfaces"""
2+
3+
from abc import ABC, abstractmethod
4+
from dataclasses import dataclass, field, InitVar
5+
from typing import Callable, Mapping, Optional, Tuple, Union, Any, Dict
6+
from numbers import Number
7+
8+
import numpy as np
9+
import pandas as pd
10+
from scipy.optimize import Bounds, LinearConstraint, NonlinearConstraint
11+
12+
from climada.hazard import Hazard
13+
from climada.entity import Exposures, ImpactFuncSet
14+
from climada.engine import Impact, ImpactCalc
15+
16+
# Type alias for a single constraint as accepted by ``Input.constraints``: a scipy
# ``LinearConstraint`` or ``NonlinearConstraint``, or a generic mapping.
ConstraintType = Union[LinearConstraint, NonlinearConstraint, Mapping]
17+
18+
19+
@dataclass
class Input:
    """Container for the static input of a calibration task

    Attributes
    ----------
    hazard : climada.Hazard
        The hazard from which impacts are computed.
    exposure : climada.Exposures
        The exposures from which impacts are computed.
    data : pandas.DataFrame
        Reference data the computed impacts are compared to. Index: Event IDs
        matching the IDs of ``hazard``. Columns: Arbitrary columns.
    cost_func : Callable
        Takes an ``Impact`` object and a ``pandas.DataFrame`` and returns a single
        number. The optimization algorithm tries to minimize this number.
    impact_func_gen : Callable
        Takes the parameters as keyword arguments and returns an impact function
        set. It is called every time the optimization algorithm updates the
        parameters.
    bounds : Mapping (str, {Bounds, tuple(float, float)}), optional
        Parameter bounds. Keys: parameter names. Values: a
        ``scipy.optimize.Bounds`` instance or a tuple of minimum and maximum value.
        Unbounded parameters need not be listed. Which data types are supported
        depends on the selected optimization algorithm; see its documentation.
    constraints : Constraint or list of Constraint, optional
        One or several instances of ``scipy.optimize.LinearConstraint``,
        ``scipy.optimize.NonlinearConstraint``, or a mapping. Which data types are
        supported depends on the selected optimization algorithm; see its
        documentation.
    impact_calc_kwds : Mapping (str, Any), optional
        Keyword arguments to :py:meth:`climada.engine.impact_calc.ImpactCalc.impact`.
        Defaults to ``{"assign_centroids": False}`` (by default, centroids are
        assigned once here via the ``align`` parameter instead of on every impact
        calculation).
    align : bool, optional
        Init-only flag. If ``True`` (default), match the event IDs of ``hazard``
        and ``data``, and assign the centroids of ``hazard`` to ``exposure``.
    """

    hazard: Hazard
    exposure: Exposures
    data: pd.DataFrame
    cost_func: Callable[[Impact, pd.DataFrame], Number]
    impact_func_gen: Callable[..., ImpactFuncSet]
    bounds: Optional[Mapping[str, Union[Bounds, Tuple[Number, Number]]]] = None
    constraints: Optional[Union[ConstraintType, list[ConstraintType]]] = None
    impact_calc_kwds: Mapping[str, Any] = field(
        default_factory=lambda: dict(assign_centroids=False)
    )
    align: InitVar[bool] = True

    def __post_init__(self, align):
        """Align hazard, data, and exposure if requested

        Raises
        ------
        RuntimeError
            If ``align`` is set and ``data`` contains event IDs that are not present
            in ``hazard``.
        """
        if not align:
            return

        # Every event in 'data' must have a counterpart in the hazard
        unmatched_ids = np.setdiff1d(self.data.index, self.hazard.event_id)
        if unmatched_ids.size:
            message = (
                "Event IDs in 'data' do not match event IDs in 'hazard': \n"
                f"{unmatched_ids}"
            )
            raise RuntimeError(message)

        # Restrict the hazard to the events in 'data', then assign centroids once
        selected_events = self.data.index.tolist()
        self.hazard = self.hazard.select(event_id=selected_events)
        self.exposure.assign_centroids(self.hazard)
82+
83+
84+
@dataclass
class Output:
    """Result container common to all calibration tasks

    Attributes
    ----------
    params : Mapping (str, Number)
        Parameter names mapped to their optimal values
    target : Number
        Value of the target function at the optimal parameters
    """

    params: Mapping[str, Number]
    target: Number
98+
99+
100+
@dataclass
class Optimizer(ABC):
    """Abstract interface every calibration optimizer implements

    Concrete optimizers derive from this class and must override at least
    :py:meth:`run`. The remaining methods provide default behavior that subclasses
    may override to adapt the target function or the parameter representation.

    Attributes
    ----------
    input : Input
        The input object for the optimization task. See :py:class:`Input`.
    """

    input: Input

    def _target_func(self, impact: Impact, data: pd.DataFrame) -> Number:
        """Compute the value the optimizer works on

        By default, this is the cost function of the input evaluated on the
        arguments, without modification.

        Parameters
        ----------
        impact : climada.engine.Impact
            The impact object returned by the impact calculation.
        data : pandas.DataFrame
            The data used for calibration. See :py:attr:`Input.data`.

        Returns
        -------
        The value of the target function for the optimizer.
        """
        cost_func = self.input.cost_func
        return cost_func(impact, data)

    def _kwargs_to_impact_func_gen(self, *_, **kwargs) -> Dict[str, Any]:
        """Translate the arguments of :py:meth:`_opt_func` for the generator

        Optimizers may represent parameters in different ways (e.g., key-value
        pairs, arrays, etc.). This method converts that representation into the
        keyword arguments expected by :py:attr:`Input.impact_func_gen`.

        The default implementation ignores positional arguments and passes the
        keyword arguments through unchanged. Override it if the optimizer used
        *does not* represent parameters as key-value pairs.

        Parameters
        ----------
        kwargs
            The parameters as key-value pairs.

        Returns
        -------
        The parameters as key-value pairs.
        """
        return kwargs

    def _opt_func(self, *args, **kwargs) -> Number:
        """Evaluate the target function for one set of optimizer arguments

        The arguments are converted into impact function parameters, a new impact
        function set is generated from them, the impact is computed, and the target
        function value for that impact is returned.

        Parameters
        ----------
        args, kwargs
            Arbitrary arguments from the optimizer, including parameters

        Returns
        -------
        Target function value for the given arguments
        """
        task = self.input
        generator_kwargs = self._kwargs_to_impact_func_gen(*args, **kwargs)
        impact_funcs = task.impact_func_gen(**generator_kwargs)
        calculation = ImpactCalc(
            exposures=task.exposure,
            impfset=impact_funcs,
            hazard=task.hazard,
        )
        computed_impact = calculation.impact(**task.impact_calc_kwds)
        return self._target_func(computed_impact, task.data)

    @abstractmethod
    def run(self, **opt_kwargs) -> Output:
        """Execute the optimization"""
Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
"""Calibration with Bayesian Optimization"""
2+
3+
from dataclasses import dataclass, InitVar
4+
from typing import Mapping, Optional, Any
5+
from numbers import Number
6+
7+
import pandas as pd
8+
from bayes_opt import BayesianOptimization
9+
from bayes_opt.target_space import TargetSpace
10+
11+
from climada.engine import Impact
12+
from .base import Output, Optimizer
13+
14+
15+
@dataclass
class BayesianOptimizer(Optimizer):
    """An optimization using ``bayes_opt.BayesianOptimization``

    This optimizer reports the target function value for each parameter set and
    *maximizes* that value. Therefore, a higher target function value is better.
    The cost function, however, is still minimized: The target function is defined as
    the negative of the cost function.

    For details on the underlying optimizer, see
    https://github.com/bayesian-optimization/BayesianOptimization.

    Parameters
    ----------
    input : Input
        The input data for this optimizer. See the Notes below for input requirements.
    verbose : int, optional
        Verbosity of the optimizer output. Defaults to 1.
    random_state : int, optional
        Seed for initializing the random number generator. Defaults to 1.
    allow_duplicate_points : bool, optional
        Allow the optimizer to sample the same points in parameter space multiple times.
        This may happen if the parameter space is tightly bound or constrained. Defaults
        to ``True``.
    bayes_opt_kwds : dict
        Additional keyword arguments passed to the ``BayesianOptimization`` constructor.

    Notes
    -----
    The following requirements apply to the parameters of :py:class:`Input` when using
    this class:

    bounds
        Setting ``bounds`` in the ``Input`` is required because the optimizer first
        "explores" the bound parameter space and then narrows its search to regions
        where the cost function is low.
    constraints
        Must be an instance of ``scipy.optimize.LinearConstraint`` or
        ``scipy.optimize.NonlinearConstraint``. See
        https://github.com/bayesian-optimization/BayesianOptimization/blob/master/examples/constraints.ipynb
        for further information. Supplying constraints is optional.

    Attributes
    ----------
    optimizer : bayes_opt.BayesianOptimization
        The optimizer instance of this class.
    """

    verbose: InitVar[int] = 1
    random_state: InitVar[int] = 1
    allow_duplicate_points: InitVar[bool] = True
    bayes_opt_kwds: InitVar[Optional[Mapping[str, Any]]] = None

    def __post_init__(
        self, verbose, random_state, allow_duplicate_points, bayes_opt_kwds
    ):
        """Create the underlying ``BayesianOptimization`` instance

        Raises
        ------
        ValueError
            If ``input.bounds`` is ``None``.
        """
        if bayes_opt_kwds is None:
            bayes_opt_kwds = {}

        if self.input.bounds is None:
            raise ValueError("Input.bounds is required for this optimizer")

        self.optimizer = BayesianOptimization(
            f=self._opt_func,
            pbounds=self.input.bounds,
            verbose=verbose,
            random_state=random_state,
            allow_duplicate_points=allow_duplicate_points,
            **bayes_opt_kwds,
        )

    def _target_func(self, impact: Impact, data: pd.DataFrame) -> Number:
        """Negate the cost function because BayesianOptimization maximizes the target

        The negative is used instead of the reciprocal: the reciprocal is undefined
        for a cost of exactly zero (a perfect fit) and does not reverse the ordering
        when the cost function can take negative values.
        """
        return -self.input.cost_func(impact, data)

    def run(self, **opt_kwargs) -> "BayesianOptimizerOutput":
        """Execute the optimization

        ``BayesianOptimization`` *maximizes* a target function. Therefore, this class
        negates the cost function and uses that as target function. The cost function
        is still minimized.

        Parameters
        ----------
        init_points : int, optional
            Number of initial samples taken from the parameter space. Defaults to 10^N,
            where N is the number of parameters.
        n_iter : int, optional
            Number of iteration steps after initial sampling. Defaults to 10^N, where N
            is the number of parameters.
        opt_kwargs
            Further keyword arguments passed to ``BayesianOptimization.maximize``.

        Returns
        -------
        output : BayesianOptimizerOutput
            Optimization output. :py:attr:`BayesianOptimizerOutput.p_space` stores data
            on the sampled parameter space.
        """
        # Retrieve parameters; the number of bounds entries is the number of parameters
        num_params = len(self.input.bounds)
        init_points = opt_kwargs.pop("init_points", 10**num_params)
        n_iter = opt_kwargs.pop("n_iter", 10**num_params)

        # Run optimizer
        self.optimizer.maximize(init_points=init_points, n_iter=n_iter, **opt_kwargs)

        # Return output: best sample found plus the full sampled parameter space
        opt = self.optimizer.max
        return BayesianOptimizerOutput(
            params=opt["params"],
            target=opt["target"],
            p_space=self.optimizer.space,
        )
130+
131+
132+
@dataclass
class BayesianOptimizerOutput(Output):
    """Result of a calibration performed with :py:class:`BayesianOptimizer`

    Attributes
    ----------
    p_space : bayes_opt.target_space.TargetSpace
        The parameter space sampled by the optimizer.
    """

    p_space: TargetSpace

    def p_space_to_dataframe(self):
        """Convert the sampled parameter space into a pandas.DataFrame

        Returns
        -------
        pandas.DataFrame
            Data frame with one column per parameter plus a ``target`` column holding
            the associated target function value. The index is named ``Iteration``
            and its rows are the optimizer iterations.
        """
        space = self.p_space

        # One column per parameter, taken along the last axis of the sample array
        columns = {}
        for idx in range(space.dim):
            columns[space.keys[idx]] = space.params[..., idx]
        columns["target"] = space.target

        frame = pd.DataFrame.from_dict(columns)
        frame.index = frame.index.rename("Iteration")
        return frame

0 commit comments

Comments
 (0)