Skip to content

Commit dda079d

Browse files
committed
Add option to adjust data frame alignment
1 parent d43eb8a commit dda079d

File tree

2 files changed

+33
-10
lines changed

2 files changed

+33
-10
lines changed

climada/util/calibrate/base.py

Lines changed: 31 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,8 @@ class Input:
4343
cost_func : Callable
4444
Function that takes two ``pandas.DataFrame`` objects and returns the scalar
4545
"cost" between them. The optimization algorithm will try to minimize this
46-
number.
46+
number. The first argument is the true/correct values (:py:attr:`data`), and the
47+
second argument is the estimated/predicted values.
4748
bounds : Mapping (str, {Bounds, tuple(float, float)}), optional
4849
The bounds for the parameters. Keys: parameter names. Values:
4950
``scipy.optimize.Bounds`` instance or tuple of minimum and maximum value.
@@ -58,6 +59,15 @@ class Input:
5859
Defaults to ``{"assign_centroids": False}`` (by default, centroids are assigned
5960
here via the ``assign_centroids`` parameter, to avoid assigning them each time
6061
the impact is calculated).
62+
align_kwds : Mapping (str, Any), optional
63+
Keyword arguments to ``pandas.DataFrame.align`` for aligning the :py:attr:`data`
64+
with the data frame returned by :py:attr:`impact_to_dataframe`. By default,
65+
both axes will be aligned and the fill value is zero
66+
(``{"axis": None, "fill_value": 0}``). This means that if events and/or regions
67+
between both data frames do not align, the respective value is assumed to be
68+
zero and this will be incorporated into the estimation. If you want to require
69+
alignment, set ``"fill_value": None``. This will set non-aligned values to NaN,
70+
which typically results in a NaN target function, aborting the estimation.
6171
assign_centroids : bool, optional
6272
If ``True`` (default), assign the hazard centroids to the exposure.
6373
"""
@@ -73,6 +83,9 @@ class Input:
7383
impact_calc_kwds: Mapping[str, Any] = field(
7484
default_factory=lambda: {"assign_centroids": False}
7585
)
86+
align_kwds: Mapping[str, Any] = field(
87+
default_factory=lambda: {"axis": None, "fill_value": 0}
88+
)
7689
assign_centroids: InitVar[bool] = True
7790

7891
def __post_init__(self, assign_centroids):
@@ -303,24 +316,26 @@ class Optimizer(ABC):
303316

304317
input: Input
305318

306-
def _target_func(self, impact: pd.DataFrame, data: pd.DataFrame) -> Number:
319+
def _target_func(self, true: pd.DataFrame, predicted: pd.DataFrame) -> Number:
307320
"""Target function for the optimizer
308321
309322
The default version of this function simply returns the value of the cost
310323
function evaluated on the arguments.
311324
312325
Parameters
313326
----------
314-
impact : climada.engine.Impact
315-
The impact object returned by the impact calculation.
316-
data : pandas.DataFrame
317-
The data used for calibration. See :py:attr:`Input.data`.
327+
true : pandas.DataFrame
328+
The "true" data used for calibration. By default, this is
329+
:py:attr:`Input.data`.
330+
predicted : pandas.DataFrame
331+
The impact predicted for the current parameter estimate after it has been transformed
332+
into a dataframe by :py:attr:`Input.impact_to_dataframe`.
318333
319334
Returns
320335
-------
321336
The value of the target function for the optimizer.
322337
"""
323-
return self.input.cost_func(impact, data)
338+
return self.input.cost_func(true, predicted)
324339

325340
def _kwargs_to_impact_func_creator(self, *_, **kwargs) -> Dict[str, Any]:
326341
"""Define how the parameters to :py:meth:`_opt_func` must be transformed
@@ -361,15 +376,23 @@ def _opt_func(self, *args, **kwargs) -> Number:
361376
-------
362377
Target function value for the given arguments
363378
"""
379+
# Create the impact function set from a new parameter estimate
364380
params = self._kwargs_to_impact_func_creator(*args, **kwargs)
365381
impf_set = self.input.impact_func_creator(**params)
382+
383+
# Compute the impact
366384
impact = ImpactCalc(
367385
exposures=self.input.exposure,
368386
impfset=impf_set,
369387
hazard=self.input.hazard,
370388
).impact(**self.input.impact_calc_kwds)
389+
390+
# Transform to DataFrame, align, and compute target function
371391
impact_df = self.input.impact_to_dataframe(impact)
372-
return self._target_func(impact_df, self.input.data)
392+
data_aligned, impact_df_aligned = self.input.data.align(
393+
impact_df, **self.input.align_kwds
394+
)
395+
return self._target_func(data_aligned, impact_df_aligned)
373396

374397
@abstractmethod
375398
def run(self, **opt_kwargs) -> Output:

climada/util/calibrate/bayesian_optimizer.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,9 +85,9 @@ def __post_init__(
8585
**bayes_opt_kwds,
8686
)
8787

88-
def _target_func(self, impact: pd.DataFrame, data: pd.DataFrame) -> Number:
88+
def _target_func(self, true: pd.DataFrame, predicted: pd.DataFrame) -> Number:
8989
"""Invert the cost function because BayesianOptimization maximizes the target"""
90-
return -self.input.cost_func(impact, data)
90+
return -self.input.cost_func(true, predicted)
9191

9292
def run(self, **opt_kwargs):
9393
"""Execute the optimization

0 commit comments

Comments
 (0)