Skip to content

Commit 423b5b8

Browse files
committed
Improve alignment and handling of NaNs
1 parent 645862a commit 423b5b8

File tree

3 files changed

+883
-894
lines changed

3 files changed

+883
-894
lines changed

climada/util/calibrate/base.py

Lines changed: 60 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
from climada.hazard import Hazard
1414
from climada.entity import Exposures, ImpactFuncSet
1515
from climada.engine import Impact, ImpactCalc
16-
import climada.util.coordinates as u_coord
1716

1817
ConstraintType = Union[LinearConstraint, NonlinearConstraint, Mapping]
1918

@@ -30,7 +29,9 @@ class Input:
3029
Exposures object to compute impacts from
3130
data : pandas.DataFrame
3231
The data to compare computed impacts to. Index: Event IDs matching the IDs of
33-
``hazard``. Columns: Arbitrary columns.
32+
``hazard``. Columns: Arbitrary columns. NaN values in the data frame have
33+
special meaning: Corresponding impact values computed by the model are ignored
34+
in the calibration.
3435
impact_func_creator : Callable
3536
Function that takes the parameters as keyword arguments and returns an impact
3637
function set. This will be called each time the optimization algorithm updates
@@ -59,17 +60,14 @@ class Input:
5960
Defaults to ``{"assign_centroids": False}`` (by default, centroids are assigned
6061
here via the ``assign_centroids`` parameter, to avoid assigning them each time
6162
the impact is calculated).
62-
align_kwds : Mapping (str, Any), optional
63-
Keyword arguments to ``pandas.DataFrame.align`` for aligning the :py:attr:`data`
64-
with the data frame returned by :py:attr:`impact_to_dataframe`. By default,
65-
both axes will be aligned and the fill value is zero
66-
(``"axis": None, "fill_value": 0}``). This assumes that if events and/or regions
67-
between both data frames do not align, the respective value is assumed to be
68-
zero and this will be incorporated into the estimation. If you want to require
69-
alignment, set ``"fill_value": None``. This will set non-aligned values to NaN,
70-
which typically results in a NaN target function, aborting the estimation.
63+
missing_data_value : float, optional
64+
If the impact model returns impact data for which no values exist in
65+
:py:attr:`data`, insert this value. Defaults to NaN, in which case the impact
66+
from the model is ignored. Set this to zero to explicitly calibrate to zero
67+
impacts in these cases.
7168
assign_centroids : bool, optional
72-
If ``True`` (default), assign the hazard centroids to the exposure.
69+
If ``True`` (default), assign the hazard centroids to the exposure when this
70+
object is created.
7371
"""
7472

7573
hazard: Hazard
@@ -83,9 +81,7 @@ class Input:
8381
impact_calc_kwds: Mapping[str, Any] = field(
8482
default_factory=lambda: {"assign_centroids": False}
8583
)
86-
align_kwds: Mapping[str, Any] = field(
87-
default_factory=lambda: {"axis": None, "fill_value": 0}
88-
)
84+
missing_data_value: float = np.nan
8985
assign_centroids: InitVar[bool] = True
9086

9187
def __post_init__(self, assign_centroids):
@@ -271,7 +267,7 @@ def plot_event_region_heatmap(
271267
# Data preparation
272268
agg = self.input.impact_to_dataframe(self.impact)
273269
data = (agg + 1) / (self.input.data + 1)
274-
data = data.transform(np.log10).replace(0, np.nan)
270+
data = data.transform(np.log10)
275271
data = data.where((agg > 0) | (self.input.data > 0))
276272

277273
# Transform data
@@ -360,6 +356,53 @@ def _kwargs_to_impact_func_creator(self, *_, **kwargs) -> Dict[str, Any]:
360356
"""
361357
return kwargs
362358

359+
def _align_impact_with_data(
360+
self, impact_df: pd.DataFrame
361+
) -> Tuple[pd.DataFrame, pd.DataFrame]:
362+
"""Align the impact dataframe with the input data dataframe
363+
364+
When aligning, two general cases might occur, which are not mutually exclusive:
365+
366+
1. There are data points for which no impact was computed. This will always be
367+
treated as an impact of zero.
368+
2. There are impacts for which no data points exist. For these points, the input
369+
data will be filled with the value of :py:attr:`Input.missing_data_value`.
370+
371+
Parameters
372+
----------
373+
impact_df : pandas.DataFrame
374+
The impact computed by the model, transformed into a dataframe by
375+
:py:attr:`Input.impact_to_dataframe`.
376+
377+
Returns
378+
-------
379+
data_aligned : pandas.DataFrame
380+
The :py:attr:`Input.data` aligned with the impact.
381+
impact_df_aligned : pandas.DataFrame
382+
The ``impact_df`` aligned with the data.
383+
384+
Raises
385+
------
386+
ValueError
387+
If ``impact_df`` contains NaNs before aligning.
388+
"""
389+
if impact_df.isna().any(axis=None):
390+
raise ValueError("NaN values computed in impact!")
391+
392+
data_aligned, impact_df_aligned = self.input.data.align(
393+
impact_df, axis=None, fill_value=None
394+
)
395+
396+
# Add user-set value for non-aligned data
397+
data_aligned[
398+
impact_df_aligned.notna() & data_aligned.isna()
399+
] = self.input.missing_data_value
400+
401+
# Set all impacts to zero for which data is NaN
402+
impact_df_aligned.where(data_aligned.notna(), inplace=True)
403+
404+
return data_aligned.fillna(0), impact_df_aligned.fillna(0)
405+
363406
def _opt_func(self, *args, **kwargs) -> Number:
364407
"""The optimization function iterated by the optimizer
365408
@@ -389,9 +432,7 @@ def _opt_func(self, *args, **kwargs) -> Number:
389432

390433
# Transform to DataFrame, align, and compute target function
391434
impact_df = self.input.impact_to_dataframe(impact)
392-
data_aligned, impact_df_aligned = self.input.data.align(
393-
impact_df, **self.input.align_kwds
394-
)
435+
data_aligned, impact_df_aligned = self._align_impact_with_data(impact_df)
395436
return self._target_func(data_aligned, impact_df_aligned)
396437

397438
@abstractmethod

climada/util/calibrate/bayesian_optimizer.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
"""Calibration with Bayesian Optimization"""
22

33
from dataclasses import dataclass, InitVar
4-
from typing import Mapping, Optional, Any
4+
from typing import Mapping, Optional, Any, Union, List
55
from numbers import Number
66
from itertools import combinations, repeat
77

88
import pandas as pd
9+
import matplotlib.axes as maxes
910
from bayes_opt import BayesianOptimization
1011
from bayes_opt.target_space import TargetSpace
1112

@@ -170,7 +171,7 @@ def plot_p_space(
170171
min_fmt: str = "x",
171172
min_color: str = "r",
172173
**plot_kwargs
173-
):
174+
) -> Union[maxes.Axes, List[maxes.Axes]]:
174175
"""Plot the parameter space as scatter plot(s)
175176
176177
Produce a scatter plot where each point represents a parameter combination
@@ -255,5 +256,4 @@ def plot_single(x, y):
255256
iterable = zip(params, repeat(y))
256257

257258
# Iterate over parameter combinations
258-
for p_first, p_second in iterable:
259-
plot_single(p_first, p_second)
259+
return [plot_single(p_first, p_second) for p_first, p_second in iterable]

0 commit comments

Comments
 (0)