Skip to content

Commit 97d763a

Browse files
committed
Update code, docs, and tutorial
1 parent 43f40b3 commit 97d763a

File tree

4 files changed

+999
-838
lines changed

4 files changed

+999
-838
lines changed

climada/util/calibrate/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
"""Impact function calibration module"""
22

3-
from .base import Input
3+
from .base import Input, OutputEvaluator
44
from .bayesian_optimizer import BayesianOptimizer
55
from .scipy_optimizer import ScipyMinimizeOptimizer
66
from .func import rmse, rmsf, impact_at_reg

climada/util/calibrate/base.py

Lines changed: 113 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -130,55 +130,139 @@ def __post_init__(self):
130130
self._impact_label = f"Impact [{self.input.exposure.value_unit}]"
131131

132132
def plot_impf_set(self, **plot_kwargs):
133-
"""Plot the optimized impact functions"""
133+
"""Plot the optimized impact functions
134+
135+
This calls the plot function of the respective impact function set.
136+
137+
Parameters
138+
----------
139+
plot_kwargs
140+
Plotting keyword arguments passed to the underlying plotting method.
141+
142+
See Also
143+
--------
144+
:py:meth:`~climada.entity.impact_funcs.impact_func_set.ImpactFuncSet.plot`
145+
"""
134146
return self.impf_set.plot(**plot_kwargs)
135147

136-
def plot_at_event(self, **plot_kwargs):
137-
data = (
138-
pd.concat(
139-
[
140-
pd.Series([self.impact.at_event]),
141-
self.input.data.sum(axis="columns"),
142-
],
143-
ignore_index=True,
144-
axis=1,
145-
)
146-
.rename(columns={0: "Model", 1: "Data"})
147-
.set_index(self.input.hazard.event_name)
148-
)
148+
def plot_at_event(
149+
self,
150+
data_transf: Callable[[pd.DataFrame], pd.DataFrame] = lambda x: x,
151+
**plot_kwargs,
152+
):
153+
"""Create a bar plot comparing estimated model output and data per event
154+
155+
Every row of the :py:attr:`Input.data` is considered an event.
156+
The data to be plotted can be transformed with a generic function
157+
``data_transf``.
158+
159+
Parameters
160+
----------
161+
data_transf : Callable (pd.DataFrame -> pd.DataFrame), optional
162+
A function that transforms the data to plot before plotting.
163+
It receives a dataframe whose rows represent events and whose columns
164+
represent the modelled impact and the calibration data, respectively.
165+
By default, the data is not transformed.
166+
plot_kwargs
167+
Keyword arguments passed to the ``DataFrame.plot.bar`` method.
168+
169+
Returns
170+
-------
171+
ax : matplotlib.axes.Axes
172+
The plot axis returned by ``DataFrame.plot.bar``.
173+
"""
174+
data = pd.concat(
175+
[
176+
self.input.impact_to_dataframe(self.impact).sum(axis="columns"),
177+
self.input.data.sum(axis="columns"),
178+
],
179+
axis=1,
180+
).rename(columns={0: "Model", 1: "Data"})
181+
182+
# Transform data before plotting
183+
data = data_transf(data)
184+
185+
# Now plot
149186
ylabel = plot_kwargs.pop("ylabel", self._impact_label)
150187
return data.plot.bar(ylabel=ylabel, **plot_kwargs)
151188

152-
def plot_at_region(self, agg_regions=None, **plot_kwargs):
189+
def plot_at_region(
190+
self,
191+
data_transf: Callable[[pd.DataFrame], pd.DataFrame] = lambda x: x,
192+
**plot_kwargs,
193+
):
194+
"""Create a bar plot comparing estimated model output and data per region
195+
196+
Every column of the :py:attr:`Input.data` is considered a region.
197+
The data to be plotted can be transformed with a generic function
198+
``data_transf``.
199+
200+
Parameters
201+
----------
202+
data_transf : Callable (pd.DataFrame -> pd.DataFrame), optional
203+
A function that transforms the data to plot before plotting.
204+
It receives a dataframe whose rows represent regions and whose columns
205+
represent the modelled impact and the calibration data, respectively.
206+
By default, the data is not transformed.
207+
plot_kwargs
208+
Keyword arguments passed to the ``DataFrame.plot.bar`` method.
209+
210+
Returns
211+
-------
212+
ax : matplotlib.axes.Axes
213+
The plot axis returned by ``DataFrame.plot.bar``.
214+
"""
153215
data = pd.concat(
154216
[
155-
self.impact.impact_at_reg(agg_regions).sum(axis="index"),
217+
self.input.impact_to_dataframe(self.impact).sum(axis="index"),
156218
self.input.data.sum(axis="index"),
157219
],
158220
axis=1,
159221
).rename(columns={0: "Model", 1: "Data"})
160222

161-
# Use nice country names if no agg_regions were given
162-
if agg_regions is None:
163-
data = data.rename(
164-
index=lambda x: u_coord.country_to_iso(x, representation="name")
165-
)
223+
# Transform data before plotting
224+
data = data_transf(data)
166225

226+
# Now plot
167227
ylabel = plot_kwargs.pop("ylabel", self._impact_label)
168228
return data.plot.bar(ylabel=ylabel, **plot_kwargs)
169229

170-
def plot_event_region_heatmap(self, agg_regions=None, **plot_kwargs):
230+
def plot_event_region_heatmap(
231+
self,
232+
data_transf: Callable[[pd.DataFrame], pd.DataFrame] = lambda x: x,
233+
**plot_kwargs,
234+
):
235+
"""Plot a heatmap comparing all events per all regions
236+
237+
Every column of the :py:attr:`Input.data` is considered a region, and every
238+
row is considered an event.
239+
The data to be plotted can be transformed with a generic function
240+
``data_transf``.
241+
242+
Parameters
243+
----------
244+
data_transf : Callable (pd.DataFrame -> pd.DataFrame), optional
245+
A function that transforms the data to plot before plotting.
246+
It receives a dataframe whose rows represent events and whose columns
247+
represent the regions.
248+
By default, the data is not transformed.
249+
plot_kwargs
250+
Keyword arguments passed to the underlying heatmap plotting function.
251+
252+
Returns
253+
-------
254+
ax : matplotlib.axes.Axes
255+
The plot axis returned by the underlying heatmap plotting function.
256+
257+
"""
171258
# Data preparation
172-
agg = self.impact.impact_at_reg(agg_regions)
259+
agg = self.input.impact_to_dataframe(self.impact)
173260
data = (agg + 1) / (self.input.data + 1)
174261
data = data.transform(np.log10).replace(0, np.nan)
175-
data = data.where((agg < 1) & (self.input.data < 1))
262+
data = data.where((agg > 0) | (self.input.data > 0))
176263

177-
# Use nice country names if no agg_regions were given
178-
if agg_regions is None:
179-
data = data.rename(
180-
index=lambda x: u_coord.country_to_iso(x, representation="name")
181-
)
264+
# Transform data
265+
data = data_transf(data)
182266

183267
# Default plot settings
184268
annot = plot_kwargs.pop("annot", True)

climada/util/calibrate/bayesian_optimizer.py

Lines changed: 72 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from dataclasses import dataclass, InitVar
44
from typing import Mapping, Optional, Any
55
from numbers import Number
6-
from itertools import combinations
6+
from itertools import combinations, repeat
77

88
import pandas as pd
99
from bayes_opt import BayesianOptimization
@@ -164,33 +164,58 @@ def p_space_to_dataframe(self):
164164
def plot_p_space(
165165
self,
166166
p_space_df: Optional[pd.DataFrame] = None,
167+
x: Optional[str] = None,
168+
y: Optional[str] = None,
167169
min_def: Optional[str] = "Cost Function",
168170
min_fmt: str = "x",
169171
min_color: str = "r",
170172
**plot_kwargs
171173
):
172-
"""Plot the parameter space"""
174+
"""Plot the parameter space as scatter plot(s)
175+
176+
Produce a scatter plot where each point represents a parameter combination
177+
sampled by the optimizer. The coloring represents the cost function value.
178+
If there are more than two parameters in the input data frame, this method will
179+
produce one plot for each combination of two parameters.
180+
Explicit parameter names to plot can be given via the ``x`` and ``y`` arguments.
181+
If no data frame is provided as argument, the output of
182+
:py:meth:`p_space_to_dataframe` is used.
183+
184+
Parameters
185+
----------
186+
p_space_df : pd.DataFrame, optional
187+
The parameter space to plot. Defaults to the one returned by
188+
:py:meth:`p_space_to_dataframe`
189+
x : str, optional
190+
The parameter to plot on the x-axis. If ``y`` is *not* given, this will plot
191+
``x`` against all other parameters.
192+
y : str, optional
193+
The parameter to plot on the y-axis. If ``x`` is *not* given, this will plot
194+
``y`` against all other parameters.
195+
min_def : str, optional
196+
The name of the column in ``p_space_df`` defining which parameter set
197+
represents the minimum, which is plotted separately. Defaults to
198+
``"Cost Function"``. Set to ``None`` to avoid plotting the minimum.
199+
min_fmt : str, optional
200+
Plot format string for plotting the minimum. Defaults to ``"x"``.
201+
min_color : str, optional
202+
Color for plotting the minimum. Defaults to ``"r"`` (red).
203+
"""
173204
if p_space_df is None:
174205
p_space_df = self.p_space_to_dataframe()
175-
206+
176207
# Plot defaults
177208
cmap = plot_kwargs.pop("cmap", "viridis_r")
178209
s = plot_kwargs.pop("s", 40)
179210
c = plot_kwargs.pop("c", "Cost Function")
180211

181-
# Ignore cost dimension
182-
params = p_space_df.columns.tolist()
183-
try:
184-
params.remove(c)
185-
except ValueError:
186-
pass
187-
188-
# Iterate over parameter combinations
189-
for p_first, p_second in combinations(params, 2):
212+
def plot_single(x, y):
213+
"""Plot a single combination of parameters"""
214+
# Plot scatter
190215
ax = p_space_df.plot(
191216
kind="scatter",
192-
x=p_first,
193-
y=p_second,
217+
x=x,
218+
y=y,
194219
c=c,
195220
s=s,
196221
cmap=cmap,
@@ -199,5 +224,36 @@ def plot_p_space(
199224

200225
# Plot the minimum
201226
if min_def is not None:
202-
best = p_space_df.iloc[p_space_df.idxmin()[min_def]]
203-
ax.plot(best[p_first], best[p_second], min_fmt, color=min_color)
227+
best = p_space_df.loc[p_space_df.idxmin()[min_def]]
228+
ax.plot(best[x], best[y], min_fmt, color=min_color)
229+
230+
return ax
231+
232+
# Ignore cost dimension
233+
params = p_space_df.columns.tolist()
234+
try:
235+
params.remove(c)
236+
except ValueError:
237+
pass
238+
239+
# Option 0: Only one parameter
240+
if len(params) < 2:
241+
return plot_single(x=params[0], y=repeat(0))
242+
243+
# Option 1: Only a single plot
244+
if x is not None and y is not None:
245+
return plot_single(x, y)
246+
247+
# Option 2: Combination of all
248+
iterable = combinations(params, 2)
249+
# Option 3: Fix one and iterate over all others
250+
if x is not None:
251+
params.remove(x)
252+
iterable = zip(repeat(x), params)
253+
elif y is not None:
254+
params.remove(y)
255+
iterable = zip(params, repeat(y))
256+
257+
# Iterate over parameter combinations
258+
for p_first, p_second in iterable:
259+
plot_single(p_first, p_second)

doc/tutorial/climada_util_calibrate.ipynb

Lines changed: 813 additions & 792 deletions
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)