Skip to content

Commit 1538e78

Browse files
committed
Move impact transform and align to Input
1 parent 24c1fc3 commit 1538e78

File tree

5 files changed

+187
-145
lines changed

5 files changed

+187
-145
lines changed

climada/test/test_util_calibrate.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212

1313
from climada.util.calibrate import Input, ScipyMinimizeOptimizer, BayesianOptimizer
1414

15-
from climada.util.calibrate.test.test_calibrate import hazard, exposure
15+
from climada.util.calibrate.test.test_base import hazard, exposure
1616

1717

1818
class TestScipyMinimizeOptimizer(unittest.TestCase):
@@ -48,7 +48,6 @@ def setUp(self) -> None:
4848
self.impact_func_creator,
4949
self.impact_to_dataframe,
5050
mean_squared_error,
51-
# lambda x,y: mean_squared_error(x, y, squared=True),
5251
)
5352

5453
def test_single(self):

climada/util/calibrate/base.py

Lines changed: 135 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,59 @@ def __post_init__(self, assign_centroids):
9090
if assign_centroids:
9191
self.exposure.assign_centroids(self.hazard)
9292

93+
def impact_to_aligned_df(
94+
self, impact: Impact, fillna: float = np.nan
95+
) -> Tuple[pd.DataFrame, pd.DataFrame]:
96+
"""Create a dataframe from an impact and align it with the data.
97+
98+
When aligning, two general cases might occur, which are not mutually exclusive:
99+
100+
1. There are data points for which no impact was computed. This will always be
101+
treated as an impact of zero.
102+
2. There are impacts for which no data points exist. For these points, the input
103+
data will be filled with the value of :py:attr:`Input.missing_data_value`.
104+
105+
This method performs the following steps:
106+
107+
* Transform the impact into a dataframe using :py:attr:`impact_to_dataframe`.
108+
* Align the :py:attr:`data` with the impact dataframe, using
109+
:py:attr:`missing_data_value` as fill value.
110+
* Align the impact dataframe with the data, using zeros as fill value.
111+
* In the aligned impact, set all values to zero where the data is NaN.
112+
* Fill remaining NaNs in data with ``fillna``.
113+
114+
Parameters
115+
----------
116+
impact : Impact
117+
The impact computed by the model. It is transformed into a dataframe with
118+
:py:attr:`Input.impact_to_dataframe` before aligning.
119+
120+
Returns
121+
-------
122+
data_aligned : pd.DataFrame
123+
The data aligned to the impact dataframe
124+
impact_df_aligned : pd.DataFrame
125+
The impact transformed to a dataframe and aligned with the data
126+
"""
127+
# Transform impact to dataframe
128+
impact_df = self.impact_to_dataframe(impact)
129+
if impact_df.isna().any(axis=None):
130+
raise ValueError("NaN values computed in impact!")
131+
132+
# Align with different fill values
133+
data_aligned, _ = self.data.align(
134+
impact_df, axis=None, fill_value=self.missing_data_value, copy=True
135+
)
136+
impact_df_aligned, _ = impact_df.align(
137+
data_aligned, join="right", axis=None, fill_value=0.0, copy=False
138+
)
139+
140+
# Set all impacts to zero for which data is NaN
141+
impact_df_aligned.where(data_aligned.notna(), 0.0, inplace=True)
142+
143+
# NOTE: impact_df_aligned should not contain any NaNs at this point
144+
return data_aligned.fillna(fillna), impact_df_aligned.fillna(fillna)
145+
93146

94147
@dataclass
95148
class Output:
@@ -163,7 +216,6 @@ def plot_impf_variability(
163216
plot_impf_kws: Optional[dict] = None,
164217
plot_hist_kws: Optional[dict] = None,
165218
):
166-
167219
"""Plot impact function variability with parameter combinations of
168220
almost equal cost function values
169221
@@ -190,7 +242,7 @@ def plot_impf_variability(
190242
if p_space_df is None:
191243
# Assert that self.output has the p_space_to_dataframe() method,
192244
# which is defined for the BayesianOptimizerOutput class
193-
if not hasattr(self.output,"p_space_to_dataframe"):
245+
if not hasattr(self.output, "p_space_to_dataframe"):
194246
raise TypeError(
195247
"To derive the full impact function parameter space, "
196248
"plot_impf_variability() requires BayesianOptimizerOutput "
@@ -203,74 +255,93 @@ def plot_impf_variability(
203255
# and remove the dimension 'Cost Function'.
204256
params = p_space_df.columns.tolist()
205257
try:
206-
params.remove('Cost Function')
258+
params.remove("Cost Function")
207259
except ValueError:
208260
pass
209261

210262
# Retrieve parameters of impact functions with cost function values
211263
# within 'cost_func_diff' % of the best estimate
212264
params_within_range = p_space_df[params]
213-
plot_space_label = 'Parameter space'
265+
plot_space_label = "Parameter space"
214266
if cost_func_diff is not None:
215-
max_cost_func_val = (p_space_df['Cost Function'].min()*
216-
(1+cost_func_diff))
267+
max_cost_func_val = p_space_df["Cost Function"].min() * (1 + cost_func_diff)
217268
params_within_range = p_space_df.loc[
218-
p_space_df['Cost Function'] <=max_cost_func_val,params
269+
p_space_df["Cost Function"] <= max_cost_func_val, params
219270
]
220-
plot_space_label = (f"within {int(cost_func_diff*100)} percent "
221-
f"of best fit")
271+
plot_space_label = (
272+
f"within {int(cost_func_diff*100)} percent " f"of best fit"
273+
)
222274

223275
# Set plot defaults
224-
color = plot_impf_kws.pop('color','tab:blue')
225-
lw = plot_impf_kws.pop('lw',2)
226-
zorder = plot_impf_kws.pop('zorder',3)
227-
label = plot_impf_kws.pop('label','best fit')
276+
color = plot_impf_kws.pop("color", "tab:blue")
277+
lw = plot_impf_kws.pop("lw", 2)
278+
zorder = plot_impf_kws.pop("zorder", 3)
279+
label = plot_impf_kws.pop("label", "best fit")
228280

229-
#get number of impact functions and create a plot for each
281+
# get number of impact functions and create a plot for each
230282
n_impf = len(self.impf_set.get_func(haz_type=haz_type))
231-
axes=[]
283+
axes = []
232284

233285
for impf_idx in range(n_impf):
286+
_, ax = plt.subplots()
234287

235-
_,ax = plt.subplots()
236-
237-
#Plot best-fit impact function
288+
# Plot best-fit impact function
238289
best_impf = self.impf_set.get_func(haz_type=haz_type)[impf_idx]
239-
ax.plot(best_impf.intensity,best_impf.mdd*best_impf.paa*100,
240-
color=color,lw=lw,zorder=zorder,label=label,**plot_impf_kws)
241-
242-
#Plot all impact functions within 'cost_func_diff' % of best estimate
290+
ax.plot(
291+
best_impf.intensity,
292+
best_impf.mdd * best_impf.paa * 100,
293+
color=color,
294+
lw=lw,
295+
zorder=zorder,
296+
label=label,
297+
**plot_impf_kws,
298+
)
299+
300+
# Plot all impact functions within 'cost_func_diff' % of best estimate
243301
for row in range(params_within_range.shape[0]):
244302
label_temp = plot_space_label if row == 0 else None
245303

246-
sel_params = params_within_range.iloc[row,:].to_dict()
304+
sel_params = params_within_range.iloc[row, :].to_dict()
247305
temp_impf_set = self.input.impact_func_creator(**sel_params)
248306
temp_impf = temp_impf_set.get_func(haz_type=haz_type)[impf_idx]
249307

250-
ax.plot(temp_impf.intensity,temp_impf.mdd*temp_impf.paa*100,
251-
color='grey',alpha=0.4,label=label_temp)
308+
ax.plot(
309+
temp_impf.intensity,
310+
temp_impf.mdd * temp_impf.paa * 100,
311+
color="grey",
312+
alpha=0.4,
313+
label=label_temp,
314+
)
252315

253316
# Plot hazard intensity value distributions
254317
if plot_haz:
255318
haz_vals = self.input.hazard.intensity[
256319
:, self.input.exposure.gdf[f"centr_{haz_type}"]
257320
]
258321

259-
#Plot defaults
260-
color_hist = plot_hist_kws.pop('color','tab:orange')
261-
alpha_hist = plot_hist_kws.pop('alpha',0.3)
322+
# Plot defaults
323+
color_hist = plot_hist_kws.pop("color", "tab:orange")
324+
alpha_hist = plot_hist_kws.pop("alpha", 0.3)
262325

263326
ax2 = ax.twinx()
264-
ax2.hist(haz_vals.data,bins=40,color=color_hist,
265-
alpha=alpha_hist,label='Hazard intensity\noccurence')
266-
ax2.set(ylabel='Hazard intensity occurence (#Exposure points)')
267-
ax.axvline(x=haz_vals.max(),label='Maximum hazard value',
268-
color='tab:orange')
269-
ax2.legend(loc='lower right')
270-
271-
ax.set(xlabel=f"Intensity ({self.input.hazard.units})",
327+
ax2.hist(
328+
haz_vals.data,
329+
bins=40,
330+
color=color_hist,
331+
alpha=alpha_hist,
332+
label="Hazard intensity\noccurence",
333+
)
334+
ax2.set(ylabel="Hazard intensity occurence (#Exposure points)")
335+
ax.axvline(
336+
x=haz_vals.max(), label="Maximum hazard value", color="tab:orange"
337+
)
338+
ax2.legend(loc="lower right")
339+
340+
ax.set(
341+
xlabel=f"Intensity ({self.input.hazard.units})",
272342
ylabel="Mean Damage Ratio (MDR) in %",
273-
xlim=(min(best_impf.intensity),max(best_impf.intensity)))
343+
xlim=(min(best_impf.intensity), max(best_impf.intensity)),
344+
)
274345
ax.legend()
275346
axes.append(ax)
276347

@@ -279,13 +350,12 @@ def plot_impf_variability(
279350

280351
return ax
281352

282-
283353
def plot_at_event(
284354
self,
285355
data_transf: Callable[[pd.DataFrame], pd.DataFrame] = lambda x: x,
286356
**plot_kwargs,
287357
):
288-
"""Create a bar plot comparing estimated model output and data per event
358+
"""Create a bar plot comparing estimated model output and data per event.
289359
290360
Every row of the :py:attr:`Input.data` is considered an event.
291361
The data to be plotted can be transformed with a generic function
@@ -305,21 +375,23 @@ def plot_at_event(
305375
-------
306376
ax : matplotlib.axes.Axes
307377
The plot axis returned by ``DataFrame.plot.bar``
378+
379+
Note
380+
----
381+
This plot does *not* include the ignored impact, see :py:attr:`Input.data`.
308382
"""
309-
data = pd.concat(
310-
[
311-
self.input.impact_to_dataframe(self.impact).sum(axis="columns"),
312-
self.input.data.sum(axis="columns"),
313-
],
383+
data, impact = self.input.impact_to_aligned_df(self.impact)
384+
values = pd.concat(
385+
[impact.sum(axis="columns"), data.sum(axis="columns")],
314386
axis=1,
315387
).rename(columns={0: "Model", 1: "Data"})
316388

317389
# Transform data before plotting
318-
data = data_transf(data)
390+
values = data_transf(values)
319391

320392
# Now plot
321393
ylabel = plot_kwargs.pop("ylabel", self._impact_label)
322-
return data.plot.bar(ylabel=ylabel, **plot_kwargs)
394+
return values.plot.bar(ylabel=ylabel, **plot_kwargs)
323395

324396
def plot_at_region(
325397
self,
@@ -346,21 +418,23 @@ def plot_at_region(
346418
-------
347419
ax : matplotlib.axes.Axes
348420
The plot axis returned by ``DataFrame.plot.bar``.
421+
422+
Note
423+
----
424+
This plot does *not* include the ignored impact, see :py:attr:`Input.data`.
349425
"""
350-
data = pd.concat(
351-
[
352-
self.input.impact_to_dataframe(self.impact).sum(axis="index"),
353-
self.input.data.sum(axis="index"),
354-
],
426+
data, impact = self.input.impact_to_aligned_df(self.impact)
427+
values = pd.concat(
428+
[impact.sum(axis="index"), data.sum(axis="index")],
355429
axis=1,
356430
).rename(columns={0: "Model", 1: "Data"})
357431

358432
# Transform data before plotting
359-
data = data_transf(data)
433+
values = data_transf(values)
360434

361435
# Now plot
362436
ylabel = plot_kwargs.pop("ylabel", self._impact_label)
363-
return data.plot.bar(ylabel=ylabel, **plot_kwargs)
437+
return values.plot.bar(ylabel=ylabel, **plot_kwargs)
364438

365439
def plot_event_region_heatmap(
366440
self,
@@ -391,13 +465,12 @@ def plot_event_region_heatmap(
391465
392466
"""
393467
# Data preparation
394-
agg = self.input.impact_to_dataframe(self.impact)
395-
data = (agg + 1) / (self.input.data + 1)
396-
data = data.transform(np.log10)
397-
data = data.where((agg > 0) | (self.input.data > 0))
468+
data, impact = self.input.impact_to_aligned_df(self.impact)
469+
values = (impact + 1) / (data + 1) # Avoid division by zero
470+
values = values.transform(np.log10)
398471

399472
# Transform data
400-
data = data_transf(data)
473+
values = data_transf(values)
401474

402475
# Default plot settings
403476
annot = plot_kwargs.pop("annot", True)
@@ -411,7 +484,7 @@ def plot_event_region_heatmap(
411484
)
412485

413486
return sns.heatmap(
414-
data,
487+
values,
415488
annot=annot,
416489
vmin=vmin,
417490
vmax=vmax,
@@ -482,53 +555,6 @@ def _kwargs_to_impact_func_creator(self, *_, **kwargs) -> Dict[str, Any]:
482555
"""
483556
return kwargs
484557

485-
def _align_impact_with_data(
486-
self, impact_df: pd.DataFrame
487-
) -> Tuple[pd.DataFrame, pd.DataFrame]:
488-
"""Align the impact dataframe with the input data dataframe
489-
490-
When aligning, two general cases might occur, which are not mutually exclusive:
491-
492-
1. There are data points for which no impact was computed. This will always be
493-
treated as an impact of zero.
494-
2. There are impacts for which no data points exist. For these points, the input
495-
data will be filled with the value of :py:attr:`Input.missing_data_value`.
496-
497-
Parameters
498-
----------
499-
impact_df : pandas.DataFrame
500-
The impact computed by the model, transformed into a dataframe by
501-
:py:attr:`Input.impact_to_dataframe`.
502-
503-
Returns
504-
-------
505-
data_aligned : pandas.DataFrame
506-
The :py:attr:`Input.data` aligned with the impact.
507-
impact_df_aligned : pandas.DataFrame
508-
The ``impact_df`` aligned with the data.
509-
510-
Raises
511-
------
512-
ValueError
513-
If ``impact_df`` contains NaNs before aligning.
514-
"""
515-
if impact_df.isna().any(axis=None):
516-
raise ValueError("NaN values computed in impact!")
517-
518-
data_aligned, impact_df_aligned = self.input.data.align(
519-
impact_df, axis=None, fill_value=None
520-
)
521-
522-
# Add user-set value for non-aligned data
523-
data_aligned[
524-
impact_df_aligned.notna() & data_aligned.isna()
525-
] = self.input.missing_data_value
526-
527-
# Set all impacts to zero for which data is NaN
528-
impact_df_aligned.where(data_aligned.notna(), inplace=True)
529-
530-
return data_aligned.fillna(0), impact_df_aligned.fillna(0)
531-
532558
def _opt_func(self, *args, **kwargs) -> Number:
533559
"""The optimization function iterated by the optimizer
534560
@@ -557,8 +583,9 @@ def _opt_func(self, *args, **kwargs) -> Number:
557583
).impact(**self.input.impact_calc_kwds)
558584

559585
# Transform to DataFrame, align, and compute target function
560-
impact_df = self.input.impact_to_dataframe(impact)
561-
data_aligned, impact_df_aligned = self._align_impact_with_data(impact_df)
586+
data_aligned, impact_df_aligned = self.input.impact_to_aligned_df(
587+
impact, fillna=0
588+
)
562589
return self._target_func(data_aligned, impact_df_aligned)
563590

564591
@abstractmethod

0 commit comments

Comments
 (0)