Skip to content

Commit 1d9f508

Browse files
committed
Merge branch 'calibrate-impact-functions' into cross-calibrate-impact-functions
2 parents fbdeff0 + 5e2dd75 commit 1d9f508

File tree

3 files changed

+175
-116
lines changed

3 files changed

+175
-116
lines changed

climada/util/calibrate/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
from .bayesian_optimizer import (
2323
BayesianOptimizer,
2424
BayesianOptimizerController,
25+
BayesianOptimizerOutput,
2526
BayesianOptimizerOutputEvaluator,
27+
select_best
2628
)
2729
from .scipy_optimizer import ScipyMinimizeOptimizer

climada/util/calibrate/bayesian_optimizer.py

Lines changed: 117 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,11 @@
2828

2929
import pandas as pd
3030
import numpy as np
31+
import matplotlib as mpl
3132
import matplotlib.pyplot as plt
3233
import matplotlib.axes as maxes
34+
import matplotlib.patches as mpatches
35+
import matplotlib.ticker as mticker
3336
from bayes_opt import BayesianOptimization, Events, UtilityFunction, ScreenLogger
3437
from bayes_opt.target_space import TargetSpace
3538

@@ -57,8 +60,46 @@ def allowed(self, values):
5760
return self.results
5861

5962

60-
# TODO: Add read/write method
61-
# TODO: Export this class
63+
def select_best(
64+
p_space_df: pd.DataFrame,
65+
cost_limit: float,
66+
absolute: bool = True,
67+
cost_col=("Calibration", "Cost Function"),
68+
) -> pd.DataFrame:
69+
"""Select the best parameter space samples defined by a cost function limit
70+
71+
The limit is a factor of the minimum value relative to itself (``absolute=True``) or
72+
to the range of cost function values (``absolute=False``). A ``cost_limit`` of 0.1
73+
will select all rows where the cost function is within
74+
75+
- 110% of the minimum value if ``absolute=True``.
76+
- 10% of the range between minimum and maximum cost function value if
77+
``absolute=False``.
78+
79+
Parameters
80+
----------
81+
p_space_df : pd.DataFrame
82+
The parameter space to select from.
83+
cost_limit : float
84+
The limit factor used for selection.
85+
absolute : bool, optional
86+
Whether the limit factor is applied to the minimum value (``True``) or the range
87+
of values (``False``). Defaults to ``True``.
88+
cost_col : Column specifier, optional
89+
The column indicating cost function values. Defaults to
90+
``("Calibration", "Cost Function")``.
91+
92+
Returns
93+
-------
94+
pd.DataFrame
95+
A subselection of the input data frame.
96+
"""
97+
min_val = p_space_df[cost_col].min()
98+
cost_range = min_val if absolute else p_space_df[cost_col].max() - min_val
99+
max_val = min_val + cost_range * cost_limit
100+
return p_space_df.loc[p_space_df[cost_col] <= max_val]
101+
102+
62103
@dataclass
63104
class BayesianOptimizerOutput(Output):
64105
"""Output of a calibration with :py:class:`BayesianOptimizer`
@@ -374,7 +415,10 @@ def _previous_max(self):
374415
return self._improvements[-1].target
375416

376417
def optimizer_params(self) -> dict[str, Union[int, float, str, UtilityFunction]]:
377-
"""Return parameters for the optimizer"""
418+
"""Return parameters for the optimizer
419+
420+
In the current implementation, these do not change.
421+
"""
378422
return {
379423
"init_points": self.init_points,
380424
"n_iter": self.n_iter,
@@ -385,7 +429,7 @@ def optimizer_params(self) -> dict[str, Union[int, float, str, UtilityFunction]]
385429
),
386430
}
387431

388-
def _is_random_step(self):
432+
def _is_random_step(self) -> bool:
389433
"""Return true if we sample randomly instead of Bayesian"""
390434
return (self._last_it_end + self.steps) < self.init_points
391435

@@ -491,7 +535,7 @@ def update(self, event: str, instance: BayesianOptimization):
491535
self._last_it_end = self.steps
492536

493537
def improvements(self) -> pd.DataFrame:
494-
"""Return improvements as nice data
538+
"""Return improvements as nicely formatted data
495539
496540
Returns
497541
-------
@@ -663,58 +707,60 @@ def __post_init__(self):
663707

664708
def plot_impf_variability(
665709
self,
666-
cost_func_diff: float = 0.1,
667710
p_space_df: Optional[pd.DataFrame] = None,
668711
plot_haz: bool = True,
712+
plot_opt_kws: Optional[dict] = None,
669713
plot_impf_kws: Optional[dict] = None,
670714
plot_hist_kws: Optional[dict] = None,
715+
plot_axv_kws: Optional[dict] = None,
671716
):
672717
"""Plot impact function variability with parameter combinations of
673718
almost equal cost function values
674719
675720
Args:
676-
cost_func_diff (float, optional): Max deviation from optimal cost
677-
function value (as fraction). Defaults to 0.1 (i.e. 10%).
678-
p_space_df (pd.DataFrame, optional): parameter space. Defaults to None.
721+
p_space_df (pd.DataFrame, optional): Parameter space to plot functions from.
722+
If ``None``, this uses the space returned by
723+
:py:meth:`~BayesianOptimizerOutput.p_space_to_dataframe`. Use
724+
:py:func:`select_best` for a convenient subselection of parameters close
725+
to the optimum.
679726
plot_haz (bool, optional): Whether or not to plot hazard intensity
680727
distibution. Defaults to False.
681-
plot_impf_kws (dict, optional): Keyword arguments for impact
728+
plot_opt_kws (dict, optional): Keyword arguments for optimal impact
682729
function plot. Defaults to None.
730+
plot_impf_kws (dict, optional): Keyword arguments for all impact
731+
function plots. Defaults to None.
683732
plot_hist_kws (dict, optional): Keyword arguments for hazard
684-
intensity distribution plot. Defaults to None.
733+
intensity histogram plot. Defaults to None.
734+
plot_axv_kws (dict, optional): Keyword arguments for hazard intensity range
735+
plot (axvspan).
685736
"""
686737

687738
# Initialize plot keyword arguments
739+
if plot_opt_kws is None:
740+
plot_opt_kws = {}
688741
if plot_impf_kws is None:
689742
plot_impf_kws = {}
690743
if plot_hist_kws is None:
691744
plot_hist_kws = {}
745+
if plot_axv_kws is None:
746+
plot_axv_kws = {}
692747

693748
# Retrieve hazard type and parameter space
694749
haz_type = self.input.hazard.haz_type
695750
if p_space_df is None:
696751
p_space_df = self.output.p_space_to_dataframe()
697752

698-
# Retrieve parameters of impact functions with cost function values
699-
# within 'cost_func_diff' % of the best estimate
700-
params_within_range = p_space_df["Parameters"]
701-
plot_space_label = "Parameter space"
702-
if cost_func_diff is not None:
703-
max_cost_func_val = p_space_df["Calibration", "Cost Function"].min() * (
704-
1 + cost_func_diff
705-
)
706-
params_within_range = params_within_range.loc[
707-
p_space_df["Calibration", "Cost Function"] <= max_cost_func_val
708-
]
709-
plot_space_label = (
710-
f"within {int(cost_func_diff*100)} percent " f"of best fit"
711-
)
712-
713753
# Set plot defaults
714-
color = plot_impf_kws.pop("color", "tab:blue")
715-
lw = plot_impf_kws.pop("lw", 2)
716-
zorder = plot_impf_kws.pop("zorder", 3)
717-
label = plot_impf_kws.pop("label", "best fit")
754+
colors = mpl.colormaps["tab20"].colors
755+
lw = plot_opt_kws.pop("lw", 2)
756+
label_opt = plot_opt_kws.pop("label", "Optimal Function")
757+
color_opt = plot_opt_kws.pop("color", colors[0])
758+
zorder_opt = plot_opt_kws.pop("zorder", 4)
759+
760+
label_impf = plot_impf_kws.pop("label", "All Functions")
761+
color_impf = plot_impf_kws.pop("color", colors[1])
762+
alpha_impf = plot_impf_kws.pop("alpha", 0.5)
763+
zorder_impf = plot_impf_kws.pop("zorder", 3)
718764

719765
# get number of impact functions and create a plot for each
720766
n_impf = len(self.impf_set.get_func(haz_type=haz_type))
@@ -728,63 +774,76 @@ def plot_impf_variability(
728774
ax.plot(
729775
best_impf.intensity,
730776
best_impf.mdd * best_impf.paa * 100,
731-
color=color,
777+
color=color_opt,
732778
lw=lw,
733-
zorder=zorder,
734-
label=label,
735-
**plot_impf_kws,
779+
zorder=zorder_opt,
780+
label=label_opt,
781+
**plot_opt_kws,
736782
)
737783

738784
# Plot all impact functions within 'cost_func_diff' % of best estimate
739-
for row in range(params_within_range.shape[0]):
740-
label_temp = plot_space_label if row == 0 else None
785+
for idx, (_, row) in enumerate(p_space_df.iterrows()):
786+
label_temp = label_impf if idx == 0 else None
741787

742-
sel_params = params_within_range.iloc[row, :].to_dict()
743-
temp_impf_set = self.input.impact_func_creator(**sel_params)
788+
temp_impf_set = self.input.impact_func_creator(**row["Parameters"])
744789
temp_impf = temp_impf_set.get_func(haz_type=haz_type)[impf_idx]
745790

746791
ax.plot(
747792
temp_impf.intensity,
748793
temp_impf.mdd * temp_impf.paa * 100,
749-
color="grey",
750-
alpha=0.4,
794+
color=color_impf,
795+
alpha=alpha_impf,
796+
zorder=zorder_impf,
751797
label=label_temp,
752798
)
753799

800+
handles, _ = ax.get_legend_handles_labels()
801+
ax.set(
802+
xlabel=f"Intensity ({self.input.hazard.units})",
803+
ylabel="Mean Damage Ratio (MDR)",
804+
xlim=(min(best_impf.intensity), max(best_impf.intensity)),
805+
)
806+
ax.yaxis.set_major_formatter(mticker.PercentFormatter(xmax=100))
807+
754808
# Plot hazard intensity value distributions
755809
if plot_haz:
756810
haz_vals = self.input.hazard.intensity[
757811
:, self.input.exposure.gdf[f"centr_{haz_type}"]
758-
]
812+
].data
759813

760814
# Plot defaults
761-
color_hist = plot_hist_kws.pop("color", "tab:orange")
762-
alpha_hist = plot_hist_kws.pop("alpha", 0.3)
763815
bins = plot_hist_kws.pop("bins", 40)
764-
label = plot_hist_kws.pop("label", "Hazard intensity\noccurence")
816+
label_hist = plot_hist_kws.pop("label", "Hazard Intensity")
817+
color_hist = plot_hist_kws.pop("color", colors[2])
818+
color_axv = plot_axv_kws.pop("color", colors[3])
819+
alpha_axv = plot_axv_kws.pop("alpha", 0.5)
765820

766821
# Histogram plot
767822
ax2 = ax.twinx()
823+
ax.set_facecolor("none")
824+
ax.set_zorder(2)
825+
ax2.set_zorder(1)
826+
ax2.axvspan(
827+
haz_vals.min(), haz_vals.max(), color=color_axv, alpha=alpha_axv
828+
)
768829
ax2.hist(
769-
haz_vals.data,
830+
haz_vals,
770831
bins=bins,
771832
color=color_hist,
772-
alpha=alpha_hist,
773-
label=label,
833+
label=label_hist,
774834
**plot_hist_kws,
775835
)
776-
ax2.set(ylabel="Hazard intensity occurence (#Exposure points)")
777-
ax.axvline(
778-
x=haz_vals.max(), label="Maximum hazard value", color="tab:orange"
779-
)
780-
ax2.legend(loc="lower right")
836+
ax2.set_ylabel("Exposure Points", color=color_hist)
781837

782-
ax.set(
783-
xlabel=f"Intensity ({self.input.hazard.units})",
784-
ylabel="Mean Damage Ratio (MDR) in %",
785-
xlim=(min(best_impf.intensity), max(best_impf.intensity)),
786-
)
787-
ax.legend()
838+
handles = handles + [
839+
mpatches.Patch(color=color_hist, label=label_hist),
840+
mpatches.Patch(color=color_axv, label=f"{label_hist} Range"),
841+
]
842+
ax.yaxis.label.set_color(color_opt)
843+
ax.tick_params(axis="y", colors=color_opt)
844+
ax2.tick_params(axis="y", colors=color_hist)
845+
846+
ax.legend(handles=handles)
788847
axes.append(ax)
789848

790849
if n_impf > 1:

0 commit comments

Comments
 (0)