Skip to content

Commit 832da6a

Browse files
committed
Use MultiIndex in parameter space dataframe
1 parent 1538e78 commit 832da6a

File tree

3 files changed: +60 −32 lines changed

3 files changed: +60 −32 lines changed

climada/test/test_util_calibrate.py

Lines changed: 14 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -170,7 +170,7 @@ def test_multiple_constrained(self):
170170

171171
# Constraint: param[0] < param[1] (intensity_1 < intensity_2)
172172
self.input.constraints = NonlinearConstraint(
173-
lambda params: params[0] - params[1], -np.inf, 0.0
173+
lambda intensity_1, intensity_2: intensity_1 - intensity_2, -np.inf, 0.0
174174
)
175175
self.input.bounds = {"intensity_1": (-1, 4), "intensity_2": (-1, 4)}
176176
# Run optimizer
@@ -187,6 +187,17 @@ def test_multiple_constrained(self):
187187
p_space = output.p_space_to_dataframe()
188188
self.assertSetEqual(
189189
set(p_space.columns.to_list()),
190-
{"intensity_1", "intensity_2", "Cost Function"},
190+
{
191+
("Parameters", "intensity_1"),
192+
("Parameters", "intensity_2"),
193+
("Calibration", "Cost Function"),
194+
("Calibration", "Constraints Function"),
195+
("Calibration", "Allowed"),
196+
},
197+
)
198+
self.assertTupleEqual(p_space.shape, (300, 5))
199+
p_allowed = p_space.loc[p_space["Calibration", "Allowed"], "Parameters"]
200+
npt.assert_array_equal(
201+
(p_allowed["intensity_1"] < p_allowed["intensity_2"]).to_numpy(),
202+
np.full_like(p_allowed["intensity_1"].to_numpy(), True),
191203
)
192-
self.assertTupleEqual(p_space.shape, (300, 3))

climada/util/calibrate/base.py

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,15 @@ class Input:
8787

8888
def __post_init__(self, assign_centroids):
8989
"""Prepare input data"""
90+
if not isinstance(self.data, pd.DataFrame):
91+
if isinstance(self.data, pd.Series):
92+
raise ValueError(
93+
"You passed a pandas Series as 'data'. Please transform it into a "
94+
"dataframe with Series.to_frame() and make sure that columns "
95+
"correctly indicate locations and indexes events."
96+
)
97+
raise ValueError("'data' must be a pandas.DataFrame")
98+
9099
if assign_centroids:
91100
self.exposure.assign_centroids(self.hazard)
92101

@@ -251,22 +260,16 @@ def plot_impf_variability(
251260
)
252261
p_space_df = self.output.p_space_to_dataframe()
253262

254-
# Retrieve list of parameters required for creating impact functions
255-
# and remove the dimension 'Cost Function'.
256-
params = p_space_df.columns.tolist()
257-
try:
258-
params.remove("Cost Function")
259-
except ValueError:
260-
pass
261-
262263
# Retrieve parameters of impact functions with cost function values
263264
# within 'cost_func_diff' % of the best estimate
264-
params_within_range = p_space_df[params]
265+
params_within_range = p_space_df["Parameters"]
265266
plot_space_label = "Parameter space"
266267
if cost_func_diff is not None:
267-
max_cost_func_val = p_space_df["Cost Function"].min() * (1 + cost_func_diff)
268-
params_within_range = p_space_df.loc[
269-
p_space_df["Cost Function"] <= max_cost_func_val, params
268+
max_cost_func_val = p_space_df["Calibration", "Cost Function"].min() * (
269+
1 + cost_func_diff
270+
)
271+
params_within_range = params_within_range.loc[
272+
p_space_df["Calibration", "Cost Function"] <= max_cost_func_val
270273
]
271274
plot_space_label = (
272275
f"within {int(cost_func_diff*100)} percent " f"of best fit"

climada/util/calibrate/bayesian_optimizer.py

Lines changed: 31 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""Calibration with Bayesian Optimization"""
22

33
from dataclasses import dataclass, InitVar
4-
from typing import Mapping, Optional, Any, Union, List
4+
from typing import Mapping, Optional, Any, Union, List, Tuple
55
from numbers import Number
66
from itertools import combinations, repeat
77

@@ -80,6 +80,7 @@ def __post_init__(
8080
self.optimizer = BayesianOptimization(
8181
f=self._opt_func,
8282
pbounds=self.input.bounds,
83+
constraint=self.input.constraints,
8384
verbose=verbose,
8485
random_state=random_state,
8586
allow_duplicate_points=allow_duplicate_points,
@@ -153,13 +154,26 @@ def p_space_to_dataframe(self):
153154
function value (``Cost Function``) and whose rows are the optimizer
154155
iterations.
155156
"""
156-
# TODO: Handle constraints!!!
157-
data = {
158-
self.p_space.keys[i]: self.p_space.params[..., i]
159-
for i in range(self.p_space.dim)
160-
}
161-
data["Cost Function"] = -self.p_space.target
162-
data = pd.DataFrame.from_dict(data)
157+
# Build MultiIndex for columns
158+
index = pd.MultiIndex.from_tuples(
159+
[("Parameters", p) for p in self.p_space.keys]
160+
+ [("Calibration", "Cost Function")]
161+
)
162+
163+
# Create DataFrame and fill
164+
data = pd.DataFrame(data=None, columns=index)
165+
for i in range(self.p_space.dim):
166+
data["Parameters", self.p_space.keys[i]] = self.p_space.params[..., i]
167+
data["Calibration", "Cost Function"] = -self.p_space.target
168+
169+
# Constraints
170+
if self.p_space.constraint is not None:
171+
data["Calibration", "Constraints Function"] = self.p_space.constraint_values
172+
data["Calibration", "Allowed"] = self.p_space.constraint.allowed(
173+
self.p_space.constraint_values
174+
)
175+
176+
# Rename index and return
163177
data.index.rename("Iteration", inplace=True)
164178
return data
165179

@@ -168,7 +182,7 @@ def plot_p_space(
168182
p_space_df: Optional[pd.DataFrame] = None,
169183
x: Optional[str] = None,
170184
y: Optional[str] = None,
171-
min_def: Optional[str] = "Cost Function",
185+
min_def: Optional[Union[str, Tuple[str, str]]] = "Cost Function",
172186
min_fmt: str = "x",
173187
min_color: str = "r",
174188
**plot_kwargs
@@ -206,13 +220,19 @@ def plot_p_space(
206220
if p_space_df is None:
207221
p_space_df = self.p_space_to_dataframe()
208222

223+
if min_def is not None and not isinstance(min_def, tuple):
224+
min_def = ("Calibration", min_def)
225+
209226
# Plot defaults
210227
cmap = plot_kwargs.pop("cmap", "viridis_r")
211228
s = plot_kwargs.pop("s", 40)
212-
c = plot_kwargs.pop("c", "Cost Function")
229+
c = ("Calibration", plot_kwargs.pop("c", "Cost Function"))
213230

214231
def plot_single(x, y):
215232
"""Plot a single combination of parameters"""
233+
x = ("Parameters", x)
234+
y = ("Parameters", y)
235+
216236
# Plot scatter
217237
ax = p_space_df.plot(
218238
kind="scatter",
@@ -231,14 +251,8 @@ def plot_single(x, y):
231251

232252
return ax
233253

234-
# Ignore cost dimension
235-
params = p_space_df.columns.tolist()
236-
try:
237-
params.remove(c)
238-
except ValueError:
239-
pass
240-
241254
# Option 0: Only one parameter
255+
params = p_space_df.columns.to_list()
242256
if len(params) < 2:
243257
return plot_single(x=params[0], y=repeat(0))
244258

0 commit comments

Comments
 (0)