Skip to content

Commit 2326db2

Browse files
committed
Add option to store and load calibration results
1 parent 10e9679 commit 2326db2

File tree

4 files changed

+152
-9
lines changed

4 files changed

+152
-9
lines changed

climada/util/calibrate/base.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,13 @@
2222
from dataclasses import dataclass, field, InitVar
2323
from typing import Callable, Mapping, Optional, Tuple, Union, Any, Dict
2424
from numbers import Number
25+
from pathlib import Path
2526

2627
import pandas as pd
2728
import numpy as np
2829
from scipy.optimize import Bounds, LinearConstraint, NonlinearConstraint
2930
import seaborn as sns
31+
import h5py
3032

3133
from climada.hazard import Hazard
3234
from climada.entity import Exposures, ImpactFuncSet
@@ -185,6 +187,37 @@ class Output:
185187
params: Mapping[str, Number]
186188
target: Number
187189

190+
def to_hdf5(self, filepath: Union[Path, str], mode: str = "x"):
    """Store this output in an H5 file

    The target and every parameter are single numbers, so they are written
    as HDF5 attributes rather than as datasets.

    Parameters
    ----------
    filepath : Path or str
        Location of the file to write.
    mode : str (optional)
        Mode used for opening the file. Defaults to ``x`` (Create file, fail if
        exists).
    """
    with h5py.File(filepath, mode=mode) as h5_file:
        # The target lives as an attribute of the "base" group
        base_group = h5_file.create_group("base")
        base_group.attrs["target"] = self.target

        # Each parameter becomes one attribute of the nested "params" group
        params_attrs = base_group.create_group("params").attrs
        for name, value in self.params.items():
            params_attrs[name] = value
213+
214+
@classmethod
def from_hdf5(cls, filepath: Union[Path, str]):
    """Create an output object from an H5 file

    Reads the ``target`` attribute of the ``base`` group and all attributes of
    the ``base/params`` group.

    Parameters
    ----------
    filepath : Path or str
        Location of the file to read.

    Returns
    -------
    An instance of ``cls`` built from the stored ``params`` and ``target``.
    """
    # Request read-only access explicitly so that loading does not depend on
    # the default file mode of the installed h5py version.
    with h5py.File(filepath, mode="r") as file:
        base_group = file["base"]
        target = base_group.attrs["target"]
        params = dict(base_group["params"].attrs.items())
        return cls(params=params, target=target)
188221

189222
@dataclass
190223
class OutputEvaluator:

climada/util/calibrate/bayesian_optimizer.py

Lines changed: 80 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
from itertools import combinations, repeat
2525
from collections import deque, namedtuple
2626
import logging
27+
from pathlib import Path
2728

2829
import pandas as pd
2930
import numpy as np
@@ -38,6 +39,26 @@
3839
LOGGER = logging.getLogger(__name__)
3940

4041

42+
@dataclass
43+
class _FakeConstraint:
44+
"""Fake the behavior of the constrait for cycling the BayesianOutputOptimizer"""
45+
46+
results: np.ndarray
47+
48+
@property
49+
def lb(self):
50+
"""Return the lower bound"""
51+
return np.array([0])
52+
53+
def allowed(self, values):
54+
"""Return if the values are allowed. This only mocks the true behavior"""
55+
if self.results.shape != values.shape:
56+
raise ValueError("Inserting wrong constraint values")
57+
return self.results
58+
59+
60+
# TODO: Add read/write method
61+
# TODO: Export this class
4162
@dataclass
4263
class BayesianOptimizerOutput(Output):
4364
"""Output of a calibration with :py:class:`BayesianOptimizer`
@@ -83,6 +104,59 @@ def p_space_to_dataframe(self):
83104
data.index.rename("Iteration", inplace=True)
84105
return data
85106

107+
def to_hdf5(self, filepath: Union[Path, str], mode: str = "x"):
    """Write this output to an H5 file

    The base class data is written first, using ``mode``. The parameter space
    data frame is then appended to the same file under the key ``p_space``.
    """
    # Base class information (opens the file with the requested mode)
    super().to_hdf5(filepath=filepath, mode=mode)

    # Parameter space: append, so the data written above is kept
    self.p_space_to_dataframe().to_hdf(filepath, mode="a", key="p_space")
115+
116+
@classmethod
def from_hdf5(cls, filepath: Union[Path, str]):
    """Read BayesianOptimizerOutput from an H5 file

    Warning
    -------
    This results in an object with broken :py:attr:`p_space` object. Do not further
    modify this parameter space. This function is only intended to load the
    parameter space again for analysis/plotting.
    """
    base = Output.from_hdf5(filepath)
    frame = pd.read_hdf(filepath, mode="r", key="p_space")

    # The target registered in the parameter space is the negated cost function
    frame["Calibration", "Target"] = -frame["Calibration", "Cost Function"]

    # Constraint data is only present if the file has a constraints column
    has_constraint = "Constraints Function" in frame["Calibration"].columns
    if has_constraint:
        constraint = _FakeConstraint(frame["Calibration", "Allowed"].to_numpy())
    else:
        constraint = None

    # The original bounds are not read from the file, so NaN placeholders are used
    space = TargetSpace(
        target_func=lambda x: x,
        pbounds={name: (np.nan, np.nan) for name in frame["Parameters"].columns},
        constraint=constraint,
        allow_duplicate_points=True,
    )

    # Re-register every stored sample in the new parameter space
    for _, record in frame.iterrows():
        if has_constraint:
            constraint_value = record["Calibration", "Constraints Function"]
        else:
            constraint_value = None
        space.register(
            params=record["Parameters"].to_numpy(),
            target=record["Calibration", "Target"],
            constraint_value=constraint_value,
        )

    return cls(params=base.params, target=base.target, p_space=space)
159+
86160
def plot_p_space(
87161
self,
88162
p_space_df: Optional[pd.DataFrame] = None,
@@ -685,14 +759,18 @@ def plot_impf_variability(
685759
# Plot defaults
686760
color_hist = plot_hist_kws.pop("color", "tab:orange")
687761
alpha_hist = plot_hist_kws.pop("alpha", 0.3)
762+
bins = plot_hist_kws.pop("bins", 40)
763+
label = plot_hist_kws.pop("label", "Hazard intensity\noccurence")
688764

765+
# Histogram plot
689766
ax2 = ax.twinx()
690767
ax2.hist(
691768
haz_vals.data,
692-
bins=40,
769+
bins=bins,
693770
color=color_hist,
694771
alpha=alpha_hist,
695-
label="Hazard intensity\noccurence",
772+
label=label,
773+
**plot_hist_kws,
696774
)
697775
ax2.set(ylabel="Hazard intensity occurence (#Exposure points)")
698776
ax.axvline(

climada/util/calibrate/test/test_base.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020

2121
import unittest
2222
from unittest.mock import patch, create_autospec, MagicMock
23+
from tempfile import TemporaryDirectory
24+
from pathlib import Path
2325

2426
import numpy as np
2527
import numpy.testing as npt
@@ -206,6 +208,20 @@ def setUp(self):
206208
self.optimizer = ConcreteOptimizer(self.input)
207209

208210

211+
class TestOuput(unittest.TestCase):
    """Test the optimizer output"""

    def test_cycle(self):
        """Write an output to a file, read it back, and compare both objects"""
        original = Output(params={"p1": 1.0, "p_2": 10}, target=2.0)
        with TemporaryDirectory() as tmpdir:
            h5_path = Path(tmpdir) / "out.h5"
            original.to_hdf5(h5_path)
            self.assertTrue(h5_path.is_file())

            restored = Output.from_hdf5(h5_path)
            self.assertEqual(original.target, restored.target)
            self.assertDictEqual(original.params, restored.params)
224+
209225
class TestOutputEvaluator(unittest.TestCase):
210226
"""Test the output evaluator"""
211227

climada/util/calibrate/test/test_bayesian_optimizer.py

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,14 @@
2020

2121
import unittest
2222
from unittest.mock import patch, MagicMock
23+
from tempfile import TemporaryDirectory
24+
from pathlib import Path
2325

2426
import numpy as np
2527
import numpy.testing as npt
2628
import pandas as pd
2729
from bayes_opt import BayesianOptimization, Events
2830
from scipy.optimize import NonlinearConstraint
29-
import matplotlib.pyplot as plt
3031
from matplotlib.axes import Axes
3132

3233
from climada.util.calibrate import Input, BayesianOptimizer, BayesianOptimizerController
@@ -185,8 +186,8 @@ def test_improvements(self):
185186
class TestBayesianOptimizerOutput(unittest.TestCase):
186187
"""Tests for the output class of BayesianOptimizer"""
187188

188-
def test_p_space_to_dataframe(self):
189-
""""""
189+
def setUp(self):
190+
"""Create a default output"""
190191
bayes_opt = BayesianOptimization(
191192
f=lambda x: -(x**2),
192193
pbounds={"x": (-10, 10)},
@@ -198,13 +199,16 @@ def test_p_space_to_dataframe(self):
198199
bayes_opt.probe({"x": 1.0}, lazy=False)
199200
bayes_opt.probe({"x": -0.9}, lazy=False)
200201

201-
output = BayesianOptimizerOutput(
202+
self.output = BayesianOptimizerOutput(
202203
params=bayes_opt.max["params"],
203204
target=bayes_opt.max["target"],
204205
p_space=bayes_opt.space,
205206
)
206-
self.assertDictEqual(output.params, {"x": 1.0})
207-
self.assertEqual(output.target, -1.0)
207+
208+
def test_p_space_to_dataframe(self):
209+
""""""
210+
self.assertDictEqual(self.output.params, {"x": 1.0})
211+
self.assertEqual(self.output.target, -1.0)
208212

209213
idx = pd.MultiIndex.from_tuples(
210214
[
@@ -220,7 +224,19 @@ def test_p_space_to_dataframe(self):
220224
df["Calibration", "Constraints Function"] = df["Parameters", "x"]
221225
df["Calibration", "Allowed"] = [True, True, False]
222226
df.index.rename("Iteration", inplace=True)
223-
pd.testing.assert_frame_equal(output.p_space_to_dataframe(), df)
227+
pd.testing.assert_frame_equal(self.output.p_space_to_dataframe(), df)
228+
229+
def test_cycle(self):
    """Check that writing and re-reading the output yields the same p_space_df"""
    with TemporaryDirectory() as tmpdir:
        h5_path = Path(tmpdir) / "file.h5"
        self.output.to_hdf5(h5_path)
        self.assertTrue(h5_path.is_file())

        restored = BayesianOptimizerOutput.from_hdf5(h5_path)
        expected_df = self.output.p_space_to_dataframe()
        restored_df = restored.p_space_to_dataframe()
        pd.testing.assert_frame_equal(expected_df, restored_df)
224240

225241
def test_plot_p_space(self):
226242
"""Test plotting of different parameter combinations"""

0 commit comments

Comments
 (0)