Skip to content

Commit 96cf55c

Browse files
committed
Make cost functions consume numpy arrays
1 parent 56a0f15 commit 96cf55c

File tree

8 files changed

+61
-10
lines changed

8 files changed

+61
-10
lines changed

climada/util/calibrate/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
BayesianOptimizerOutputEvaluator,
2727
select_best,
2828
)
29+
from .cost_func import mse, msle
2930
from .ensemble import (
3031
AverageEnsembleOptimizer,
3132
EnsembleOptimizerOutput,

climada/util/calibrate/base.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,10 @@ class Input:
8686
:py:attr:`data`, insert this value. Defaults to NaN, in which case the impact
8787
from the model is ignored. Set this to zero to explicitly calibrate to zero
8888
impacts in these cases.
89+
df_to_numpy : Callable
90+
A function that transforms a pandas.DataFrame into a numpy.ndarray to be
91+
inserted into the :py:attr:`cost_func`. By default, this will flatten the data
92+
frame.
8993
data_weights : pandas.DataFrame, optional
9094
Weights for each entry in :py:attr:`data`. Must have the exact same index and
9195
columns. If ``None``, the weights will be ignored (equivalent to the same weight
@@ -109,6 +113,9 @@ class Input:
109113
default_factory=lambda: {"assign_centroids": False}
110114
)
111115
missing_data_value: float = np.nan
116+
df_to_numpy: Callable[[pd.DataFrame], np.ndarray] = (
117+
lambda df: df.to_numpy().flatten()
118+
)
112119
data_weights: pd.DataFrame | None = field(default=None, kw_only=True)
113120
missing_weights_value: float = field(default=0.0, kw_only=True)
114121
assign_centroids: InitVar[bool] = field(default=True, kw_only=True)
@@ -523,12 +530,12 @@ def _opt_func(self, *args, **kwargs) -> Number:
523530
copy=True,
524531
fill_value=self.input.missing_weights_value,
525532
)
526-
weights_aligned = weights_aligned.to_numpy().flatten()
533+
weights_aligned = self.input.df_to_numpy(weights_aligned)
527534

528535
# Compute target function
529536
return self._target_func(
530-
data_aligned.to_numpy().flatten(),
531-
impact_df_aligned.to_numpy().flatten(),
537+
self.input.df_to_numpy(data_aligned),
538+
self.input.df_to_numpy(impact_df_aligned),
532539
weights_aligned,
533540
)
534541

climada/util/calibrate/bayesian_optimizer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -617,7 +617,7 @@ def __post_init__(self, random_state, allow_duplicate_points, bayes_opt_kwds):
617617
)
618618

619619
def _target_func(
620-
self, data: pd.DataFrame, predicted: pd.DataFrame, weights: pd.DataFrame | None
620+
self, data: np.ndarray, predicted: np.ndarray, weights: np.ndarray | None
621621
) -> Number:
622622
"""Invert the cost function because BayesianOptimization maximizes the target"""
623623
return -self.input.cost_func(data, predicted, weights)
climada/util/calibrate/cost_func.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
"""
2+
This file is part of CLIMADA.
3+
4+
Copyright (C) 2017 ETH Zurich, CLIMADA contributors listed in AUTHORS.
5+
6+
CLIMADA is free software: you can redistribute it and/or modify it under the
7+
terms of the GNU General Public License as published by the Free
8+
Software Foundation, version 3.
9+
10+
CLIMADA is distributed in the hope that it will be useful, but WITHOUT ANY
11+
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
12+
PARTICULAR PURPOSE. See the GNU General Public License for more details.
13+
14+
You should have received a copy of the GNU General Public License along
15+
with CLIMADA. If not, see <https://www.gnu.org/licenses/>.
16+
17+
---
18+
Cost functions for impact function calibration module
19+
"""
20+
21+
import numpy as np
22+
from sklearn.metrics import mean_squared_error, mean_squared_log_error
23+
24+
25+
def mse(data: np.ndarray, predicted: np.ndarray, weights: np.ndarray | None) -> float:
26+
"""Weighted mean squared error
27+
28+
See
29+
https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_squared_error.html
30+
"""
31+
return mean_squared_error(data, predicted, sample_weight=weights)
32+
33+
34+
def msle(data: np.ndarray, predicted: np.ndarray, weights: np.ndarray | None) -> float:
35+
"""Weighted mean squared logarithmic error
36+
37+
See
38+
https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_squared_log_error.html
39+
"""
40+
return mean_squared_log_error(data, predicted, sample_weight=weights)

climada/util/calibrate/test/test_base.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,7 @@ def test_align(self, _):
212212
self.input.impact_to_dataframe = lambda _: pd.DataFrame(
213213
data={"col1": [2, 4], "col2": [4, 0]}, index=[0, 2]
214214
)
215+
self.input.df_to_numpy = lambda df: df
215216
self.input.cost_func = lambda x, y, w: (x, y, w)
216217

217218
# Apply

climada/util/calibrate/test/test_bayesian_optimizer.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -316,10 +316,10 @@ def test_kwargs_to_impact_func_creator(self, _):
316316

317317
# Call 'run'
318318
with patch.object(self.input, "impact_to_aligned_df") as align:
319-
align.return_value = (None, None)
319+
align.return_value = (pd.DataFrame(), pd.DataFrame())
320320
self.optimizer.run(controller=self.controller)
321321

322-
# Check call to '_kwargs_to_impact_func_gen'
322+
# Check call to '_kwargs_to_impact_func_creator'
323323
call_args = self.input.impact_func_creator.call_args_list
324324
self.assertEqual(len(call_args), 3)
325325
for args in call_args:
@@ -340,7 +340,7 @@ def test_target_func(self, _):
340340

341341
# Call 'run'
342342
with patch.object(self.input, "impact_to_aligned_df") as align:
343-
align.return_value = (None, None)
343+
align.return_value = (pd.DataFrame(), pd.DataFrame())
344344
output = self.optimizer.run(controller=self.controller)
345345

346346
# Check target space

climada/util/calibrate/test/test_ensemble.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -371,16 +371,17 @@ def setUp(self):
371371

372372
def test_post_init_sampling(self):
373373
opt = AverageEnsembleOptimizer(
374-
input=self.input, sample_fraction=0.5, optimizer_type=ConcreteOptimizer
374+
input=self.input, optimizer_type=ConcreteOptimizer
375375
)
376376
samples = np.array(opt.samples)
377-
self.assertTupleEqual(samples.shape, (20, 2, 2))
377+
self.assertTupleEqual(samples.shape, (20, 4, 2))
378378

379379
opt = AverageEnsembleOptimizer(
380380
input=self.input,
381381
ensemble_size=11,
382382
sample_fraction=0.8, # Will cause rounding
383383
optimizer_type=ConcreteOptimizer,
384+
replace=False,
384385
)
385386
samples = np.array(opt.samples)
386387
self.assertTupleEqual(samples.shape, (11, 3, 2))
@@ -390,6 +391,7 @@ def test_post_init_sampling(self):
390391
ensemble_size=2,
391392
sample_fraction=0.95, # Will cause rounding, always select all
392393
optimizer_type=ConcreteOptimizer,
394+
replace=False,
393395
)
394396

395397
samples = [sorted([tuple(idx) for idx in arr]) for arr in opt.samples]

climada/util/calibrate/test/test_scipy_optimizer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ def test_kwargs_to_impact_func_creator(self, _):
6363
# Call 'run', make sure that 'minimize' is only with these parameters
6464
params_init = {"x_2": 1, "x 1": 2, "x_3": 3} # NOTE: Also works with whitespace
6565
with patch.object(self.input, "impact_to_aligned_df") as align:
66-
align.return_value = (None, None)
66+
align.return_value = (pd.DataFrame(), pd.DataFrame())
6767
self.optimizer.run(params_init=params_init, options={"maxiter": 1})
6868

6969
# Check call to '_kwargs_to_impact_func_creator'

0 commit comments

Comments
 (0)