
Commit 24c1fc3

Split tests into multiple files, finish up

1 parent 67ef797 commit 24c1fc3

7 files changed, +497 -292 lines changed

climada/test/test_util_calibrate.py

Lines changed: 96 additions & 6 deletions
@@ -1,15 +1,16 @@
 """Integration tests for calibration utility module"""
 
 import unittest
-from functools import partial
 
 import pandas as pd
 import numpy as np
+import numpy.testing as npt
 from scipy.optimize import NonlinearConstraint
+from sklearn.metrics import mean_squared_error
 
 from climada.entity import ImpactFuncSet, ImpactFunc
 
-from climada.util.calibrate import Input, ScipyMinimizeOptimizer, rmse, impact_at_reg
+from climada.util.calibrate import Input, ScipyMinimizeOptimizer, BayesianOptimizer
 
 from climada.util.calibrate.test.test_calibrate import hazard, exposure
 
@@ -29,7 +30,7 @@ def setUp(self) -> None:
         self.data = pd.DataFrame(
             data={"a": [3, 1], "b": [0.2, 0.01]}, index=self.events
         )
-        self.impact_to_dataframe = partial(impact_at_reg, region_ids=["a", "b"])
+        self.impact_to_dataframe = lambda impact: impact.impact_at_reg(["a", "b"])
         self.impact_func_creator = lambda slope: ImpactFuncSet(
             [
                 ImpactFunc(
@@ -46,7 +47,8 @@ def setUp(self) -> None:
             self.data,
             self.impact_func_creator,
             self.impact_to_dataframe,
-            rmse,
+            mean_squared_error,
+            # lambda x,y: mean_squared_error(x, y, squared=True),
         )
 
     def test_single(self):
@@ -94,10 +96,98 @@ def test_multiple_constrained(self):
         optimizer = ScipyMinimizeOptimizer(self.input)
         output = optimizer.run(
             params_init={"intensity_1": 2, "intensity_2": 2},
+            options=dict(gtol=1e-5, xtol=1e-5),
         )
 
         # Check results (low accuracy)
         self.assertTrue(output.result.success)
-        self.assertAlmostEqual(output.params["intensity_1"], 1.0, places=3)
-        self.assertAlmostEqual(output.params["intensity_2"], 3.0, places=3)
+        print(output.result.message)
+        print(output.result.status)
+        self.assertAlmostEqual(output.params["intensity_1"], 1.0, places=2)
+        self.assertGreater(output.params["intensity_2"], 2.8)  # Should be 3.0
         self.assertAlmostEqual(output.target, 0.0, places=3)
+
+
+class TestBayesianOptimizer(unittest.TestCase):
+    """Integration tests for the BayesianOptimizer"""
+
+    def setUp(self) -> None:
+        """Prepare input for optimization"""
+        self.hazard = hazard()
+        self.hazard.frequency = np.ones_like(self.hazard.event_id)
+        self.hazard.date = self.hazard.frequency
+        self.hazard.event_name = ["event"] * len(self.hazard.event_id)
+        self.exposure = exposure()
+        self.events = [10, 1]
+        self.hazard = self.hazard.select(event_id=self.events)
+        self.data = pd.DataFrame(
+            data={"a": [3, 1], "b": [0.2, 0.01]}, index=self.events
+        )
+        self.impact_to_dataframe = lambda impact: impact.impact_at_reg(["a", "b"])
+        self.impact_func_creator = lambda slope: ImpactFuncSet(
+            [
+                ImpactFunc(
+                    intensity=np.array([0, 10]),
+                    mdd=np.array([0, 10 * slope]),
+                    paa=np.ones(2),
+                    id=1,
+                )
+            ]
+        )
+        self.input = Input(
+            self.hazard,
+            self.exposure,
+            self.data,
+            self.impact_func_creator,
+            self.impact_to_dataframe,
+            mean_squared_error,
+        )
+
+    def test_single(self):
+        """Test with single parameter"""
+        self.input.bounds = {"slope": (-1, 3)}
+        optimizer = BayesianOptimizer(self.input)
+        output = optimizer.run(init_points=10, n_iter=20, random_state=1)
+
+        # Check result (low accuracy)
+        self.assertAlmostEqual(output.params["slope"], 1.0, places=2)
+        self.assertAlmostEqual(output.target, 0.0, places=3)
+        self.assertEqual(output.p_space.dim, 1)
+        self.assertTupleEqual(output.p_space_to_dataframe().shape, (30, 2))
+
+    def test_multiple_constrained(self):
+        """Test with multiple constrained parameters"""
+        # Set new generator
+        self.input.impact_func_creator = lambda intensity_1, intensity_2: ImpactFuncSet(
+            [
+                ImpactFunc(
+                    intensity=np.array([0, intensity_1, intensity_2]),
+                    mdd=np.array([0, 1, 3]),
+                    paa=np.ones(3),
+                    id=1,
+                )
+            ]
+        )
+
+        # Constraint: param[0] < param[1] (intensity_1 < intensity_2)
+        self.input.constraints = NonlinearConstraint(
+            lambda params: params[0] - params[1], -np.inf, 0.0
+        )
+        self.input.bounds = {"intensity_1": (-1, 4), "intensity_2": (-1, 4)}
+        # Run optimizer
+        optimizer = BayesianOptimizer(self.input)
+        output = optimizer.run(n_iter=200, random_state=1)
+
+        # Check results (low accuracy)
+        self.assertEqual(output.p_space.dim, 2)
+        self.assertAlmostEqual(output.params["intensity_1"], 1.0, places=2)
+        self.assertAlmostEqual(output.params["intensity_2"], 3.0, places=1)
+        self.assertAlmostEqual(output.target, 0.0, places=3)
+
+        # Check constraints in parameter space
+        p_space = output.p_space_to_dataframe()
+        self.assertSetEqual(
+            set(p_space.columns.to_list()),
+            {"intensity_1", "intensity_2", "Cost Function"},
+        )
+        self.assertTupleEqual(p_space.shape, (300, 3))
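
For orientation, the calibration flow these integration tests exercise can be condensed into the following sketch. It is not part of the commit: it simply reuses the dummy hazard()/exposure() helpers and the Input/BayesianOptimizer calls visible in the diff above, and it omits the extra hazard preparation (frequencies, event names, event selection) done in setUp.

# Sketch of the calibration flow exercised above (not part of the commit).
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error

from climada.entity import ImpactFunc, ImpactFuncSet
from climada.util.calibrate import Input, BayesianOptimizer
from climada.util.calibrate.test.test_calibrate import hazard, exposure

# "Observed" impacts per region and event, as in the tests
data = pd.DataFrame({"a": [3, 1], "b": [0.2, 0.01]}, index=[10, 1])

inp = Input(
    hazard=hazard(),
    exposure=exposure(),
    data=data,
    cost_func=mean_squared_error,
    # One impact function set per parameter combination proposed by the optimizer
    impact_func_creator=lambda slope: ImpactFuncSet(
        [
            ImpactFunc(
                intensity=np.array([0, 10]),
                mdd=np.array([0, 10 * slope]),
                paa=np.ones(2),
                id=1,
            )
        ]
    ),
    impact_to_dataframe=lambda impact: impact.impact_at_reg(["a", "b"]),
)
inp.bounds = {"slope": (-1, 3)}

output = BayesianOptimizer(inp).run(init_points=10, n_iter=20, random_state=1)
print(output.params, output.target)          # best parameter set and its cost
print(output.p_space_to_dataframe().shape)   # one row per sampled parameter set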

climada/util/calibrate/bayesian_optimizer.py

Lines changed: 1 addition & 0 deletions
@@ -153,6 +153,7 @@ def p_space_to_dataframe(self):
         function value (``Cost Function``) and whose rows are the optimizer
         iterations.
         """
+        # TODO: Handle constraints!!!
         data = {
             self.p_space.keys[i]: self.p_space.params[..., i]
             for i in range(self.p_space.dim)
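
The hunk above only touches the parameter-space export, adding a reminder that constraints are not yet handled. As a rough illustration of what the surrounding comprehension produces, here is a sketch with made-up stand-in values for p_space.keys, p_space.params, and the target values (not the commit's code):

# Stand-in illustration of tabulating the sampled parameter space.
import numpy as np
import pandas as pd

keys = ["intensity_1", "intensity_2"]        # parameter names (p_space.keys)
params = np.array([[1.0, 3.0], [0.5, 2.5]])  # one row per iteration (p_space.params)
dim = params.shape[1]                        # number of parameters (p_space.dim)

data = {keys[i]: params[..., i] for i in range(dim)}
data["Cost Function"] = np.array([0.0, -1.2])  # optimizer target values per iteration
print(pd.DataFrame(data))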

climada/util/calibrate/scipy_optimizer.py

Lines changed: 20 additions & 15 deletions
@@ -9,6 +9,19 @@
 from .base import Output, Optimizer
 
 
+@dataclass
+class ScipyMinimizeOptimizerOutput(Output):
+    """Output of a calibration with :py:class:`ScipyMinimizeOptimizer`
+
+    Attributes
+    ----------
+    result : scipy.minimize.OptimizeResult
+        The OptimizeResult instance returned by ``scipy.optimize.minimize``.
+    """
+
+    result: OptimizeResult
+
+
 @dataclass
 class ScipyMinimizeOptimizer(Optimizer):
     """An optimization using scipy.optimize.minimize
@@ -42,7 +55,7 @@ def _select_by_param_names(self, mapping: Mapping[str, Any]) -> List[Any]:
         """Return a list of entries from a map with matching keys or ``None``"""
         return [mapping.get(key) for key in self._param_names]
 
-    def run(self, **opt_kwargs) -> Output:
+    def run(self, **opt_kwargs) -> ScipyMinimizeOptimizerOutput:
         """Execute the optimization
 
         Parameters
@@ -64,7 +77,12 @@ def run(self, **opt_kwargs) -> Output:
             associated ``scipy.optimize.OptimizeResult`` instance.
         """
         # Parse kwargs
-        params_init = opt_kwargs.pop("params_init")
+        try:
+            params_init = opt_kwargs.pop("params_init")
+        except KeyError as err:
+            raise RuntimeError(
+                "ScipyMinimizeOptimizer.run requires 'params_init' mapping as argument"
+            ) from err
         method = opt_kwargs.pop("method", "trust-constr")
 
         # Store names to rebuild dict when the minimize iterator returns an array
@@ -89,16 +107,3 @@ def run(self, **opt_kwargs) -> Output:
 
         params = dict(zip(self._param_names, res.x.flat))
         return ScipyMinimizeOptimizerOutput(params=params, target=res.fun, result=res)
-
-
-@dataclass
-class ScipyMinimizeOptimizerOutput(Output):
-    """Output of a calibration with :py:class:`ScipyMinimizeOptimizer`
-
-    Attributes
-    ----------
-    result : scipy.minimize.OptimizeResult
-        The OptimizeResult instance returned by ``scipy.optimize.minimize``.
-    """
-
-    result: OptimizeResult
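
Taken together, these hunks move ScipyMinimizeOptimizerOutput above the optimizer that returns it, annotate run() with the concrete output type, and turn a missing params_init into a RuntimeError with an explicit message. A hedged usage sketch (inp stands for an Input instance assembled as in the tests; not part of the commit):

from climada.util.calibrate import ScipyMinimizeOptimizer

optimizer = ScipyMinimizeOptimizer(inp)  # `inp`: an Input instance as in the tests

# params_init is required; leaving it out now raises RuntimeError instead of KeyError
output = optimizer.run(
    params_init={"intensity_1": 2, "intensity_2": 2},
    options=dict(gtol=1e-5, xtol=1e-5),  # forwarded to scipy.optimize.minimize
)

print(output.params)          # calibrated parameters, keyed by name
print(output.target)          # final cost function value
print(output.result.success)  # underlying scipy.optimize.OptimizeResult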
climada/util/calibrate/test/test_calibrate.py

Lines changed: 180 additions & 0 deletions
@@ -0,0 +1,180 @@
+"""Tests for calibration module"""
+
+import unittest
+from unittest.mock import create_autospec
+
+import numpy as np
+import numpy.testing as npt
+import pandas as pd
+from scipy.sparse import csr_matrix
+
+from climada.entity import Exposures, ImpactFunc, ImpactFuncSet
+from climada.hazard import Hazard, Centroids
+
+from climada.util.calibrate import Input
+from climada.util.calibrate.base import Optimizer
+
+
+class ConcreteOptimizer(Optimizer):
+    """An instance for testing. Implements 'run' without doing anything"""
+
+    def run(self, **_):
+        pass
+
+
+def hazard():
+    """Create a dummy hazard instance"""
+    lat = [1, 2]
+    lon = [0, 1]
+    centroids = Centroids.from_lat_lon(lat=lat, lon=lon)
+    event_id = np.array([1, 3, 10])
+    intensity = csr_matrix([[1, 0.1], [2, 0.2], [3, 2]])
+    return Hazard(event_id=event_id, centroids=centroids, intensity=intensity)
+
+
+def exposure():
+    """Create a dummy exposure instance"""
+    return Exposures(
+        data=dict(
+            longitude=[0, 1, 100],
+            latitude=[1, 2, 50],
+            value=[1, 0.1, 1e6],
+            impf_=[1, 1, 1],
+        )
+    )
+
+
+class TestInputPostInit(unittest.TestCase):
+    """Test the post_init dunder method of Input"""
+
+    def setUp(self):
+        """Create default input instance"""
+        # Create the hazard instance
+        self.hazard = hazard()
+
+        # Create the exposure instance
+        self.exposure = exposure()
+
+        # Create some data
+        self.data_events = [10, 3]
+        self.data = pd.DataFrame(data={"a": [1, 2]}, index=self.data_events)
+
+        # Create dummy funcs
+        self.impact_to_dataframe = lambda _: pd.DataFrame()
+        self.cost_func = lambda impact, data: 1.0
+        self.impact_func_gen = lambda **kwargs: ImpactFuncSet()
+
+    def test_post_init_calls(self):
+        """Test if post_init calls stuff correctly using mocks"""
+        # Create mocks
+        exposure_mock = create_autospec(Exposures())
+
+        # Default
+        Input(
+            hazard=self.hazard,
+            exposure=exposure_mock,
+            data=self.data,
+            cost_func=self.cost_func,
+            impact_func_creator=self.impact_func_gen,
+            impact_to_dataframe=self.impact_to_dataframe,
+        )
+        exposure_mock.assign_centroids.assert_called_once_with(self.hazard)
+        exposure_mock.reset_mock()
+
+        # Default
+        Input(
+            hazard=self.hazard,
+            exposure=exposure_mock,
+            data=self.data,
+            cost_func=self.cost_func,
+            impact_func_creator=self.impact_func_gen,
+            impact_to_dataframe=self.impact_to_dataframe,
+            assign_centroids=False,
+        )
+        exposure_mock.assign_centroids.assert_not_called()
+
+    def test_post_init(self):
+        """Test if post_init results in a sensible hazard and exposure"""
+        # Create input
+        input = Input(
+            hazard=self.hazard,
+            exposure=self.exposure,
+            data=self.data,
+            cost_func=self.cost_func,
+            impact_func_creator=self.impact_func_gen,
+            impact_to_dataframe=self.impact_to_dataframe,
+        )
+
+        # Check hazard and exposure
+        self.assertIn("centr_", input.exposure.gdf)
+        npt.assert_array_equal(input.exposure.gdf["centr_"], [0, 1, -1])
+
+
+class TestOptimizer(unittest.TestCase):
+    """Base class for testing optimizers. Creates an input mock"""
+
+    def setUp(self):
+        """Mock the input"""
+        self.input = Input(
+            hazard=hazard(),
+            exposure=exposure(),
+            data=pd.DataFrame(data={"col1": [1, 2], "col2": [2, 3]}, index=[0, 1]),
+            cost_func=lambda x, y: (x + y).sum(axis=None),
+            impact_func_creator=lambda _: ImpactFuncSet([ImpactFunc()]),
+            impact_to_dataframe=lambda x: x.impact_at_reg(),
+        )
+        self.optimizer = ConcreteOptimizer(self.input)
+
+    def test_align_impact_with_data(self):
+        """Check alignment of impact and data"""
+        self.input.data = pd.DataFrame(
+            data={"col1": [1, 2], "col2": [2, 3]}, index=[0, 1]
+        )
+        impact_df = pd.DataFrame(data={"col2": [1, 2], "col3": [2, 3]}, index=[1, 2])
+
+        # missing_data_value = np.nan
+        data_aligned, impact_df_aligned = self.optimizer._align_impact_with_data(
+            impact_df
+        )
+        data_aligned_test = pd.DataFrame(
+            data={"col1": [1, 2, 0], "col2": [2, 3, 0], "col3": [0, 0, 0]},
+            index=[0, 1, 2],
+            dtype="float",
+        )
+        pd.testing.assert_frame_equal(data_aligned, data_aligned_test)
+        pd.testing.assert_frame_equal(
+            impact_df_aligned,
+            pd.DataFrame(
+                data={"col1": [0, 0, 0], "col2": [0, 1, 0], "col3": [0, 0, 0]},
+                index=[0, 1, 2],
+                dtype="float",
+            ),
+        )
+
+        # Different missing data value
+        self.input.missing_data_value = 0
+        data_aligned, impact_df_aligned = self.optimizer._align_impact_with_data(
+            impact_df
+        )
+        pd.testing.assert_frame_equal(data_aligned, data_aligned_test)
+        pd.testing.assert_frame_equal(
+            impact_df_aligned,
+            pd.DataFrame(
+                data={"col1": [0, 0, 0], "col2": [0, 1, 2], "col3": [0, 2, 3]},
+                index=[0, 1, 2],
+                dtype="float",
+            ),
+        )
+
+        # Check error
+        with self.assertRaisesRegex(ValueError, "NaN values computed in impact!"):
+            data_aligned, impact_df_aligned = self.optimizer._align_impact_with_data(
+                pd.DataFrame(data={"col1": [np.nan], "col2": [2, 3]}, index=[1, 2])
+            )
+
+
+# Execute Tests
+if __name__ == "__main__":
+    TESTS = unittest.TestLoader().loadTestsFromTestCase(TestInputPostInit)
+    TESTS.addTests(unittest.TestLoader().loadTestsFromTestCase(TestOptimizer))
+    unittest.TextTestRunner(verbosity=2).run(TESTS)
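
For reference, the behaviour these unit tests pin down for Input's __post_init__ can be sketched as follows; the module path in the import is taken from test_util_calibrate.py above, and the snippet itself is not part of the commit:

# By default, Input assigns hazard centroids to the exposure on construction;
# pass assign_centroids=False to skip that step.
import pandas as pd

from climada.entity import ImpactFuncSet
from climada.util.calibrate import Input
from climada.util.calibrate.test.test_calibrate import hazard, exposure

inp = Input(
    hazard=hazard(),
    exposure=exposure(),
    data=pd.DataFrame({"a": [1, 2]}, index=[10, 3]),
    cost_func=lambda impact, data: 1.0,
    impact_func_creator=lambda **kwargs: ImpactFuncSet(),
    impact_to_dataframe=lambda impact: pd.DataFrame(),
)
print(inp.exposure.gdf["centr_"].to_list())  # e.g. [0, 1, -1]; -1 = outside the hazard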
