Add first unit tests of calibration module

peanutfun · peanutfun · commit 819aab52495f · 2023-05-05T15:29:17.000+02:00
diff --git a/climada/util/calibrate/impact_func.py b/climada/util/calibrate/impact_func.py
@@ -11,14 +11,27 @@
     Bounds,
     LinearConstraint,
     NonlinearConstraint,
-    OptimizeResult,
     minimize,
 )
 from bayes_opt import BayesianOptimization
 
-from ...hazard import Hazard
-from ...entity import Exposures, ImpactFunc, ImpactFuncSet
-from ...engine import Impact, ImpactCalc
+from climada.hazard import Hazard
+from climada.entity import Exposures, ImpactFunc, ImpactFuncSet
+from climada.engine import Impact, ImpactCalc
+
+
+def cost_func_rmse(impact: Impact, data: pd.DataFrame) -> Number:
+    return np.sqrt(((impact - data) ** 2).mean(axis=None))
+
+
+def impf_step_generator(threshold: Number, paa: Number) -> ImpactFuncSet:
+    return ImpactFuncSet(
+        [
+            ImpactFunc.from_step_impf(
+                haz_type="RF", intensity=(0, threshold, 100), paa=(0, paa)
+            )
+        ]
+    )
 
 
 @dataclass
@@ -28,7 +41,7 @@ class Input:
     hazard: Hazard
     exposure: Exposures
     data: pd.DataFrame
-    cost_func: Callable[[Impact, pd.DataFrame], float]
+    cost_func: Callable[[Impact, pd.DataFrame], Number]
     impact_func_gen: Callable[..., ImpactFuncSet]
     bounds: Optional[Mapping[str, Union[Bounds, Tuple[Number, Number]]]] = None
     constraints: Optional[
@@ -40,7 +53,7 @@ class Input:
 
     def __post_init__(self):
         """Prepare input data"""
-        self.hazard = self.hazard.select(event_id=self.data.index)
+        self.hazard = self.hazard.select(event_id=self.data.index.tolist())
         self.exposure.assign_centroids(self.hazard)
 
 
@@ -51,7 +64,7 @@ class Output:
     params: Mapping[str, Number]
     target: Number
     success: bool
-    result: Optional[OptimizeResult] = None
+    result: Optional[Any] = None
 
 
 @dataclass
@@ -88,23 +101,32 @@ def run(self, **opt_kwargs) -> Output:
 class ScipyMinimizeOptimizer(Optimizer):
     """An optimization using scipy.optimize.minimize"""
 
-    _param_names: List[str] = field(default_factory=list)
+    def __post_init__(self):
+        """Create a private attribute for storing the parameter names"""
+        self._param_names: List[str] = list()
 
     def _kwargs_to_impact_func_gen(self, *args, **kwargs) -> Dict[str, Any]:
         return dict(zip(self._param_names, args[0].flat))
 
+    def _select_by_param_names(self, mapping: Mapping[str, Any]) -> List[Any]:
+        """Return a list of entries from a map with matching keys or ``None``"""
+        return [mapping.get(key) for key in self._param_names]
+
     def run(self, params_init: Mapping[str, Number], **opt_kwargs):
         """Execute the optimization"""
         self._param_names = list(params_init.keys())
 
         # Transform data to match minimize input
-        bounds = self.input.bounds
-        if bounds is not None:
-            bounds = [bounds.get(name) for name in self._param_names]
-
-        constraints = self.input.constraints
-        if constraints is not None:
-            constraints = [constraints.get(name) for name in self._param_names]
+        bounds = (
+            self._select_by_param_names(self.input.bounds)
+            if self.input.bounds is not None
+            else None
+        )
+        constraints = (
+            self._select_by_param_names(self.input.constraints)
+            if self.input.constraints is not None
+            else None
+        )
 
         x0 = np.array(list(params_init.values()))
         res = minimize(
@@ -126,21 +148,28 @@ class BayesianOptimizer(Optimizer):
     verbose: InitVar[int] = 1
     random_state: InitVar[int] = 1
     allow_duplicate_points: InitVar[bool] = True
-    init_kwds: InitVar[Mapping[str, Any]] = field(default_factory=dict)
+    bayes_opt_kwds: InitVar[Optional[Mapping[str, Any]]] = None
 
-    def __post_init__(self, **kwargs):
+    def __post_init__(
+        self, verbose, random_state, allow_duplicate_points, bayes_opt_kwds
+    ):
         """Create optimizer"""
-        init_kwds = kwargs.pop("init_kwds")
         self.optimizer = BayesianOptimization(
             f=lambda **kwargs: self._opt_func(**kwargs),
             pbounds=self.input.bounds,
-            **kwargs,
-            **init_kwds,
+            verbose=verbose,
+            random_state=random_state,
+            allow_duplicate_points=allow_duplicate_points,
+            **(bayes_opt_kwds or {}),  # default is None, which ``**`` cannot unpack
         )
 
     def run(self, init_points: int = 100, n_iter: int = 200, **opt_kwargs):
         """Execute the optimization"""
-        opt_kwargs.update(init_points=init_points, n_iter=n_iter)
-        self.optimizer.maximize(**opt_kwargs)
+        self.optimizer.maximize(init_points=init_points, n_iter=n_iter, **opt_kwargs)
         opt = self.optimizer.max
-        return Output(params=opt["params"], target=opt["target"], success=True)
+        return Output(
+            params=opt["params"],
+            target=opt["target"],
+            success=True,
+            result=self.optimizer,
+        )
diff --git a/climada/util/calibrate/test/test_calibrate.py b/climada/util/calibrate/test/test_calibrate.py
@@ -1 +1,138 @@
 """Tests for calibration module"""
+
+import unittest
+from unittest.mock import create_autospec
+
+import numpy as np
+import numpy.testing as npt
+import pandas as pd
+from scipy.sparse import csr_matrix
+from shapely.geometry import Point
+
+from climada.entity import Exposures, ImpactFuncSet
+from climada.hazard import Hazard, Centroids
+
+from ..impact_func import Input, ScipyMinimizeOptimizer
+
+
+def hazard():
+    """Create a dummy hazard instance"""
+    lat = [1, 2]
+    lon = [0, 1]
+    centroids = Centroids.from_lat_lon(lat=lat, lon=lon)
+    event_id = np.array([1, 3, 10])
+    intensity = csr_matrix([[1, 1], [2, 2], [3, 3]])
+    return Hazard(event_id=event_id, centroids=centroids, intensity=intensity)
+
+
+def exposure():
+    """Create a dummy exposure instance"""
+    return Exposures(data=dict(longitude=[0, 1, 100], latitude=[1, 2, 50]))
+
+
+class TestInputPostInit(unittest.TestCase):
+    """Test the ``__post_init__`` method of Input"""
+
+    def setUp(self):
+        """Create the default hazard, exposure, data, and dummy callables"""
+        # Create the hazard instance
+        self.hazard = hazard()
+
+        # Create the exposure instance
+        self.exposure = exposure()
+
+        # Create some data, indexed by a subset of the hazard event IDs
+        self.data_events = [10, 3]
+        self.data = pd.DataFrame(data={"a": [1, 2]}, index=self.data_events)
+
+        # Create dummy funcs
+        self.cost_func = lambda impact, data: 1.0
+        self.impact_func_gen = lambda **kwargs: ImpactFuncSet()
+
+    def test_post_init_calls(self):
+        """Test that post_init calls ``select`` and ``assign_centroids`` (mocks)"""
+        # Create mocks
+        hazard_mock_1 = create_autospec(Hazard, instance=True)
+        hazard_mock_2 = create_autospec(Hazard, instance=True)
+        exposure_mock = create_autospec(Exposures, instance=True)
+
+        # Make first hazard mock return another instance
+        hazard_mock_1.select.return_value = hazard_mock_2
+
+        # Create input (named so that the builtin ``input`` is not shadowed)
+        calib_input = Input(
+            hazard=hazard_mock_1,
+            exposure=exposure_mock,
+            data=self.data,
+            cost_func=self.cost_func,
+            impact_func_gen=self.impact_func_gen,
+        )
+
+        # Query checks
+        hazard_mock_1.select.assert_called_once_with(event_id=self.data_events)
+        self.assertNotEqual(calib_input.hazard, hazard_mock_1)
+        self.assertEqual(calib_input.hazard, hazard_mock_2)
+        exposure_mock.assign_centroids.assert_called_once_with(hazard_mock_2)
+
+    def test_post_init(self):
+        """Test if post_init results in a sensible hazard and exposure"""
+        # Create input (named so that the builtin ``input`` is not shadowed)
+        calib_input = Input(
+            hazard=self.hazard,
+            exposure=self.exposure,
+            data=self.data,
+            cost_func=self.cost_func,
+            impact_func_gen=self.impact_func_gen,
+        )
+
+        # Check hazard and exposure
+        npt.assert_array_equal(calib_input.hazard.event_id, self.data.index)
+        self.assertIn("centr_", calib_input.exposure.gdf)
+        npt.assert_array_equal(calib_input.exposure.gdf["centr_"], [0, 1, -1])
+
+
+class TestScipyMinimizeOptimizer(unittest.TestCase):
+    """Tests for the optimizer based on scipy.optimize.minimize"""
+
+    def setUp(self):
+        """Mock the input and create the optimizer"""
+        self.input = create_autospec(Input, instance=True)
+        self.optimizer = ScipyMinimizeOptimizer(self.input)
+
+    def test_kwargs_to_impact_func_gen(self):
+        """Test the _kwargs_to_impact_func_gen method"""
+        # _param_names is empty in the beginning
+        x = np.array([1, 2, 3])
+        self.assertDictEqual(self.optimizer._kwargs_to_impact_func_gen(x), {})
+
+        # Now populate it and try again
+        self.optimizer._param_names = ["x_2", "x_1", "x_3"]
+        result = {"x_2": 1, "x_1": 2, "x_3": 3}
+        self.assertDictEqual(self.optimizer._kwargs_to_impact_func_gen(x), result)
+
+        # Only the first positional argument is used, other arguments are ignored
+        self.assertDictEqual(
+            self.optimizer._kwargs_to_impact_func_gen(x, x + 3), result
+        )
+
+        # Array is flattened, pairing stops once the parameter names are exhausted
+        self.assertDictEqual(
+            self.optimizer._kwargs_to_impact_func_gen(np.array([[1, 2], [3, 4]])),
+            result,
+        )
+
+    def test_select_by_keys(self):
+        """Test the _select_by_param_names method"""
+        param_names = ["a", "b", "c", "d"]
+        mapping = dict(zip(param_names, [1, "2", (1, 2)]))
+
+        # _param_names is empty in the beginning
+        self.assertListEqual(self.optimizer._select_by_param_names(mapping), [])
+
+        # Set _param_names
+        self.optimizer._param_names = param_names
+
+        # Check result: "d" has no entry in the mapping and yields None
+        self.assertListEqual(
+            self.optimizer._select_by_param_names(mapping), [1, "2", (1, 2), None]
+        )