Add evaluator for calibration output

peanutfun · peanutfun · commit 4e1f104c1ded · 2023-07-10T14:26:15.000+02:00
diff --git a/climada/util/calibrate/base.py b/climada/util/calibrate/base.py
@@ -6,11 +6,14 @@
 from numbers import Number
 
 import pandas as pd
+import numpy as np
 from scipy.optimize import Bounds, LinearConstraint, NonlinearConstraint
+import seaborn as sns
 
 from climada.hazard import Hazard
 from climada.entity import Exposures, ImpactFuncSet
 from climada.engine import Impact, ImpactCalc
+import climada.util.coordinates as u_coord
 
 ConstraintType = Union[LinearConstraint, NonlinearConstraint, Mapping]
 
@@ -94,6 +97,113 @@ class Output:
     target: Number
 
 
+@dataclass
+class OutputEvaluator:
+    """Evaluate the output of a calibration task
+
+    Parameters
+    ----------
+    input : Input
+        The input object for the optimization task.
+    output : Output
+        The output object returned by the optimization task.
+
+    Attributes
+    ----------
+    impf_set : climada.entity.ImpactFuncSet
+        The impact function set built from the optimized parameters
+    impact : climada.engine.Impact
+        An impact object calculated using the optimal :py:attr:`impf_set`
+    """
+
+    input: Input
+    output: Output
+
+    def __post_init__(self):
+        """Compute the impact for the optimal parameters"""
+        self.impf_set = self.input.impact_func_creator(**self.output.params)
+        self.impact = ImpactCalc(
+            exposures=self.input.exposure,
+            impfset=self.impf_set,
+            hazard=self.input.hazard,
+        ).impact(assign_centroids=True, save_mat=True)
+        self._impact_label = f"Impact [{self.input.exposure.value_unit}]"
+
+    def plot_impf_set(self, **plot_kwargs):
+        """Plot the optimized impact functions"""
+        return self.impf_set.plot(**plot_kwargs)
+
+    def plot_at_event(self, **plot_kwargs):
+        data = (
+            pd.concat(
+                [
+                    pd.Series([self.impact.at_event]),
+                    self.input.data.sum(axis="columns"),
+                ],
+                ignore_index=True,
+                axis=1,
+            )
+            .rename(columns={0: "Model", 1: "Data"})
+            .set_index(self.input.hazard.event_name)
+        )
+        ylabel = plot_kwargs.pop("ylabel", self._impact_label)
+        return data.plot.bar(ylabel=ylabel, **plot_kwargs)
+
+    def plot_at_region(self, agg_regions=None, **plot_kwargs):
+        data = pd.concat(
+            [
+                self.impact.impact_at_reg(agg_regions).sum(axis="index"),
+                self.input.data.sum(axis="index"),
+            ],
+            axis=1,
+        ).rename(columns={0: "Model", 1: "Data"})
+
+        # Use nice country names if no agg_regions were given
+        if agg_regions is None:
+            data = data.rename(
+                index=lambda x: u_coord.country_to_iso(x, representation="name")
+            )
+
+        ylabel = plot_kwargs.pop("ylabel", self._impact_label)
+        return data.plot.bar(ylabel=ylabel, **plot_kwargs)
+
+    def plot_event_region_heatmap(self, agg_regions=None, **plot_kwargs):
+        # Data preparation
+        agg = self.impact.impact_at_reg(agg_regions)
+        data = (agg + 1) / (self.input.data + 1)
+        data = data.transform(np.log10).replace(0, np.nan)
+        data = data.where((agg < 1) & (self.input.data < 1))
+
+        # Use nice country names if no agg_regions were given
+        if agg_regions is None:
+            data = data.rename(
+                index=lambda x: u_coord.country_to_iso(x, representation="name")
+            )
+
+        # Default plot settings
+        annot = plot_kwargs.pop("annot", True)
+        vmax = plot_kwargs.pop("vmax", 3)
+        vmin = plot_kwargs.pop("vmin", -vmax)
+        center = plot_kwargs.pop("center", 0)
+        fmt = plot_kwargs.pop("fmt", ".1f")
+        cmap = plot_kwargs.pop("cmap", "RdBu_r")
+        cbar_kws = plot_kwargs.pop(
+            "cbar_kws", {"label": r"Model Error $\log_{10}(\mathrm{Impact})$"}
+        )
+
+        return sns.heatmap(
+            data,
+            annot=annot,
+            vmin=vmin,
+            vmax=vmax,
+            center=center,
+            fmt=fmt,
+            cmap=cmap,
+            cbar_kws=cbar_kws,
+            **plot_kwargs,
+        )
+
+
 @dataclass
 class Optimizer(ABC):
     """Abstract base class (interface) for an optimization
diff --git a/climada/util/calibrate/bayesian_optimizer.py b/climada/util/calibrate/bayesian_optimizer.py
@@ -3,6 +3,7 @@
 from dataclasses import dataclass, InitVar
 from typing import Mapping, Optional, Any
 from numbers import Number
+from itertools import combinations
 
 import pandas as pd
 from bayes_opt import BayesianOptimization
@@ -147,14 +148,56 @@ def p_space_to_dataframe(self):
         Returns
         -------
         pandas.DataFrame
-            Data frame whose columns are the parameter values and the associated target
-            function value (``target``) and whose rows are the optimizer iterations.
+            Data frame whose columns are the parameter values and the associated cost
+            function value (``Cost Function``) and whose rows are the optimizer
+            iterations.
         """
         data = {
             self.p_space.keys[i]: self.p_space.params[..., i]
             for i in range(self.p_space.dim)
         }
-        data["target"] = self.p_space.target
+        data["Cost Function"] = -self.p_space.target
         data = pd.DataFrame.from_dict(data)
         data.index.rename("Iteration", inplace=True)
         return data
+
+    def plot_p_space(
+        self,
+        p_space_df: Optional[pd.DataFrame] = None,
+        min_def: Optional[str] = "Cost Function",
+        min_fmt: str = "x",
+        min_color: str = "r",
+        **plot_kwargs
+    ):
+        """Plot the parameter space"""
+        if p_space_df is None:
+            p_space_df = self.p_space_to_dataframe()
+        
+        # Plot defaults
+        cmap = plot_kwargs.pop("cmap", "viridis_r")
+        s = plot_kwargs.pop("s", 40)
+        c = plot_kwargs.pop("c", "Cost Function")
+
+        # Ignore cost dimension
+        params = p_space_df.columns.tolist()
+        try:
+            params.remove(c)
+        except ValueError:
+            pass
+
+        # Iterate over parameter combinations
+        for p_first, p_second in combinations(params, 2):
+            ax = p_space_df.plot(
+                kind="scatter",
+                x=p_first,
+                y=p_second,
+                c=c,
+                s=s,
+                cmap=cmap,
+                **plot_kwargs,
+            )
+
+            # Plot the minimum
+            if min_def is not None:
+                best = p_space_df.iloc[p_space_df.idxmin()[min_def]]
+                ax.plot(best[p_first], best[p_second], min_fmt, color=min_color)
diff --git a/climada/util/calibrate/func.py b/climada/util/calibrate/func.py
@@ -0,0 +1,16 @@
+"""Default functions"""
+from typing import Sequence, Optional
+
+import pandas as pd
+import numpy as np
+
+from climada.engine import Impact
+
+def rmse(impact: pd.DataFrame, data: pd.DataFrame):
+    return np.sqrt(np.mean(((impact - data) ** 2).to_numpy()))
+
+def rmsf(impact: pd.DataFrame, data: pd.DataFrame):
+    return np.sqrt(np.mean((((impact + 1) / (data + 1)) ** 2).to_numpy()))
+
+def impact_at_reg(impact: Impact, region_ids: Optional[Sequence] = None):
+    return impact.impact_at_reg(agg_regions=region_ids)
diff --git a/climada/util/calibrate/test/test_calibrate.py b/climada/util/calibrate/test/test_calibrate.py
@@ -243,6 +243,21 @@ def test_kwargs_to_impact_func_creator(self, _):
         for args in call_args:
             self.assertSequenceEqual(args.kwargs.keys(), self.input.bounds.keys())
 
+    @patch("climada.util.calibrate.base.ImpactCalc", autospec=True)
+    def test_target_func(self, _):
+        """Test if cost function is transformed correctly
+        
+        We test the method '_target_func' through 'run' because it is
+        private"""
+        self.input.bounds = {"x_2": (0, 1), "x 1": (1, 2)}
+        self.input.cost_func.side_effect = [1.0, -1.0]
+        self.optimizer = BayesianOptimizer(self.input)
+
+        # Call 'run'
+        output = self.optimizer.run(init_points=1, n_iter=1)
+
+        # Check target space
+        npt.assert_array_equal(output.p_space.target, [-1.0, 1.0])
 
 # Execute Tests
 if __name__ == "__main__":
diff --git a/climada/util/coordinates.py b/climada/util/coordinates.py
@@ -28,6 +28,7 @@
 import re
 import warnings
 import zipfile
+from typing import Union, Sequence
 
 from cartopy.io import shapereader
 import dask.dataframe as dd
@@ -1463,6 +1464,11 @@ def country_natid2iso(natids, representation="alpha3"):
         iso_list = country_to_iso(iso_list, representation)
     return iso_list[0] if return_str else iso_list
 
+
+def iso_to_country(iso: Union[str, int, Sequence[Union[str, int]]], attr):
+    """Convert an ISO 3166 code to a country name"""
+
+
 def country_iso2natid(isos):
     """Convert ISO 3166-1 alpha-3 codes to internal NatIDs