Update code, docs, and tutorial

peanutfun · peanutfun · commit 97d763ad5530 · 2023-08-03T21:09:43.000+02:00
diff --git a/climada/util/calibrate/__init__.py b/climada/util/calibrate/__init__.py
@@ -1,6 +1,6 @@
 """Impact function calibration module"""
 
-from .base import Input
+from .base import Input, OutputEvaluator
 from .bayesian_optimizer import BayesianOptimizer
 from .scipy_optimizer import ScipyMinimizeOptimizer
 from .func import rmse, rmsf, impact_at_reg
diff --git a/climada/util/calibrate/base.py b/climada/util/calibrate/base.py
@@ -130,55 +130,139 @@ def __post_init__(self):
         self._impact_label = f"Impact [{self.input.exposure.value_unit}]"
 
     def plot_impf_set(self, **plot_kwargs):
-        """Plot the optimized impact functions"""
+        """Plot the optimized impact functions
+
+        This calls the plot function of the respective impact function set.
+
+        Parameters
+        ----------
+        plot_kwargs
+            Plotting keyword arguments passed to the underlying plotting method.
+
+        See Also
+        --------
+        :py:meth:`~climada.entity.impact_funcs.impact_func_set.ImpactFuncSet.plot`
+        """
         return self.impf_set.plot(**plot_kwargs)
 
-    def plot_at_event(self, **plot_kwargs):
-        data = (
-            pd.concat(
-                [
-                    pd.Series([self.impact.at_event]),
-                    self.input.data.sum(axis="columns"),
-                ],
-                ignore_index=True,
-                axis=1,
-            )
-            .rename(columns={0: "Model", 1: "Data"})
-            .set_index(self.input.hazard.event_name)
-        )
+    def plot_at_event(
+        self,
+        data_transf: Callable[[pd.DataFrame], pd.DataFrame] = lambda x: x,
+        **plot_kwargs,
+    ):
+        """Create a bar plot comparing estimated model output and data per event
+
+        Every row of the :py:attr:`Input.data` is considered an event.
+        The data to be plotted can be transformed with a generic function
+        ``data_transf``.
+
+        Parameters
+        ----------
+        data_transf : Callable (pd.DataFrame -> pd.DataFrame), optional
+            A function that transforms the data to plot before plotting.
+            It receives a dataframe whose rows represent events and whose columns
+            represent the modelled impact and the calibration data, respectively.
+            By default, the data is not transformed.
+        plot_kwargs
+            Keyword arguments passed to the ``DataFrame.plot.bar`` method.
+
+        Returns
+        -------
+        ax : matplotlib.axes.Axes
+            The plot axis returned by ``DataFrame.plot.bar``
+        """
+        data = pd.concat(
+            [
+                self.input.impact_to_dataframe(self.impact).sum(axis="columns"),
+                self.input.data.sum(axis="columns"),
+            ],
+            axis=1,
+        ).rename(columns={0: "Model", 1: "Data"})
+
+        # Transform data before plotting
+        data = data_transf(data)
+
+        # Now plot
         ylabel = plot_kwargs.pop("ylabel", self._impact_label)
         return data.plot.bar(ylabel=ylabel, **plot_kwargs)
 
-    def plot_at_region(self, agg_regions=None, **plot_kwargs):
+    def plot_at_region(
+        self,
+        data_transf: Callable[[pd.DataFrame], pd.DataFrame] = lambda x: x,
+        **plot_kwargs,
+    ):
+        """Create a bar plot comparing estimated model output and data per region
+
+        Every column of the :py:attr:`Input.data` is considered a region.
+        The data to be plotted can be transformed with a generic function
+        ``data_transf``.
+
+        Parameters
+        ----------
+        data_transf : Callable (pd.DataFrame -> pd.DataFrame), optional
+            A function that transforms the data to plot before plotting.
+            It receives a dataframe whose rows represent regions and whose columns
+            represent the modelled impact and the calibration data, respectively.
+            By default, the data is not transformed.
+        plot_kwargs
+            Keyword arguments passed to the ``DataFrame.plot.bar`` method.
+
+        Returns
+        -------
+        ax : matplotlib.axes.Axes
+            The plot axis returned by ``DataFrame.plot.bar``.
+        """
         data = pd.concat(
             [
-                self.impact.impact_at_reg(agg_regions).sum(axis="index"),
+                self.input.impact_to_dataframe(self.impact).sum(axis="index"),
                 self.input.data.sum(axis="index"),
             ],
             axis=1,
         ).rename(columns={0: "Model", 1: "Data"})
 
-        # Use nice country names if no agg_regions were given
-        if agg_regions is None:
-            data = data.rename(
-                index=lambda x: u_coord.country_to_iso(x, representation="name")
-            )
+        # Transform data before plotting
+        data = data_transf(data)
 
+        # Now plot
         ylabel = plot_kwargs.pop("ylabel", self._impact_label)
         return data.plot.bar(ylabel=ylabel, **plot_kwargs)
 
-    def plot_event_region_heatmap(self, agg_regions=None, **plot_kwargs):
+    def plot_event_region_heatmap(
+        self,
+        data_transf: Callable[[pd.DataFrame], pd.DataFrame] = lambda x: x,
+        **plot_kwargs,
+    ):
+        """Plot a heatmap comparing all events per all regions
+
+        Every column of the :py:attr:`Input.data` is considered a region, and every
+        row is considered an event.
+        The data to be plotted can be transformed with a generic function
+        ``data_transf``.
+
+        Parameters
+        ----------
+        data_transf : Callable (pd.DataFrame -> pd.DataFrame), optional
+            A function that transforms the data to plot before plotting.
+            It receives a dataframe whose rows represent events and whose columns
+            represent regions; values are ``log10((model + 1) / (data + 1))``.
+            By default, the data is not transformed.
+        plot_kwargs
+            Keyword arguments passed to the underlying heatmap plotting function.
+
+        Returns
+        -------
+        ax : matplotlib.axes.Axes
+            The plot axis returned by the underlying heatmap plotting function.
+
+        """
         # Data preparation
-        agg = self.impact.impact_at_reg(agg_regions)
+        agg = self.input.impact_to_dataframe(self.impact)
         data = (agg + 1) / (self.input.data + 1)
         data = data.transform(np.log10).replace(0, np.nan)
-        data = data.where((agg < 1) & (self.input.data < 1))
+        data = data.where((agg > 0) | (self.input.data > 0))
 
-        # Use nice country names if no agg_regions were given
-        if agg_regions is None:
-            data = data.rename(
-                index=lambda x: u_coord.country_to_iso(x, representation="name")
-            )
+        # Transform data
+        data = data_transf(data)
 
         # Default plot settings
         annot = plot_kwargs.pop("annot", True)
diff --git a/climada/util/calibrate/bayesian_optimizer.py b/climada/util/calibrate/bayesian_optimizer.py
@@ -3,7 +3,7 @@
 from dataclasses import dataclass, InitVar
 from typing import Mapping, Optional, Any
 from numbers import Number
-from itertools import combinations
+from itertools import combinations, repeat
 
 import pandas as pd
 from bayes_opt import BayesianOptimization
@@ -164,33 +164,58 @@ def p_space_to_dataframe(self):
     def plot_p_space(
         self,
         p_space_df: Optional[pd.DataFrame] = None,
+        x: Optional[str] = None,
+        y: Optional[str] = None,
         min_def: Optional[str] = "Cost Function",
         min_fmt: str = "x",
         min_color: str = "r",
         **plot_kwargs
     ):
-        """Plot the parameter space"""
+        """Plot the parameter space as scatter plot(s)
+
+        Produce a scatter plot where each point represents a parameter combination
+        sampled by the optimizer. The coloring represents the cost function value.
+        If there are more than two parameters in the input data frame, this method will
+        produce one plot for each combination of two parameters.
+        Explicit parameter names to plot can be given via the ``x`` and ``y`` arguments.
+        If no data frame is provided as argument, the output of
+        :py:meth:`p_space_to_dataframe` is used.
+
+        Parameters
+        ----------
+        p_space_df : pd.DataFrame, optional
+            The parameter space to plot. Defaults to the one returned by
+            :py:meth:`p_space_to_dataframe`
+        x : str, optional
+            The parameter to plot on the x-axis. If ``y`` is *not* given, this will plot
+            ``x`` against all other parameters.
+        y : str, optional
+            The parameter to plot on the y-axis. If ``x`` is *not* given, this will plot
+            ``y`` against all other parameters.
+        min_def : str, optional
+            The name of the column in ``p_space_df`` defining which parameter set
+            represents the minimum, which is plotted separately. Defaults to
+            ``"Cost Function"``. Set to ``None`` to avoid plotting the minimum.
+        min_fmt : str, optional
+            Plot format string for plotting the minimum. Defaults to ``"x"``.
+        min_color : str, optional
+            Color for plotting the minimum. Defaults to ``"r"`` (red).
+        """
         if p_space_df is None:
             p_space_df = self.p_space_to_dataframe()
-        
+
         # Plot defaults
         cmap = plot_kwargs.pop("cmap", "viridis_r")
         s = plot_kwargs.pop("s", 40)
         c = plot_kwargs.pop("c", "Cost Function")
 
-        # Ignore cost dimension
-        params = p_space_df.columns.tolist()
-        try:
-            params.remove(c)
-        except ValueError:
-            pass
-
-        # Iterate over parameter combinations
-        for p_first, p_second in combinations(params, 2):
+        def plot_single(x, y):
+            """Plot a single combination of parameters"""
+            # Plot scatter
             ax = p_space_df.plot(
                 kind="scatter",
-                x=p_first,
-                y=p_second,
+                x=x,
+                y=y,
                 c=c,
                 s=s,
                 cmap=cmap,
@@ -199,5 +224,36 @@ def plot_p_space(
 
             # Plot the minimum
             if min_def is not None:
-                best = p_space_df.iloc[p_space_df.idxmin()[min_def]]
-                ax.plot(best[p_first], best[p_second], min_fmt, color=min_color)
+                best = p_space_df.loc[p_space_df.idxmin()[min_def]]
+                ax.plot(best[x], best[y], min_fmt, color=min_color)
+
+            return ax
+
+        # Ignore cost dimension
+        params = p_space_df.columns.tolist()
+        try:
+            params.remove(c)
+        except ValueError:
+            pass
+
+        # Option 0: Only one parameter
+        if len(params) < 2:
+            return plot_single(x=params[0], y=repeat(0))
+
+        # Option 1: Only a single plot
+        if x is not None and y is not None:
+            return plot_single(x, y)
+
+        # Option 2: Combination of all
+        iterable = combinations(params, 2)
+        # Option 3: Fix one and iterate over all others
+        if x is not None:
+            params.remove(x)
+            iterable = zip(repeat(x), params)
+        elif y is not None:
+            params.remove(y)
+            iterable = zip(params, repeat(y))
+
+        # Iterate over parameter combinations
+        for p_first, p_second in iterable:
+            plot_single(p_first, p_second)
diff --git a/doc/tutorial/climada_util_calibrate.ipynb b/doc/tutorial/climada_util_calibrate.ipynb