Expose n_sig_dig parameter, document the binning in docstrings, and clarify some error messages

ValentinGebhart · ValentinGebhart · commit 827ec99b203e · 2025-02-27T15:45:30.000+01:00
diff --git a/climada/engine/impact.py b/climada/engine/impact.py
@@ -498,10 +498,12 @@ def local_exceedance_impact(
         min_impact=0,
         log_frequency=True,
         log_impact=True,
+        n_sig_dig=3,
     ):
         """Compute local exceedance impact for given return periods. The default method
         is fitting the ordered impacts per centroid to the corresponding cummulated
-        frequency with linear interpolation on log-log scale.
+        frequency with linear interpolation on log-log scale. Impacts are binned according
+        to their n_sig_dig significant digits, see Notes.
 
         Parameters
         ----------
@@ -530,6 +532,9 @@ def local_exceedance_impact(
             This parameter is only used if method is set to "extrapolate" or "interpolate". If set
             to True, impact values are converted to log scale before inter- and extrapolation.
             Defaults to True.
+        n_sig_dig : int, optional
+            Number of significant digits for the binning of the impact values, see Notes.
+            Defaults to 3.
 
         Returns
         -------
@@ -542,6 +547,14 @@ def local_exceedance_impact(
             GeoDataFrame label, for reporting and plotting
         column_label : function
             Column-label-generating function, for reporting and plotting
+
+        Notes
+        -----
+        Contrary to Impact.calc_freq_curve(), impacts are binned according to their n_sig_dig
+        significant digits. This results in a coarser (and smoother) interpolation and a more
+        stable extrapolation. To effectively avoid binning, use a large value, e.g.,
+        n_sig_dig=7. For more information about the binning, see the docstring of
+        climada.util.interpolation.preprocess_and_interpolate_ev().
         """
         LOGGER.info(
             "Computing exceedance impact map for return periods: %s", return_periods
@@ -589,7 +602,7 @@ def local_exceedance_impact(
                         value_threshold=min_impact,
                         method=method,
                         y_asymptotic=0.0,
-                        n_sig_dig=3,
+                        n_sig_dig=n_sig_dig,
                     )
                     for i_centroid in nonzero_centroids
                 ]
@@ -632,10 +645,12 @@ def local_return_period(
         min_impact=0,
         log_frequency=True,
         log_impact=True,
+        n_sig_dig=3,
     ):
         """Compute local return periods for given threshold impacts. The default method
         is fitting the ordered impacts per centroid to the corresponding cummulated
-        frequency with linear interpolation on log-log scale.
+        frequency with linear interpolation on log-log scale. Impacts are binned according
+        to their n_sig_dig significant digits, see Notes.
 
         Parameters
         ----------
@@ -665,6 +680,9 @@ def local_return_period(
             This parameter is only used if method is set to "interpolate". If set to True,
             impact values are converted to log scale before inter- and extrapolation.
             Defaults to True.
+        n_sig_dig : int, optional
+            Number of significant digits for the binning of the impact values, see Notes.
+            Defaults to 3.
 
         Returns
         -------
@@ -677,6 +695,14 @@ def local_return_period(
             GeoDataFrame label, for reporting and plotting
         column_label : function
             Column-label-generating function, for reporting and plotting
+
+        Notes
+        -----
+        Contrary to Impact.calc_freq_curve(), impacts are binned according to their n_sig_dig
+        significant digits. This results in a coarser (and smoother) interpolation and a more
+        stable extrapolation. To effectively avoid binning, use a large value, e.g.,
+        n_sig_dig=7. For more information about the binning, see the docstring of
+        climada.util.interpolation.preprocess_and_interpolate_ev().
         """
 
         LOGGER.info("Computing return period map for impacts: %s", threshold_impact)
@@ -718,7 +744,7 @@ def local_return_period(
                         value_threshold=min_impact,
                         method=method,
                         y_asymptotic=np.nan,
-                        n_sig_dig=3,
+                        n_sig_dig=n_sig_dig,
                     )
                     for i_centroid in nonzero_centroids
                 ]
diff --git a/climada/hazard/base.py b/climada/hazard/base.py
@@ -491,10 +491,12 @@ def local_exceedance_intensity(
         min_intensity=None,
         log_frequency=True,
         log_intensity=True,
+        n_sig_dig=3,
     ):
         """Compute local exceedance intensity for given return periods. The default method
         is fitting the ordered intensitites per centroid to the corresponding cummulated
-        frequency with linear interpolation on log-log scale.
+        frequency with linear interpolation on log-log scale. Intensities are binned according
+        to their n_sig_dig significant digits, see Notes.
 
         Parameters
         ----------
@@ -524,6 +526,9 @@ def local_exceedance_intensity(
             This parameter is only used if method is set to "interpolate". If set to True,
             intensity values are converted to log scale before inter- and extrapolation.
             Defaults to True.
+        n_sig_dig : int, optional
+            Number of significant digits for the binning of the intensity values, see Notes.
+            Defaults to 3.
 
         Returns
         -------
@@ -536,6 +541,14 @@ def local_exceedance_intensity(
             GeoDataFrame label, for reporting and plotting
         column_label : function
             Column-label-generating function, for reporting and plotting
+
+        Notes
+        -----
+        Contrary to Impact.calc_freq_curve(), intensities are binned according to their n_sig_dig
+        significant digits. This results in a coarser (and smoother) interpolation and a more
+        stable extrapolation. To effectively avoid binning, use a large value, e.g.,
+        n_sig_dig=7. For more information about the binning, see the docstring of
+        climada.util.interpolation.preprocess_and_interpolate_ev().
         """
         if not min_intensity and min_intensity != 0:
             min_intensity = self.intensity_thres
@@ -575,7 +588,7 @@ def local_exceedance_intensity(
                         value_threshold=min_intensity,
                         method=method,
                         y_asymptotic=0.0,
-                        n_sig_dig=3,
+                        n_sig_dig=n_sig_dig,
                     )
                     for i_centroid in nonzero_centroids
                 ]
@@ -623,10 +636,12 @@ def local_return_period(
         min_intensity=None,
         log_frequency=True,
         log_intensity=True,
+        n_sig_dig=3,
     ):
         """Compute local return periods for given hazard intensities. The default method
         is fitting the ordered intensitites per centroid to the corresponding cummulated
-        frequency with linear interpolation on log-log scale.
+        frequency with linear interpolation on log-log scale. Intensities are binned according
+        to their n_sig_dig significant digits, see Notes.
 
         Parameters
         ----------
@@ -657,6 +672,9 @@ def local_return_period(
             This parameter is only used if method is set to "interpolate". If set to True,
             intensity values are converted to log scale before inter- and extrapolation.
             Defaults to True.
+        n_sig_dig : int, optional
+            Number of significant digits for the binning of the intensity values, see Notes.
+            Defaults to 3.
 
         Returns
         -------
@@ -669,6 +687,14 @@ def local_return_period(
             GeoDataFrame label, for reporting and plotting
         column_label : function
             Column-label-generating function, for reporting and plotting
+
+        Notes
+        -----
+        Contrary to Impact.calc_freq_curve(), intensities are binned according to their n_sig_dig
+        significant digits. This results in a coarser (and smoother) interpolation and a more
+        stable extrapolation. To effectively avoid binning, use a large value, e.g.,
+        n_sig_dig=7. For more information about the binning, see the docstring of
+        climada.util.interpolation.preprocess_and_interpolate_ev().
         """
         if not min_intensity and min_intensity != 0:
             min_intensity = self.intensity_thres
@@ -705,7 +731,7 @@ def local_return_period(
                         value_threshold=min_intensity,
                         method=method,
                         y_asymptotic=np.nan,
-                        n_sig_dig=3,
+                        n_sig_dig=n_sig_dig,
                     )
                     for i_centroid in nonzero_centroids
                 ]
diff --git a/climada/util/interpolation.py b/climada/util/interpolation.py
@@ -39,15 +39,18 @@ def preprocess_and_interpolate_ev(
     y_asymptotic=np.nan,
     n_sig_dig=3,
 ):
-    """Wrapper function to first preprocess (frequency, values) data and and then inter- and
-    extrapolate to test frequencies or test values.
+    """Preprocess (frequency, values) data by binning the values to the given number of
+    significant digits (see Notes) and computing the cumulative frequencies, then inter- and
+    extrapolate either to test frequencies or to test values.
 
     Parameters
     ----------
     test_frequency : array_like
         1-D array of test frequencies for which values (e.g., intensities or impacts) should be assigned.
+        If given, test_values must be None.
     test_values : array_like
         1-D array of test values (e.g., intensities or impacts) for which frequencies should be assigned.
+        If given, test_frequency must be None.
     frequency : array_like
         1-D array of frequencies to be interpolated.
     values : array_like
@@ -72,7 +75,7 @@ def preprocess_and_interpolate_ev(
         for test x values larger than given x values, if size < 2 or if method is set
         to "extrapolate_constant" or "stepfunction". Defaults to np.nan.
     n_sig_dig : int, optional
-        number of significant digits to group the values (in order to avoid bad extrapolation behaviour). Defaults to 3.
+        Number of significant digits to group and bin the values, see Notes. Defaults to 3.
 
     Returns
     -------
@@ -84,13 +87,25 @@ def preprocess_and_interpolate_ev(
     ------
     ValueError
         If both test frequencies and test values are given or none of them.
+
+    Notes
+    -----
+    Before inter- and extrapolation, the values are binned according to their n_sig_dig
+    significant digits, and their corresponding frequencies are summed. For instance, if
+    n_sig_dig=3, the two values 12.01 and 11.97 with corresponding frequencies 0.1 and 0.2 are
+    combined into a single value 12.0 with frequency 0.3. This binning leads to a coarser (and
+    smoother) interpolation and a more stable extrapolation. To effectively avoid binning, use
+    a large n_sig_dig, e.g., n_sig_dig=7.
     """
 
     # check that only test frequencies or only test values are given
     if test_frequency is not None and test_values is not None:
-        raise ValueError("Both test frequencies and test values are given.")
+        raise ValueError(
+            "Both test frequencies and test values are given. "
+            "To use this method, please only use one of them."
+        )
     elif test_frequency is None and test_values is None:
-        raise ValueError("No test values or frequencies are given.")
+        raise ValueError("No test values or test frequencies are given.")
 
     # sort values and frequencies
     sorted_idxs = np.argsort(values)
@@ -213,7 +228,10 @@ def interpolate_ev(
         fill_value = "extrapolate"
     elif extrapolation == "extrapolate_constant":
         if not all(sorted(x_train) == x_train):
-            raise ValueError("x_train array must be sorted in ascending order.")
+            raise ValueError(
+                "x_train array must be sorted in ascending order. This might be due to floating "
+                "point errors in the rounding process of `group_frequency()`."
+            )
         fill_value = (y_train[0], np.log10(y_asymptotic) if logy else y_asymptotic)
     else:
         fill_value = np.nan
@@ -268,7 +286,10 @@ def stepfunction_ev(
 
     # find indices of x_test if sorted into x_train
     if not all(sorted(x_train) == x_train):
-        raise ValueError("Input array x_train must be sorted in ascending order.")
+        raise ValueError(
+            "Input array x_train must be sorted in ascending order. This might be due to "
+            "floating point errors in the rounding process of `group_frequency()`."
+        )
     indx = np.searchsorted(x_train, x_test)
     y_test = y_train[indx.clip(max=len(x_train) - 1)]
     y_test[indx == len(x_train)] = y_asymptotic
@@ -365,13 +386,19 @@ def group_frequency(frequency, value, n_sig_dig):
 
     if value_unique.size != frequency.size:
         if not all(sorted(start_indices) == start_indices):
-            raise ValueError("Value array must be sorted in ascending order.")
+            raise ValueError(
+                "Value array must be sorted in ascending order. This might be due to floating "
+                "point errors in the rounding process of `round_to_sig_digits()`."
+            )
         # add frequency for equal value
         start_indices = np.insert(start_indices, value_unique.size, frequency.size)
         frequency = np.add.reduceat(frequency, start_indices[:-1])
         return frequency, value_unique
     elif not all(sorted(value) == value):
-        raise ValueError("Value array must be sorted in ascending order!")
+        raise ValueError(
+            "Value array must be sorted in ascending order. This might be due to floating point "
+            "errors in the rounding process of `round_to_sig_digits()`."
+        )
 
     return frequency, value