change to decimals in binning

ValentinGebhart · ValentinGebhart · commit 463367e58609 · 2025-03-12T17:07:25.000+01:00
diff --git a/climada/engine/impact.py b/climada/engine/impact.py
@@ -498,7 +498,7 @@ def local_exceedance_impact(
         min_impact=0,
         log_frequency=True,
         log_impact=True,
-        n_sig_dig=3,
+        bin_decimals=None,
     ):
         """Compute local exceedance impact for given return periods. The default method
         is fitting the ordered impacts per centroid to the corresponding cummulated
@@ -607,7 +607,7 @@ def local_exceedance_impact(
                         value_threshold=min_impact,
                         method=method,
                         y_asymptotic=0.0,
-                        n_sig_dig=n_sig_dig,
+                        bin_decimals=bin_decimals,
                     )
                     for i_centroid in nonzero_centroids
                 ]
@@ -622,9 +622,11 @@ def local_exceedance_impact(
         gdf[col_names] = exceedance_impact
         # create label and column_label
         label = f"Impact ({self.unit})"
-        column_label = lambda column_names: [
-            f"Return Period: {col} {return_period_unit}" for col in column_names
-        ]
+
+        def column_label(column_names):  # one "Return Period" label per column
+            return [
+                f"Return Period: {col} {return_period_unit}" for col in column_names
+            ]
 
         return gdf, label, column_label
 
@@ -650,7 +652,7 @@ def local_return_period(
         min_impact=0,
         log_frequency=True,
         log_impact=True,
-        n_sig_dig=3,
+        bin_decimals=None,
     ):
         """Compute local return periods for given threshold impacts. The default method
         is fitting the ordered impacts per centroid to the corresponding cummulated
@@ -754,7 +756,7 @@ def local_return_period(
                         value_threshold=min_impact,
                         method=method,
                         y_asymptotic=np.nan,
-                        n_sig_dig=n_sig_dig,
+                        bin_decimals=bin_decimals,
                     )
                     for i_centroid in nonzero_centroids
                 ]
@@ -771,9 +773,11 @@ def local_return_period(
 
         # create label and column_label
         label = f"Return Periods ({return_period_unit})"
-        column_label = lambda column_names: [
-            f"Impact: {col} {self.unit}" for col in column_names
-        ]
+
+        def column_label(column_names):
+            # columns hold threshold impacts, so label them with the impact unit
+            # (the return period is the plotted quantity, see ``label`` above)
+            return [f"Impact: {col} {self.unit}" for col in column_names]
 
         return gdf, label, column_label
 
@@ -1207,8 +1211,8 @@ def plot_rp_imp(
         """
 
         LOGGER.info(
-            "Some errors in the previous calculation of local exceedance impacts have been corrected,"
-            " see Impact.local_exceedance_impact. To reproduce data with the "
+            "Some errors in the previous calculation of local exceedance impacts have been "
+            "corrected, see Impact.local_exceedance_impact. To reproduce data with the "
             "previous calculation, use CLIMADA v5.0.0 or less."
         )
 
diff --git a/climada/hazard/base.py b/climada/hazard/base.py
@@ -491,7 +491,7 @@ def local_exceedance_intensity(
         min_intensity=None,
         log_frequency=True,
         log_intensity=True,
-        n_sig_dig=3,
+        bin_decimals=None,
     ):
         """Compute local exceedance intensity for given return periods. The default method
         is fitting the ordered intensitites per centroid to the corresponding cummulated
@@ -594,7 +594,7 @@ def local_exceedance_intensity(
                         value_threshold=min_intensity,
                         method=method,
                         y_asymptotic=0.0,
-                        n_sig_dig=n_sig_dig,
+                        bin_decimals=bin_decimals,
                     )
                     for i_centroid in nonzero_centroids
                 ]
@@ -609,9 +609,11 @@ def local_exceedance_intensity(
 
         # create label and column_label
         label = f"Intensity ({self.units})"
-        column_label = lambda column_names: [
-            f"Return Period: {col} {return_period_unit}" for col in column_names
-        ]
+
+        def column_label(column_names):  # one "Return Period" label per column
+            return [
+                f"Return Period: {col} {return_period_unit}" for col in column_names
+            ]
 
         return gdf, label, column_label
 
@@ -642,7 +644,7 @@ def local_return_period(
         min_intensity=None,
         log_frequency=True,
         log_intensity=True,
-        n_sig_dig=3,
+        bin_decimals=None,
     ):
         """Compute local return periods for given hazard intensities. The default method
         is fitting the ordered intensitites per centroid to the corresponding cummulated
@@ -742,7 +744,7 @@ def local_return_period(
                         value_threshold=min_intensity,
                         method=method,
                         y_asymptotic=np.nan,
-                        n_sig_dig=n_sig_dig,
+                        bin_decimals=bin_decimals,
                     )
                     for i_centroid in nonzero_centroids
                 ]
@@ -759,9 +761,11 @@ def local_return_period(
 
         # create label and column_label
         label = f"Return Periods ({return_period_unit})"
-        column_label = lambda column_names: [
-            f"Threshold Intensity: {col} {self.units}" for col in column_names
-        ]
+
+        def column_label(column_names):
+            # columns hold threshold intensities, so label them with the hazard
+            # units (the return period is the plotted quantity, see ``label``)
+            return [f"Threshold Intensity: {col} {self.units}" for col in column_names]
 
         return gdf, label, column_label
 
diff --git a/climada/util/interpolation.py b/climada/util/interpolation.py
@@ -38,7 +38,7 @@ def preprocess_and_interpolate_ev(
     value_threshold=None,
     method="interpolate",
     y_asymptotic=np.nan,
-    n_sig_dig=3,
+    bin_decimals=None,
 ):
     """Function to first preprocess (frequency, values) data by binning the data according to
     their value with the given number of significant digits (see Notes), compute the cumulative
@@ -114,7 +114,8 @@ def preprocess_and_interpolate_ev(
     frequency = frequency[sorted_idxs]
 
     # group similar values together
-    frequency, values = _group_frequency(frequency, values, n_sig_dig)
+    if method == "extrapolate" and isinstance(bin_decimals, int):
+        frequency, values = _group_frequency(frequency, values, bin_decimals)
 
     # transform frequencies to cummulative frequencies
     frequency = np.cumsum(frequency[::-1])[::-1]
@@ -142,25 +143,24 @@ def preprocess_and_interpolate_ev(
         )
 
     # if test values are provided
-    else:
-        if method == "stepfunction":
-            return _stepfunction_ev(
-                test_values,
-                values,
-                frequency,
-                x_threshold=value_threshold,
-                y_asymptotic=y_asymptotic,
-            )
-        extrapolation = None if method == "interpolate" else method
-        return _interpolate_ev(
+    if method == "stepfunction":
+        return _stepfunction_ev(
             test_values,
             values,
             frequency,
-            logx=log_values,
-            logy=log_frequency,
             x_threshold=value_threshold,
-            extrapolation=extrapolation,
+            y_asymptotic=y_asymptotic,
         )
+    extrapolation = None if method == "interpolate" else method
+    return _interpolate_ev(
+        test_values,
+        values,
+        frequency,
+        logx=log_values,
+        logy=log_frequency,
+        x_threshold=value_threshold,
+        extrapolation=extrapolation,
+    )
 
 
 def _interpolate_ev(
@@ -346,7 +346,7 @@ def _interpolate_small_input(x_test, x_train, y_train, logy, y_asymptotic):
     return y_test
 
 
-def _group_frequency(frequency, value, n_sig_dig):
+def _group_frequency(frequency, value, bin_decimals):
     """
     Util function to aggregate (add) frequencies for equal values
 
@@ -370,14 +370,15 @@ def _group_frequency(frequency, value, n_sig_dig):
         return ([], [])
 
     # round values and group them
-    value = round_to_sig_digits(value, n_sig_dig)
+    value = np.around(value, decimals=bin_decimals)
     value_unique, start_indices = np.unique(value, return_index=True)
     if value_unique.size != frequency.size:
         if not all(sorted(start_indices) == start_indices):
             LOGGER.warning(
                 "After grouping values to significant digits, the value array is not sorted."
-                "The values are not binned. Please choose a larger value of n_sig_dig=%s.",
-                n_sig_dig,
+                "The values are not binned. This might be due to floating point error while "
+                "binning. Please choose a larger value of bin_decimals=%s.",
+                bin_decimals,
             )
             return frequency, value
 
@@ -387,24 +388,3 @@ def _group_frequency(frequency, value, n_sig_dig):
         return frequency, value_unique
 
     return frequency, value
-
-
-def round_to_sig_digits(values, n_sig_dig):
-    """round each element array to a number of significant digits
-
-    Parameters
-    ----------
-    values : array-like
-        values to be rounded
-    n_sig_dig : int
-        number of significant digits.
-
-    Returns
-    -------
-    np.array
-        rounded array
-    """
-
-    return np.vectorize(np.format_float_positional)(
-        values, precision=n_sig_dig, unique=False, fractional=False, trim="k"
-    ).astype(float)
diff --git a/climada/util/test/test_interpolation.py b/climada/util/test/test_interpolation.py
@@ -223,18 +223,15 @@ def test_frequency_group(self):
         frequency = np.ones(6)
         intensity = np.array([1.00001, 0.9998, 1.0, 2.0, 3.0, 3])
         np.testing.assert_allclose(
-            u_interp._group_frequency(frequency, intensity, n_sig_dig=3),
-            ([3, 1, 2], [1, 2, 3]),
+            u_interp._group_frequency(frequency, intensity, bin_decimals=5),
+            (frequency, intensity),
         )
         np.testing.assert_allclose(
-            u_interp._group_frequency([], [], n_sig_dig=3), ([], [])
+            u_interp._group_frequency(frequency, intensity, bin_decimals=3),
+            ([3, 1, 2], [1, 2, 3]),
         )
-
-    def test_round_to_sig_digits(self):
-        array = [0.00111, 999.0, 55.5, 0.0, -1.001, -1.08]
         np.testing.assert_allclose(
-            u_interp.round_to_sig_digits(array, n_sig_dig=2),
-            [0.0011, 1000.0, 56, 0.0, -1.0, -1.1],
+            u_interp._group_frequency([], [], bin_decimals=3), ([], [])
         )
 
     def test_preprocess_and_interpolate_ev(self):