Improve efficiency of function group_frequency

ValentinGebhart · ValentinGebhart · commit 9edac4d507e6 · 2024-10-04T09:06:31.000+02:00
diff --git a/climada/util/interpolation.py b/climada/util/interpolation.py
@@ -344,16 +344,36 @@ def group_frequency(frequency, value, n_sig_dig=2):
     if frequency.size == 0 and value.size == 0:
         return ([], [])
 
-    if len(value) != len(np.unique(sig_dig_list(value, n_sig_dig=n_sig_dig))):
-        #check ordering of value
-        if not all(sorted(value) == value):
+    # round values and group them
+    value = round_to_sig_digits(value, n_sig_dig)
+    value_unique, start_indices = np.unique(value, return_index=True)
+
+    if value_unique.size != frequency.size:
+        if not all(sorted(start_indices) == start_indices):
             raise ValueError('Value array must be sorted in ascending order.')
         # add frequency for equal value
-        value, start_indices = np.unique(
-            sig_dig_list(value, n_sig_dig=n_sig_dig), return_index=True)
-        start_indices = np.insert(start_indices, len(value), len(frequency))
-        frequency = np.array([
-            sum(frequency[start_indices[i]:start_indices[i+1]])
-            for i in range(len(value))
-        ])
+        start_indices = np.insert(start_indices, value_unique.size, frequency.size)
+        frequency = np.add.reduceat(frequency, start_indices[:-1])
+        return frequency, value_unique
+    
     return frequency, value
+
+def round_to_sig_digits(x, n_sig_dig):
+    """Round each element of an array to a given number of significant digits.
+
+    Parameters
+    ----------
+    x : array-like
+        array to be rounded
+    n_sig_dig : int
+        number of significant digits.
+
+    Returns
+    -------
+    np.array
+        rounded array
+    """
+    x = np.asarray(x)
+    x_positive = np.where(np.isfinite(x) & (x != 0), np.abs(x), 10**(n_sig_dig-1))
+    mags = 10 ** (n_sig_dig - 1 - np.floor(np.log10(x_positive)))
+    return np.round(x * mags) / mags
diff --git a/climada/util/test/test_interpolation.py b/climada/util/test/test_interpolation.py
@@ -22,7 +22,7 @@
 import unittest
 import numpy as np
 
-from climada.util.interpolation import interpolate_ev, stepfunction_ev, group_frequency, preprocess_and_interpolate_ev
+import climada.util.interpolation as u_interp
 
 
 class TestFitMethods(unittest.TestCase):
@@ -34,15 +34,15 @@ def test_interpolate_ev_linear_interp(self):
         y_train = np.array([8., 4., 2.])
         x_test = np.array([0., 3., 4., 6.])
         np.testing.assert_allclose(
-            interpolate_ev(x_test, x_train, y_train),
+            u_interp.interpolate_ev(x_test, x_train, y_train),
             np.array([np.nan, 4., 3., np.nan])
         )
         np.testing.assert_allclose(
-            interpolate_ev(x_test, x_train, y_train, extrapolation='extrapolate_constant'),
+            u_interp.interpolate_ev(x_test, x_train, y_train, extrapolation='extrapolate_constant'),
             np.array([8., 4., 3., np.nan])
         )
         np.testing.assert_allclose(
-            interpolate_ev(x_test, x_train, y_train,
+            u_interp.interpolate_ev(x_test, x_train, y_train,
                         extrapolation='extrapolate_constant', y_asymptotic = 0),
             np.array([8., 4., 3., 0.])
         )
@@ -53,16 +53,16 @@ def test_interpolate_ev_threshold_parameters(self):
         y_train = np.array([4., 1., 4.])
         x_test = np.array([-1., 3., 4.])
         np.testing.assert_allclose(
-            interpolate_ev(x_test, x_train, y_train, extrapolation='extrapolate_constant'),
+            u_interp.interpolate_ev(x_test, x_train, y_train, extrapolation='extrapolate_constant'),
             np.array([4., 1., 2.])
         )
         np.testing.assert_allclose(
-            interpolate_ev(x_test, x_train, y_train, x_threshold=1.,
+            u_interp.interpolate_ev(x_test, x_train, y_train, x_threshold=1.,
                            extrapolation='extrapolate_constant'),
             np.array([1., 1., 2.])
         )
         np.testing.assert_allclose(
-            interpolate_ev(x_test, x_train, y_train, y_threshold=2.,
+            u_interp.interpolate_ev(x_test, x_train, y_train, y_threshold=2.,
                            extrapolation='extrapolate_constant'),
             np.array([4., 4., 4.])
         )
@@ -73,26 +73,26 @@ def test_interpolate_ev_scale_parameters(self):
         y_train = np.array([1., 3.])
         x_test = np.array([1e0, 1e2])
         np.testing.assert_allclose(
-            interpolate_ev(x_test, x_train, y_train, logx=True, extrapolation='extrapolate'),
+            u_interp.interpolate_ev(x_test, x_train, y_train, logx=True, extrapolation='extrapolate'),
             np.array([0., 2.])
         )
         np.testing.assert_allclose(
-            interpolate_ev(x_test, x_train, y_train, logx=True,
+            u_interp.interpolate_ev(x_test, x_train, y_train, logx=True,
                            extrapolation='extrapolate_constant'),
             np.array([1., 2.])
         )
         x_train = np.array([1., 3.])
         y_train = np.array([1e1, 1e3])
         x_test = np.array([0., 2.])
         np.testing.assert_allclose(
-            interpolate_ev(x_test, x_train, y_train, logy=True, extrapolation='extrapolate'),
+            u_interp.interpolate_ev(x_test, x_train, y_train, logy=True, extrapolation='extrapolate'),
             np.array([1e0, 1e2])
         )
         x_train = np.array([1e1, 1e3])
         y_train = np.array([1e1, 1e5])
         x_test = np.array([1e0, 1e2])
         np.testing.assert_allclose(
-            interpolate_ev(x_test, x_train, y_train, logx=True, logy=True,
+            u_interp.interpolate_ev(x_test, x_train, y_train, logx=True, logy=True,
                            extrapolation='extrapolate'),
             np.array([1e-1, 1e3])
         )
@@ -103,7 +103,7 @@ def test_interpolate_ev_degenerate_input(self):
         x_test = np.array([0., 2., 4.])
         y_train = np.zeros(3)
         np.testing.assert_allclose(
-            interpolate_ev(x_test, x_train, y_train),
+            u_interp.interpolate_ev(x_test, x_train, y_train),
             np.array([np.nan, 0., 0.])
         )
 
@@ -113,27 +113,27 @@ def test_interpolate_ev_small_input(self):
         y_train = np.array([2.])
         x_test = np.array([0., 1., 2.])
         np.testing.assert_allclose(
-            interpolate_ev(x_test, x_train, y_train, extrapolation='extrapolate'),
+            u_interp.interpolate_ev(x_test, x_train, y_train, extrapolation='extrapolate'),
             np.array([2., 2., np.nan])
         )
         np.testing.assert_allclose(
-            interpolate_ev(x_test, x_train, y_train, extrapolation='extrapolate', y_asymptotic=0),
+            u_interp.interpolate_ev(x_test, x_train, y_train, extrapolation='extrapolate', y_asymptotic=0),
             np.array([2., 2., 0.])
         )
         np.testing.assert_allclose(
-            interpolate_ev(x_test, x_train, y_train),
+            u_interp.interpolate_ev(x_test, x_train, y_train),
             np.full(3, np.nan)
         )
 
         x_train = np.array([])
         y_train = np.array([])
         x_test = np.array([0., 1., 2.])
         np.testing.assert_allclose(
-            interpolate_ev(x_test, x_train, y_train),
+            u_interp.interpolate_ev(x_test, x_train, y_train),
             np.full(3, np.nan)
         )
         np.testing.assert_allclose(
-            interpolate_ev(x_test, x_train, y_train,
+            u_interp.interpolate_ev(x_test, x_train, y_train,
                            extrapolation='extrapolate_constant', y_asymptotic=0),
             np.zeros(3)
         )
@@ -144,11 +144,11 @@ def test_stepfunction_ev(self):
         y_train = np.array([8., 4., 2.])
         x_test = np.array([0., 3., 4., 6.])
         np.testing.assert_allclose(
-            stepfunction_ev(x_test, x_train, y_train),
+            u_interp.stepfunction_ev(x_test, x_train, y_train),
             np.array([8., 4., 2., np.nan])
         )
         np.testing.assert_allclose(
-            stepfunction_ev(x_test, x_train, y_train, y_asymptotic=0.),
+            u_interp.stepfunction_ev(x_test, x_train, y_train, y_asymptotic=0.),
             np.array([8., 4., 2., 0.])
         )
 
@@ -158,39 +158,46 @@ def test_stepfunction_ev_small_input(self):
         y_train = np.array([2.])
         x_test = np.array([0., 1., 2.])
         np.testing.assert_allclose(
-            stepfunction_ev(x_test, x_train, y_train),
+            u_interp.stepfunction_ev(x_test, x_train, y_train),
             np.array([2., 2., np.nan])
         )
         np.testing.assert_allclose(
-            stepfunction_ev(x_test, x_train, y_train, y_asymptotic=0),
+            u_interp.stepfunction_ev(x_test, x_train, y_train, y_asymptotic=0),
             np.array([2., 2., 0.])
         )
         x_train = np.array([])
         y_train = np.array([])
         x_test = np.array([0., 1., 2.])
         np.testing.assert_allclose(
-            stepfunction_ev(x_test, x_train, y_train),
+            u_interp.stepfunction_ev(x_test, x_train, y_train),
             np.full(3, np.nan)
         )
         np.testing.assert_allclose(
-            stepfunction_ev(x_test, x_train, y_train, y_asymptotic=0),
+            u_interp.stepfunction_ev(x_test, x_train, y_train, y_asymptotic=0),
             np.zeros(3)
         )
     
     def test_frequency_group(self):
         """Test frequency grouping method"""
         frequency = np.ones(6)
-        intensity = np.array([1., 1., 1., 2., 3., 3])
+        intensity = np.array([1.00001, .999, 1., 2., 3., 3])
         np.testing.assert_allclose(
-            group_frequency(frequency, intensity), 
+            u_interp.group_frequency(frequency, intensity), 
             ([3, 1, 2], [1, 2, 3])
         )
         np.testing.assert_allclose(
-            group_frequency([], []), 
+            u_interp.group_frequency([], []), 
             ([], [])
         )
         with self.assertRaises(ValueError):
-            group_frequency(frequency, intensity[::-1])
+            u_interp.group_frequency(frequency, intensity[::-1])
+
+    def test_round_to_sig_digits(self):
+        array = [.00111, 999., 55.5, 0., -1.001, -1.08]
+        np.testing.assert_allclose(
+            u_interp.round_to_sig_digits(array, n_sig_dig=2), 
+            [.0011, 1000., 56, 0., -1., -1.1]
+        )
 
     def test_preprocess_and_interpolate_ev(self):
         """Test wrapper function"""
@@ -202,30 +209,30 @@ def test_preprocess_and_interpolate_ev(self):
         # test interpolation
         np.testing.assert_allclose(
             [np.nan, 55., np.nan],
-            preprocess_and_interpolate_ev(test_frequency, None, frequency, values)
+            u_interp.preprocess_and_interpolate_ev(test_frequency, None, frequency, values)
         )
         np.testing.assert_allclose(
             [np.nan, .55, np.nan],
-            preprocess_and_interpolate_ev(None, test_values, frequency, values)
+            u_interp.preprocess_and_interpolate_ev(None, test_values, frequency, values)
         )
 
         # test extrapolation with constants
         np.testing.assert_allclose(
             [100. , 55., 0.],
-            preprocess_and_interpolate_ev(test_frequency, None, frequency, values,
+            u_interp.preprocess_and_interpolate_ev(test_frequency, None, frequency, values,
                                           method='extrapolate_constant', y_asymptotic=0.)
         )
         np.testing.assert_allclose(
             [1., .55, np.nan],
-            preprocess_and_interpolate_ev(None, test_values, frequency, values,
+            u_interp.preprocess_and_interpolate_ev(None, test_values, frequency, values,
                                           method='extrapolate_constant')
         )
 
         # test error raising
         with self.assertRaises(ValueError):
-            preprocess_and_interpolate_ev(test_frequency, test_values, frequency, values)
+            u_interp.preprocess_and_interpolate_ev(test_frequency, test_values, frequency, values)
         with self.assertRaises(ValueError):
-            preprocess_and_interpolate_ev(None, None, frequency, values)
+            u_interp.preprocess_and_interpolate_ev(None, None, frequency, values)
 
 # Execute Tests
 if __name__ == "__main__":