@@ -39,15 +39,18 @@ def preprocess_and_interpolate_ev(
3939 y_asymptotic = np .nan ,
4040 n_sig_dig = 3 ,
4141):
42- """Wrapper function to first preprocess (frequency, values) data and and then inter- and
43- extrapolate to test frequencies or test values.
42+ """Function to first preprocess (frequency, values) data by binning the data according to
43+ their value with the given number of significant digits (see Notes), compute the cumulative
44+ frequencies, and then inter- and extrapolate either to test frequencies or to test values.
4445
4546 Parameters
4647 ----------
4748 test_frequency : array_like
4849 1-D array of test frequencies for which values (e.g., intensities or impacts) should be assigned.
50+ If given, test_values must be None.
4951 test_values : array_like
5052 1-D array of test values (e.g., intensities or impacts) for which frequencies should be assigned.
53+ If given, test_frequency must be None.
5154 frequency : array_like
5255 1-D array of frequencies to be interpolated.
5356 values : array_like
@@ -72,7 +75,7 @@ def preprocess_and_interpolate_ev(
7275 for test x values larger than given x values, if size < 2 or if method is set
7376 to "extrapolate_constant" or "stepfunction". Defaults to np.nan.
7477 n_sig_dig : int, optional
75- number of significant digits to group the values (in order to avoid bad extrapolation behaviour) . Defaults to 3.
78+ Number of significant digits to group and bin the values, see Notes . Defaults to 3.
7679
7780 Returns
7881 -------
@@ -84,13 +87,25 @@ def preprocess_and_interpolate_ev(
8487 ------
8588 ValueError
8689 If both test frequencies and test values are given or none of them.
90+
91+ Notes
92+ -----
93+ Before inter- and extrapolation, the values are binned according to their n_sig_dig
94+ significant digits, and their corresponding frequencies are summed. For instance, if
95+ n_sig_dig=3, the two values 12.01 and 11.97 with corresponding frequencies 0.1 and 0.2 are
96+ combined to a value 12.0 with frequency 0.3. This binning leads to a coarser (and smoother)
97+ interpolation, and a more stable extrapolation. To effectively disable binning, use a large
98+ n_sig_dig, e.g., n_sig_dig=7.
8799 """
88100
89101 # check that only test frequencies or only test values are given
90102 if test_frequency is not None and test_values is not None :
91- raise ValueError ("Both test frequencies and test values are given." )
103+ raise ValueError (
104+ "Both test frequencies and test values are given. "
105+ "To use this method, please only use one of them."
106+ )
92107 elif test_frequency is None and test_values is None :
93- raise ValueError ("No test values or frequencies are given." )
108+ raise ValueError ("No test values or test frequencies are given." )
94109
95110 # sort values and frequencies
96111 sorted_idxs = np .argsort (values )
@@ -213,7 +228,10 @@ def interpolate_ev(
213228 fill_value = "extrapolate"
214229 elif extrapolation == "extrapolate_constant" :
215230 if not all (sorted (x_train ) == x_train ):
216- raise ValueError ("x_train array must be sorted in ascending order." )
231+ raise ValueError (
232+ "x_train array must be sorted in ascending order. This might be due to floating "
233+ "point errors in the rounding process of `group_frequency()`."
234+ )
217235 fill_value = (y_train [0 ], np .log10 (y_asymptotic ) if logy else y_asymptotic )
218236 else :
219237 fill_value = np .nan
@@ -268,7 +286,10 @@ def stepfunction_ev(
268286
269287 # find indices of x_test if sorted into x_train
270288 if not all (sorted (x_train ) == x_train ):
271- raise ValueError ("Input array x_train must be sorted in ascending order." )
289+ raise ValueError (
290+ "Input array x_train must be sorted in ascending order. This might be due to "
291+ "floating point errors in the rounding process of `group_frequency()`."
292+ )
272293 indx = np .searchsorted (x_train , x_test )
273294 y_test = y_train [indx .clip (max = len (x_train ) - 1 )]
274295 y_test [indx == len (x_train )] = y_asymptotic
@@ -365,13 +386,19 @@ def group_frequency(frequency, value, n_sig_dig):
365386
366387 if value_unique .size != frequency .size :
367388 if not all (sorted (start_indices ) == start_indices ):
368- raise ValueError ("Value array must be sorted in ascending order." )
389+ raise ValueError (
390+ "Value array must be sorted in ascending order. This might be due to floating "
391+ "point errors in the rounding process of `round_to_sig_digits()`."
392+ )
369393 # add frequency for equal value
370394 start_indices = np .insert (start_indices , value_unique .size , frequency .size )
371395 frequency = np .add .reduceat (frequency , start_indices [:- 1 ])
372396 return frequency , value_unique
373397 elif not all (sorted (value ) == value ):
374- raise ValueError ("Value array must be sorted in ascending order!" )
398+ raise ValueError (
399+ "Value array must be sorted in ascending order. This might be due to floating point "
400+ "errors in the rounding process of `round_to_sig_digits()`."
401+ )
375402
376403 return frequency , value
377404
0 commit comments