Skip to content

Commit 827ec99

Browse files
adapted docstrings for binning explanations and some error messages
1 parent 672051b commit 827ec99

File tree

3 files changed

+96
-17
lines changed

3 files changed

+96
-17
lines changed

climada/engine/impact.py

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -498,10 +498,12 @@ def local_exceedance_impact(
498498
min_impact=0,
499499
log_frequency=True,
500500
log_impact=True,
501+
n_sig_dig=3,
501502
):
502503
"""Compute local exceedance impact for given return periods. The default method
503504
is fitting the ordered impacts per centroid to the corresponding cumulative
504-
frequency with linear interpolation on log-log scale.
505+
frequency with linear interpolation on log-log scale. Impacts are binned according
506+
to their n_sig_dig significant digits, see Notes.
505507
506508
Parameters
507509
----------
@@ -530,6 +532,9 @@ def local_exceedance_impact(
530532
This parameter is only used if method is set to "extrapolate" or "interpolate". If set
531533
to True, impact values are converted to log scale before inter- and extrapolation.
532534
Defaults to True.
535+
n_sig_dig : int, optional
536+
Number of significant digits for the binning of the impact values, see Notes.
537+
Defaults to 3.
533538
534539
Returns
535540
-------
@@ -542,6 +547,14 @@ def local_exceedance_impact(
542547
GeoDataFrame label, for reporting and plotting
543548
column_label : function
544549
Column-label-generating function, for reporting and plotting
550+
551+
Notes
552+
-----
553+
Contrary to Impact.calc_freq_curve(), impacts are binned according to their n_sig_dig
554+
significant digits. This results in a coarser (and smoother) interpolation, and a
555+
more stable extrapolation. To not bin the values, please use, e.g., n_sig_dig=7. For more
556+
information about the binning, see docstring of
557+
climada.util.interpolation.preprocess_and_interpolate_ev().
545558
"""
546559
LOGGER.info(
547560
"Computing exceedance impact map for return periods: %s", return_periods
@@ -589,7 +602,7 @@ def local_exceedance_impact(
589602
value_threshold=min_impact,
590603
method=method,
591604
y_asymptotic=0.0,
592-
n_sig_dig=3,
605+
n_sig_dig=n_sig_dig,
593606
)
594607
for i_centroid in nonzero_centroids
595608
]
@@ -632,10 +645,12 @@ def local_return_period(
632645
min_impact=0,
633646
log_frequency=True,
634647
log_impact=True,
648+
n_sig_dig=3,
635649
):
636650
"""Compute local return periods for given threshold impacts. The default method
637651
is fitting the ordered impacts per centroid to the corresponding cumulative
638-
frequency with linear interpolation on log-log scale.
652+
frequency with linear interpolation on log-log scale. Impacts are binned according
653+
to their n_sig_dig significant digits, see Notes.
639654
640655
Parameters
641656
----------
@@ -665,6 +680,9 @@ def local_return_period(
665680
This parameter is only used if method is set to "interpolate". If set to True,
666681
impact values are converted to log scale before inter- and extrapolation.
667682
Defaults to True.
683+
n_sig_dig : int, optional
684+
Number of significant digits for the binning of the impact values, see Notes.
685+
Defaults to 3.
668686
669687
Returns
670688
-------
@@ -677,6 +695,14 @@ def local_return_period(
677695
GeoDataFrame label, for reporting and plotting
678696
column_label : function
679697
Column-label-generating function, for reporting and plotting
698+
699+
Notes
700+
-----
701+
Contrary to Impact.calc_freq_curve(), impacts are binned according to their n_sig_dig
702+
significant digits. This results in a coarser (and smoother) interpolation, and a
703+
more stable extrapolation. To not bin the values, please use, e.g., n_sig_dig=7. For more
704+
information about the binning, see docstring of
705+
climada.util.interpolation.preprocess_and_interpolate_ev().
680706
"""
681707

682708
LOGGER.info("Computing return period map for impacts: %s", threshold_impact)
@@ -718,7 +744,7 @@ def local_return_period(
718744
value_threshold=min_impact,
719745
method=method,
720746
y_asymptotic=np.nan,
721-
n_sig_dig=3,
747+
n_sig_dig=n_sig_dig,
722748
)
723749
for i_centroid in nonzero_centroids
724750
]

climada/hazard/base.py

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -491,10 +491,12 @@ def local_exceedance_intensity(
491491
min_intensity=None,
492492
log_frequency=True,
493493
log_intensity=True,
494+
n_sig_dig=3,
494495
):
495496
"""Compute local exceedance intensity for given return periods. The default method
496497
is fitting the ordered intensities per centroid to the corresponding cumulative
497-
frequency with linear interpolation on log-log scale.
498+
frequency with linear interpolation on log-log scale. Intensities are binned according
499+
to their n_sig_dig significant digits, see Notes.
498500
499501
Parameters
500502
----------
@@ -524,6 +526,9 @@ def local_exceedance_intensity(
524526
This parameter is only used if method is set to "interpolate". If set to True,
525527
intensity values are converted to log scale before inter- and extrapolation.
526528
Defaults to True.
529+
n_sig_dig : int, optional
530+
Number of significant digits for the binning of the intensity values, see Notes.
531+
Defaults to 3.
527532
528533
Returns
529534
-------
@@ -536,6 +541,14 @@ def local_exceedance_intensity(
536541
GeoDataFrame label, for reporting and plotting
537542
column_label : function
538543
Column-label-generating function, for reporting and plotting
544+
545+
Notes
546+
-----
547+
Contrary to Impact.calc_freq_curve(), intensities are binned according to their n_sig_dig
548+
significant digits. This results in a coarser (and smoother) interpolation, and a
549+
more stable extrapolation. To not bin the values, please use, e.g., n_sig_dig=7. For more
550+
information about the binning, see docstring of
551+
climada.util.interpolation.preprocess_and_interpolate_ev().
539552
"""
540553
if not min_intensity and min_intensity != 0:
541554
min_intensity = self.intensity_thres
@@ -575,7 +588,7 @@ def local_exceedance_intensity(
575588
value_threshold=min_intensity,
576589
method=method,
577590
y_asymptotic=0.0,
578-
n_sig_dig=3,
591+
n_sig_dig=n_sig_dig,
579592
)
580593
for i_centroid in nonzero_centroids
581594
]
@@ -623,10 +636,12 @@ def local_return_period(
623636
min_intensity=None,
624637
log_frequency=True,
625638
log_intensity=True,
639+
n_sig_dig=3,
626640
):
627641
"""Compute local return periods for given hazard intensities. The default method
628642
is fitting the ordered intensities per centroid to the corresponding cumulative
629-
frequency with linear interpolation on log-log scale.
643+
frequency with linear interpolation on log-log scale. Intensities are binned according
644+
to their n_sig_dig significant digits, see Notes.
630645
631646
Parameters
632647
----------
@@ -657,6 +672,9 @@ def local_return_period(
657672
This parameter is only used if method is set to "interpolate". If set to True,
658673
intensity values are converted to log scale before inter- and extrapolation.
659674
Defaults to True.
675+
n_sig_dig : int, optional
676+
Number of significant digits for the binning of the intensity values, see Notes.
677+
Defaults to 3.
660678
661679
Returns
662680
-------
@@ -669,6 +687,14 @@ def local_return_period(
669687
GeoDataFrame label, for reporting and plotting
670688
column_label : function
671689
Column-label-generating function, for reporting and plotting
690+
691+
Notes
692+
-----
693+
Contrary to Impact.calc_freq_curve(), intensities are binned according to their n_sig_dig
694+
significant digits. This results in a coarser (and smoother) interpolation, and a
695+
more stable extrapolation. To not bin the values, please use, e.g., n_sig_dig=7. For more
696+
information about the binning, see docstring of
697+
climada.util.interpolation.preprocess_and_interpolate_ev().
672698
"""
673699
if not min_intensity and min_intensity != 0:
674700
min_intensity = self.intensity_thres
@@ -705,7 +731,7 @@ def local_return_period(
705731
value_threshold=min_intensity,
706732
method=method,
707733
y_asymptotic=np.nan,
708-
n_sig_dig=3,
734+
n_sig_dig=n_sig_dig,
709735
)
710736
for i_centroid in nonzero_centroids
711737
]

climada/util/interpolation.py

Lines changed: 36 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -39,15 +39,18 @@ def preprocess_and_interpolate_ev(
3939
y_asymptotic=np.nan,
4040
n_sig_dig=3,
4141
):
42-
"""Wrapper function to first preprocess (frequency, values) data and and then inter- and
43-
extrapolate to test frequencies or test values.
42+
"""Function to first preprocess (frequency, values) data by binning the data according to
43+
their value with the given number of significant digits (see Notes), compute the cumulative
44+
frequencies, and then inter- and extrapolate either to test frequencies or to test values.
4445
4546
Parameters
4647
----------
4748
test_frequency : array_like
4849
1-D array of test frequencies for which values (e.g., intensities or impacts) should be assigned.
50+
If given, test_values must be None.
4951
test_values : array_like
5052
1-D array of test values (e.g., intensities or impacts) for which frequencies should be assigned.
53+
If given, test_frequency must be None.
5154
frequency : array_like
5255
1-D array of frequencies to be interpolated.
5356
values : array_like
@@ -72,7 +75,7 @@ def preprocess_and_interpolate_ev(
7275
for test x values larger than given x values, if size < 2 or if method is set
7376
to "extrapolate_constant" or "stepfunction". Defaults to np.nan.
7477
n_sig_dig : int, optional
75-
number of significant digits to group the values (in order to avoid bad extrapolation behaviour). Defaults to 3.
78+
Number of significant digits to group and bin the values, see Notes. Defaults to 3.
7679
7780
Returns
7881
-------
@@ -84,13 +87,25 @@ def preprocess_and_interpolate_ev(
8487
------
8588
ValueError
8689
If both test frequencies and test values are given or none of them.
90+
91+
Notes
92+
-----
93+
Before inter- and extrapolation, the values are binned according to their n_sig_dig
94+
significant digits, and their corresponding frequencies are summed. For instance, if
95+
n_sig_dig=3, the two values 12.01 and 11.97 with corresponding frequencies 0.1 and 0.2 are
96+
combined to a value 12.0 with frequency 0.3. This binning leads to a coarser (and smoother)
97+
interpolation, and a more stable extrapolation. To not bin the values, you can use a large
98+
n_sig_dig, e.g., n_sig_dig=7.
8799
"""
88100

89101
# check that only test frequencies or only test values are given
90102
if test_frequency is not None and test_values is not None:
91-
raise ValueError("Both test frequencies and test values are given.")
103+
raise ValueError(
104+
"Both test frequencies and test values are given. "
105+
"To use this method, please only use one of them."
106+
)
92107
elif test_frequency is None and test_values is None:
93-
raise ValueError("No test values or frequencies are given.")
108+
raise ValueError("No test values or test frequencies are given.")
94109

95110
# sort values and frequencies
96111
sorted_idxs = np.argsort(values)
@@ -213,7 +228,10 @@ def interpolate_ev(
213228
fill_value = "extrapolate"
214229
elif extrapolation == "extrapolate_constant":
215230
if not all(sorted(x_train) == x_train):
216-
raise ValueError("x_train array must be sorted in ascending order.")
231+
raise ValueError(
232+
"x_train array must be sorted in ascending order. This might be due to floating "
233+
"point errors in the rounding process of `group_frequency()`."
234+
)
217235
fill_value = (y_train[0], np.log10(y_asymptotic) if logy else y_asymptotic)
218236
else:
219237
fill_value = np.nan
@@ -268,7 +286,10 @@ def stepfunction_ev(
268286

269287
# find indices of x_test if sorted into x_train
270288
if not all(sorted(x_train) == x_train):
271-
raise ValueError("Input array x_train must be sorted in ascending order.")
289+
raise ValueError(
290+
"Input array x_train must be sorted in ascending order. This might be due to "
291+
"floating point errors in the rounding process of `group_frequency()`."
292+
)
272293
indx = np.searchsorted(x_train, x_test)
273294
y_test = y_train[indx.clip(max=len(x_train) - 1)]
274295
y_test[indx == len(x_train)] = y_asymptotic
@@ -365,13 +386,19 @@ def group_frequency(frequency, value, n_sig_dig):
365386

366387
if value_unique.size != frequency.size:
367388
if not all(sorted(start_indices) == start_indices):
368-
raise ValueError("Value array must be sorted in ascending order.")
389+
raise ValueError(
390+
"Value array must be sorted in ascending order. This might be due to floating "
391+
"point errors in the rounding process of `round_to_sig_digits()`."
392+
)
369393
# add frequency for equal value
370394
start_indices = np.insert(start_indices, value_unique.size, frequency.size)
371395
frequency = np.add.reduceat(frequency, start_indices[:-1])
372396
return frequency, value_unique
373397
elif not all(sorted(value) == value):
374-
raise ValueError("Value array must be sorted in ascending order!")
398+
raise ValueError(
399+
"Value array must be sorted in ascending order. This might be due to floating point "
400+
"errors in the rounding process of `round_to_sig_digits()`."
401+
)
375402

376403
return frequency, value
377404

0 commit comments

Comments
 (0)