Skip to content

Commit b0f3a14

Browse files
Merge pull request #930 from CLIMADA-project/feature/interp_util_func
New interpolation util functionality
2 parents 2736e62 + 118ac29 commit b0f3a14

File tree

3 files changed

+439
-1
lines changed

3 files changed

+439
-1
lines changed

CHANGELOG.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@ Code freeze date: YYYY-MM-DD
1111
### Dependency Changes
1212

1313
### Added
14-
14+
- `climada.util.interpolation` module for inter- and extrapolation util functions used in local exceedance intensity and return period functions [#930](https://github.com/CLIMADA-project/climada_python/pull/930)
15+
1516
### Changed
1617

1718
- In `climada.util.plot.geo_im_from_array`, NaNs are plotted in gray while cells with no centroid are not plotted [#929](https://github.com/CLIMADA-project/climada_python/pull/929)

climada/util/interpolation.py

Lines changed: 254 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,254 @@
1+
"""
2+
This file is part of CLIMADA.
3+
4+
Copyright (C) 2017 ETH Zurich, CLIMADA contributors listed in AUTHORS.
5+
6+
CLIMADA is free software: you can redistribute it and/or modify it under the
7+
terms of the GNU General Public License as published by the Free
8+
Software Foundation, version 3.
9+
10+
CLIMADA is distributed in the hope that it will be useful, but WITHOUT ANY
11+
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
12+
PARTICULAR PURPOSE. See the GNU General Public License for more details.
13+
14+
You should have received a copy of the GNU General Public License along
15+
with CLIMADA. If not, see <https://www.gnu.org/licenses/>.
16+
17+
---
18+
19+
Define interpolation and extrapolation functions for calculating (local) exceedance frequencies and return periods
20+
"""
21+
22+
23+
import logging
24+
25+
import numpy as np
26+
from scipy import interpolate
27+
28+
from climada.util.value_representation import sig_dig_list
29+
30+
LOGGER = logging.getLogger(__name__)
31+
32+
def interpolate_ev(
33+
x_test,
34+
x_train,
35+
y_train,
36+
logx = False,
37+
logy = False,
38+
x_threshold = None,
39+
y_threshold = None,
40+
extrapolation = False,
41+
y_asymptotic = np.nan
42+
):
43+
"""
44+
Util function to interpolate (and extrapolate) training data (x_train, y_train)
45+
to new points x_test with several options (log scale, thresholds)
46+
47+
Parameters:
48+
-------
49+
x_test : array_like
50+
1-D array of x-values for which training data should be interpolated
51+
x_train : array_like
52+
1-D array of x-values of training data
53+
y_train : array_like
54+
1-D array of y-values of training data
55+
logx : bool, optional
56+
If set to True, x_values are converted to log scale. Defaults to False.
57+
logy : bool, optional
58+
If set to True, y_values are converted to log scale. Defaults to False.
59+
x_threshold : float, optional
60+
Lower threshold to filter x_train. Defaults to None.
61+
y_threshold : float, optional
62+
Lower threshold to filter y_train. Defaults to None.
63+
extrapolation : bool, optional
64+
If set to True, values will be extrapolated. If set to False, x_test values
65+
smaller than x_train will be assigned y_train[0] (x_train must be sorted in
66+
ascending order), and x_test values larger than x_train will be assigned
67+
y_asymptotic. Defaults to False
68+
y_asymptotic : float, optional
69+
Return value and if extrapolation is True or x_train.size < 2, for x_test
70+
values larger than x_train. Defaults to np.nan.
71+
72+
Returns
73+
-------
74+
np.array
75+
interpolated values y_test for the test points x_test
76+
"""
77+
78+
# preprocess interpolation data
79+
x_test, x_train, y_train = _preprocess_interpolation_data(
80+
x_test, x_train, y_train, logx, logy, x_threshold, y_threshold
81+
)
82+
83+
# handle case of small training data sizes
84+
if x_train.size < 2:
85+
LOGGER.warning('Data is being extrapolated.')
86+
return _interpolate_small_input(x_test, x_train, y_train, logy, y_asymptotic)
87+
88+
# calculate fill values
89+
if extrapolation:
90+
fill_value = 'extrapolate'
91+
if np.min(x_test) < np.min(x_train) or np.max(x_test) > np.max(x_train):
92+
LOGGER.warning('Data is being extrapolated.')
93+
else:
94+
if not all(sorted(x_train) == x_train):
95+
raise ValueError('x_train array must be sorted in ascending order.')
96+
fill_value = (y_train[0], np.log10(y_asymptotic) if logy else y_asymptotic)
97+
98+
interpolation = interpolate.interp1d(
99+
x_train, y_train, fill_value=fill_value, bounds_error=False)
100+
y_test = interpolation(x_test)
101+
102+
# adapt output scale
103+
if logy:
104+
y_test = np.power(10., y_test)
105+
return y_test
106+
107+
def stepfunction_ev(
108+
x_test,
109+
x_train,
110+
y_train,
111+
x_threshold = None,
112+
y_threshold = None,
113+
y_asymptotic = np.nan
114+
):
115+
"""
116+
Util function to interpolate and extrapolate training data (x_train, y_train)
117+
to new points x_test using a step function
118+
119+
Parameters:
120+
-------
121+
x_test : array_like
122+
1-D array of x-values for which training data should be interpolated
123+
x_train : array_like
124+
1-D array of x-values of training data
125+
y_train : array_like
126+
1-D array of y-values of training data
127+
x_threshold : float, optional
128+
Lower threshold to filter x_train. Defaults to None.
129+
y_threshold : float, optional
130+
Lower threshold to filter y_train. Defaults to None.
131+
y_asymptotic : float, optional
132+
Return value if x_test > x_train. Defaults to np.nan.
133+
134+
Returns
135+
-------
136+
np.array
137+
interpolated values y_test for the test points x_test
138+
"""
139+
140+
# preprocess interpolation data
141+
x_test, x_train, y_train = _preprocess_interpolation_data(
142+
x_test, x_train, y_train, None, None, x_threshold, y_threshold
143+
)
144+
145+
# handle case of small training data sizes
146+
if x_train.size < 2:
147+
return _interpolate_small_input(x_test, x_train, y_train, None, y_asymptotic)
148+
149+
# find indices of x_test if sorted into x_train
150+
if not all(sorted(x_train) == x_train):
151+
raise ValueError('Input array x_train must be sorted in ascending order.')
152+
indx = np.searchsorted(x_train, x_test)
153+
y_test = y_train[indx.clip(max = len(x_train) - 1)]
154+
y_test[indx == len(x_train)] = y_asymptotic
155+
156+
return y_test
157+
158+
def _preprocess_interpolation_data(
159+
x_test,
160+
x_train,
161+
y_train,
162+
logx,
163+
logy,
164+
x_threshold,
165+
y_threshold
166+
):
167+
"""
168+
helper function to preprocess interpolation training and test data by filtering data below
169+
thresholds and converting to log scale if required
170+
"""
171+
172+
if x_train.shape != y_train.shape:
173+
raise ValueError(f'Incompatible shapes of input data, x_train {x_train.shape} '
174+
f'and y_train {y_train.shape}. Should be the same')
175+
176+
# transform input to float arrays
177+
x_test, x_train, y_train = (np.array(x_test).astype(float),
178+
np.array(x_train).astype(float),
179+
np.array(y_train).astype(float))
180+
181+
# cut x and y above threshold
182+
if x_threshold or x_threshold==0:
183+
x_th = np.asarray(x_train > x_threshold).squeeze()
184+
x_train = x_train[x_th]
185+
y_train = y_train[x_th]
186+
187+
if y_threshold or y_threshold==0:
188+
y_th = np.asarray(y_train > y_threshold).squeeze()
189+
x_train = x_train[y_th]
190+
y_train = y_train[y_th]
191+
192+
# convert to log scale
193+
if logx:
194+
x_train, x_test = np.log10(x_train), np.log10(x_test)
195+
if logy:
196+
y_train = np.log10(y_train)
197+
198+
return (x_test, x_train, y_train)
199+
200+
def _interpolate_small_input(x_test, x_train, y_train, logy, y_asymptotic):
201+
"""
202+
helper function to handle if interpolation data is small (empty or one point)
203+
"""
204+
# return y_asymptotic if x_train and y_train empty
205+
if x_train.size == 0:
206+
return np.full_like(x_test, y_asymptotic)
207+
208+
# reconvert logarithmic y_train to original y_train
209+
if logy:
210+
y_train = np.power(10., y_train)
211+
212+
# if only one (x_train, y_train), return stepfunction with
213+
# y_train if x_test < x_train and y_asymtotic if x_test > x_train
214+
y_test = np.full_like(x_test, y_train[0])
215+
y_test[np.squeeze(x_test) > np.squeeze(x_train)] = y_asymptotic
216+
return y_test
217+
218+
def group_frequency(frequency, value, n_sig_dig=2):
219+
"""
220+
Util function to aggregate (add) frequencies for equal values
221+
222+
Parameters:
223+
------
224+
frequency : array_like
225+
Frequency array
226+
value : array_like
227+
Value array in ascending order
228+
n_sig_dig : int
229+
number of significant digits for value when grouping frequency.
230+
Defaults to 2.
231+
232+
Returns:
233+
------
234+
tuple
235+
(frequency array after aggregation,
236+
unique value array in ascending order)
237+
"""
238+
frequency, value = np.array(frequency), np.array(value)
239+
if frequency.size == 0 and value.size == 0:
240+
return ([], [])
241+
242+
if len(value) != len(np.unique(sig_dig_list(value, n_sig_dig=n_sig_dig))):
243+
#check ordering of value
244+
if not all(sorted(value) == value):
245+
raise ValueError('Value array must be sorted in ascending order.')
246+
# add frequency for equal value
247+
value, start_indices = np.unique(
248+
sig_dig_list(value, n_sig_dig=n_sig_dig), return_index=True)
249+
start_indices = np.insert(start_indices, len(value), len(frequency))
250+
frequency = np.array([
251+
sum(frequency[start_indices[i]:start_indices[i+1]])
252+
for i in range(len(value))
253+
])
254+
return frequency, value

0 commit comments

Comments
 (0)