Skip to content

Commit 25209c2

Browse files
added new util module and tests
1 parent e2d1c7a commit 25209c2

File tree

2 files changed

+457
-0
lines changed

2 files changed

+457
-0
lines changed

climada/util/interpolation.py

Lines changed: 270 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,270 @@
1+
"""
2+
This file is part of CLIMADA.
3+
4+
Copyright (C) 2017 ETH Zurich, CLIMADA contributors listed in AUTHORS.
5+
6+
CLIMADA is free software: you can redistribute it and/or modify it under the
7+
terms of the GNU General Public License as published by the Free
8+
Software Foundation, version 3.
9+
10+
CLIMADA is distributed in the hope that it will be useful, but WITHOUT ANY
11+
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
12+
PARTICULAR PURPOSE. See the GNU General Public License for more details.
13+
14+
You should have received a copy of the GNU General Public License along
15+
with CLIMADA. If not, see <https://www.gnu.org/licenses/>.
16+
17+
---
18+
19+
define fit functionalities for (local) exceedance frequencies and return periods
20+
"""
21+
22+
23+
import logging
24+
25+
import numpy as np
26+
from scipy import interpolate
27+
28+
from climada.util.value_representation import sig_dig_list
29+
30+
LOGGER = logging.getLogger(__name__)
31+
32+
def interpolate_ev(
33+
x_test,
34+
x_train,
35+
y_train,
36+
x_scale = None,
37+
y_scale = None,
38+
x_threshold = None,
39+
y_threshold = None,
40+
y_asymptotic = np.nan,
41+
fill_value = np.nan
42+
):
43+
"""
44+
Util function to interpolate (and extrapolate) training data (x_train, y_train)
45+
to new points x_test with several options (log scale, thresholds)
46+
47+
Parameters:
48+
-------
49+
x_test : array_like
50+
1-D array of x-values for which training data should be interpolated
51+
x_train : array_like
52+
1-D array of x-values of training data
53+
y_train : array_like
54+
1-D array of y-values of training data
55+
x_scale : str, optional
56+
If set to 'log', x_values are convert to log scale. Defaults to None.
57+
y_scale : str, optional
58+
If set to 'log', x_values are convert to log scale. Defaults to None.
59+
x_threshold : float, optional
60+
Lower threshold to filter x_train. Defaults to None.
61+
y_threshold : float, optional
62+
Lower threshold to filter y_train. Defaults to None.
63+
y_asymptotic : float, optional
64+
Return value if x_test > x_train and if
65+
x_train.size < 2. Defaults to np.nan.
66+
fill_value : tuple, float, str
67+
fill values to use when x_test outside of range of x_train.
68+
If set to "extrapolate", values will be extrapolated. If set to a float, value will
69+
be used on both sides. If set to tuple, left value will be used for left side and
70+
right value will be used for right side. If tuple and left value is "maximum", the maximum
71+
of the cummulative frequencies will be used to compute exceedance intensities on the left.
72+
Defaults to np.nan
73+
74+
Returns
75+
-------
76+
np.array
77+
interpolated values y_test for the test points x_test
78+
"""
79+
80+
# # check if inputs are valid
81+
# if method not in ['interpolate', 'stepfunction']:
82+
# raise ValueError(f'Unknown method: {method}. Use "interpolate" or "stepfunction" instead')
83+
# if method == 'stepfunction': # x_scale and y_scale unnecessary if fitting stepfunction
84+
# x_scale, y_scale = None, None
85+
86+
# preprocess interpolation data
87+
x_test, x_train, y_train = _preprocess_interpolation_data(
88+
x_test, x_train, y_train, x_scale, y_scale, x_threshold, y_threshold
89+
)
90+
91+
# handle case of small training data sizes
92+
if x_train.size < 2:
93+
LOGGER.warning('Data is being extrapolated.')
94+
return _interpolate_small_input(x_test, x_train, y_train, y_scale, y_asymptotic)
95+
96+
# calculate fill values
97+
if isinstance(fill_value, tuple):
98+
if fill_value[0] == 'maximum':
99+
fill_value = (
100+
np.max(y_train),
101+
np.log10(fill_value[1]) if y_scale == 'log' else fill_value[1]
102+
)
103+
elif y_scale == 'log':
104+
fill_value = tuple(np.log10(fill_value))
105+
elif isinstance(fill_value, (float, int)) and y_scale == 'log':
106+
fill_value = np.log10(fill_value)
107+
108+
109+
# warn if data is being extrapolated
110+
if (
111+
fill_value == 'extrapolate' and
112+
((np.min(x_test) < np.min(x_train)) or (np.max(x_test) > np.max(x_train)))
113+
):
114+
LOGGER.warning('Data is being extrapolated.')
115+
116+
interpolation = interpolate.interp1d(x_train, y_train, fill_value=fill_value, bounds_error=False)
117+
y_test = interpolation(x_test)
118+
119+
# adapt output scale
120+
if y_scale == 'log':
121+
y_test = np.power(10., y_test)
122+
return y_test
123+
124+
def stepfunction_ev(
125+
x_test,
126+
x_train,
127+
y_train,
128+
x_threshold = None,
129+
y_threshold = None,
130+
y_asymptotic = np.nan
131+
):
132+
"""
133+
Util function to interpolate and extrapolate training data (x_train, y_train)
134+
to new points x_test using a step function
135+
136+
Parameters:
137+
-------
138+
x_test : array_like
139+
1-D array of x-values for which training data should be interpolated
140+
x_train : array_like
141+
1-D array of x-values of training data
142+
y_train : array_like
143+
1-D array of y-values of training data
144+
x_threshold : float, optional
145+
Lower threshold to filter x_train. Defaults to None.
146+
y_threshold : float, optional
147+
Lower threshold to filter y_train. Defaults to None.
148+
y_asymptotic : float, optional
149+
Return value if x_test > x_train. Defaults to np.nan.
150+
151+
Returns
152+
-------
153+
np.array
154+
interpolated values y_test for the test points x_test
155+
"""
156+
157+
# preprocess interpolation data
158+
x_test, x_train, y_train = _preprocess_interpolation_data(
159+
x_test, x_train, y_train, None, None, x_threshold, y_threshold
160+
)
161+
162+
# handle case of small training data sizes
163+
if x_train.size < 2:
164+
return _interpolate_small_input(x_test, x_train, y_train, None, y_asymptotic)
165+
166+
# find indeces of x_test if sorted into x_train
167+
if not all(sorted(x_train) == x_train):
168+
raise ValueError('Input array x_train must be sorted in ascending order.')
169+
indx = np.searchsorted(x_train, x_test)
170+
y_test = y_train[indx.clip(max = len(x_train) - 1)]
171+
y_test[indx == len(x_train)] = y_asymptotic
172+
173+
return y_test
174+
175+
def _preprocess_interpolation_data(
176+
x_test,
177+
x_train,
178+
y_train,
179+
x_scale,
180+
y_scale,
181+
x_threshold,
182+
y_threshold
183+
):
184+
"""
185+
helper function to preprocess interpolation training and test data by filtering data below
186+
thresholds and converting to log scale if required
187+
"""
188+
189+
if x_train.shape != y_train.shape:
190+
raise ValueError(f'Incompatible shapes of input data, x_train {x_train.shape} '
191+
f'and y_train {y_train.shape}. Should be the same')
192+
193+
# transform input to float arrays
194+
x_test, x_train, y_train = (np.array(x_test).astype(float),
195+
np.array(x_train).astype(float),
196+
np.array(y_train).astype(float))
197+
198+
# cut x and y above threshold
199+
if x_threshold or x_threshold==0:
200+
x_th = np.asarray(x_train > x_threshold).squeeze()
201+
x_train = x_train[x_th]
202+
y_train = y_train[x_th]
203+
204+
if y_threshold or y_threshold==0:
205+
y_th = np.asarray(y_train > y_threshold).squeeze()
206+
x_train = x_train[y_th]
207+
y_train = y_train[y_th]
208+
209+
# convert to log scale
210+
if x_scale == 'log':
211+
x_train, x_test = np.log10(x_train), np.log10(x_test)
212+
if y_scale == 'log':
213+
y_train = np.log10(y_train)
214+
215+
return (x_test, x_train, y_train)
216+
217+
def _interpolate_small_input(x_test, x_train, y_train, y_scale, y_asymptotic):
218+
"""
219+
helper function to handle if interpolation data is small (empty or one point)
220+
"""
221+
# return y_asymptotic if x_train and y_train empty
222+
if x_train.size == 0:
223+
return np.full_like(x_test, y_asymptotic)
224+
225+
# reconvert logarithmic y_scale to normal y_train
226+
if y_scale == 'log':
227+
y_train = np.power(10., y_train)
228+
229+
# if only one (x_train, y_train), return stepfunction with
230+
# y_train if x_test < x_train and y_asymtotic if x_test > x_train
231+
y_test = np.full_like(x_test, y_train[0])
232+
y_test[np.squeeze(x_test) > np.squeeze(x_train)] = y_asymptotic
233+
return y_test
234+
235+
def group_frequency(frequency, value, n_sig_dig=2):
236+
"""
237+
Util function to aggregate (add) frequencies for equal values
238+
239+
Parameters:
240+
------
241+
frequency : array_like
242+
Frequency array
243+
value : array_like
244+
Value array in ascending order
245+
n_sig_dig : int
246+
number of significant digits for value when grouping frequency.
247+
Defaults to 2.
248+
249+
Returns:
250+
------
251+
tuple
252+
(frequency array after aggregation,
253+
unique value array in ascending order)
254+
"""
255+
frequency, value = np.array(frequency), np.array(value)
256+
if frequency.size == 0 and value.size == 0:
257+
return ([], [])
258+
259+
if len(value) != len(np.unique(sig_dig_list(value, n_sig_dig=n_sig_dig))):
260+
#check ordering of value
261+
if not all(sorted(value) == value):
262+
raise ValueError('Value array must be sorted in ascending order.')
263+
# add frequency for equal value
264+
value, start_indices = np.unique(sig_dig_list(value, n_sig_dig=n_sig_dig), return_index=True)
265+
start_indices = np.insert(start_indices, len(value), len(frequency))
266+
frequency = np.array([
267+
sum(frequency[start_indices[i]:start_indices[i+1]])
268+
for i in range(len(value))
269+
])
270+
return frequency, value

0 commit comments

Comments
 (0)