|
| 1 | +""" |
| 2 | +This file is part of CLIMADA. |
| 3 | +
|
| 4 | +Copyright (C) 2017 ETH Zurich, CLIMADA contributors listed in AUTHORS. |
| 5 | +
|
| 6 | +CLIMADA is free software: you can redistribute it and/or modify it under the |
| 7 | +terms of the GNU General Public License as published by the Free |
| 8 | +Software Foundation, version 3. |
| 9 | +
|
| 10 | +CLIMADA is distributed in the hope that it will be useful, but WITHOUT ANY |
| 11 | +WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A |
| 12 | +PARTICULAR PURPOSE. See the GNU General Public License for more details. |
| 13 | +
|
| 14 | +You should have received a copy of the GNU General Public License along |
| 15 | +with CLIMADA. If not, see <https://www.gnu.org/licenses/>. |
| 16 | +
|
| 17 | +--- |
| 18 | +
|
| 19 | +Define interpolation and extrapolation functions for calculating (local) exceedance frequencies and return periods |
| 20 | +""" |
| 21 | + |
| 22 | + |
| 23 | +import logging |
| 24 | + |
| 25 | +import numpy as np |
| 26 | +from scipy import interpolate |
| 27 | + |
| 28 | +from climada.util.value_representation import sig_dig_list |
| 29 | + |
| 30 | +LOGGER = logging.getLogger(__name__) |
| 31 | + |
| 32 | +def interpolate_ev( |
| 33 | + x_test, |
| 34 | + x_train, |
| 35 | + y_train, |
| 36 | + logx = False, |
| 37 | + logy = False, |
| 38 | + x_threshold = None, |
| 39 | + y_threshold = None, |
| 40 | + extrapolation = False, |
| 41 | + y_asymptotic = np.nan |
| 42 | + ): |
| 43 | + """ |
| 44 | + Util function to interpolate (and extrapolate) training data (x_train, y_train) |
| 45 | + to new points x_test with several options (log scale, thresholds) |
| 46 | +
|
| 47 | + Parameters: |
| 48 | + ------- |
| 49 | + x_test : array_like |
| 50 | + 1-D array of x-values for which training data should be interpolated |
| 51 | + x_train : array_like |
| 52 | + 1-D array of x-values of training data |
| 53 | + y_train : array_like |
| 54 | + 1-D array of y-values of training data |
| 55 | + logx : bool, optional |
| 56 | + If set to True, x_values are converted to log scale. Defaults to False. |
| 57 | + logy : bool, optional |
| 58 | + If set to True, y_values are converted to log scale. Defaults to False. |
| 59 | + x_threshold : float, optional |
| 60 | + Lower threshold to filter x_train. Defaults to None. |
| 61 | + y_threshold : float, optional |
| 62 | + Lower threshold to filter y_train. Defaults to None. |
| 63 | + extrapolation : bool, optional |
| 64 | + If set to True, values will be extrapolated. If set to False, x_test values |
| 65 | + smaller than x_train will be assigned y_train[0] (x_train must be sorted in |
| 66 | + ascending order), and x_test values larger than x_train will be assigned |
| 67 | + y_asymptotic. Defaults to False |
| 68 | + y_asymptotic : float, optional |
| 69 | + Return value and if extrapolation is True or x_train.size < 2, for x_test |
| 70 | + values larger than x_train. Defaults to np.nan. |
| 71 | +
|
| 72 | + Returns |
| 73 | + ------- |
| 74 | + np.array |
| 75 | + interpolated values y_test for the test points x_test |
| 76 | + """ |
| 77 | + |
| 78 | + # preprocess interpolation data |
| 79 | + x_test, x_train, y_train = _preprocess_interpolation_data( |
| 80 | + x_test, x_train, y_train, logx, logy, x_threshold, y_threshold |
| 81 | + ) |
| 82 | + |
| 83 | + # handle case of small training data sizes |
| 84 | + if x_train.size < 2: |
| 85 | + LOGGER.warning('Data is being extrapolated.') |
| 86 | + return _interpolate_small_input(x_test, x_train, y_train, logy, y_asymptotic) |
| 87 | + |
| 88 | + # calculate fill values |
| 89 | + if extrapolation: |
| 90 | + fill_value = 'extrapolate' |
| 91 | + if np.min(x_test) < np.min(x_train) or np.max(x_test) > np.max(x_train): |
| 92 | + LOGGER.warning('Data is being extrapolated.') |
| 93 | + else: |
| 94 | + if not all(sorted(x_train) == x_train): |
| 95 | + raise ValueError('x_train array must be sorted in ascending order.') |
| 96 | + fill_value = (y_train[0], np.log10(y_asymptotic) if logy else y_asymptotic) |
| 97 | + |
| 98 | + interpolation = interpolate.interp1d( |
| 99 | + x_train, y_train, fill_value=fill_value, bounds_error=False) |
| 100 | + y_test = interpolation(x_test) |
| 101 | + |
| 102 | + # adapt output scale |
| 103 | + if logy: |
| 104 | + y_test = np.power(10., y_test) |
| 105 | + return y_test |
| 106 | + |
| 107 | +def stepfunction_ev( |
| 108 | + x_test, |
| 109 | + x_train, |
| 110 | + y_train, |
| 111 | + x_threshold = None, |
| 112 | + y_threshold = None, |
| 113 | + y_asymptotic = np.nan |
| 114 | + ): |
| 115 | + """ |
| 116 | + Util function to interpolate and extrapolate training data (x_train, y_train) |
| 117 | + to new points x_test using a step function |
| 118 | +
|
| 119 | + Parameters: |
| 120 | + ------- |
| 121 | + x_test : array_like |
| 122 | + 1-D array of x-values for which training data should be interpolated |
| 123 | + x_train : array_like |
| 124 | + 1-D array of x-values of training data |
| 125 | + y_train : array_like |
| 126 | + 1-D array of y-values of training data |
| 127 | + x_threshold : float, optional |
| 128 | + Lower threshold to filter x_train. Defaults to None. |
| 129 | + y_threshold : float, optional |
| 130 | + Lower threshold to filter y_train. Defaults to None. |
| 131 | + y_asymptotic : float, optional |
| 132 | + Return value if x_test > x_train. Defaults to np.nan. |
| 133 | +
|
| 134 | + Returns |
| 135 | + ------- |
| 136 | + np.array |
| 137 | + interpolated values y_test for the test points x_test |
| 138 | + """ |
| 139 | + |
| 140 | + # preprocess interpolation data |
| 141 | + x_test, x_train, y_train = _preprocess_interpolation_data( |
| 142 | + x_test, x_train, y_train, None, None, x_threshold, y_threshold |
| 143 | + ) |
| 144 | + |
| 145 | + # handle case of small training data sizes |
| 146 | + if x_train.size < 2: |
| 147 | + return _interpolate_small_input(x_test, x_train, y_train, None, y_asymptotic) |
| 148 | + |
| 149 | + # find indices of x_test if sorted into x_train |
| 150 | + if not all(sorted(x_train) == x_train): |
| 151 | + raise ValueError('Input array x_train must be sorted in ascending order.') |
| 152 | + indx = np.searchsorted(x_train, x_test) |
| 153 | + y_test = y_train[indx.clip(max = len(x_train) - 1)] |
| 154 | + y_test[indx == len(x_train)] = y_asymptotic |
| 155 | + |
| 156 | + return y_test |
| 157 | + |
| 158 | +def _preprocess_interpolation_data( |
| 159 | + x_test, |
| 160 | + x_train, |
| 161 | + y_train, |
| 162 | + logx, |
| 163 | + logy, |
| 164 | + x_threshold, |
| 165 | + y_threshold |
| 166 | + ): |
| 167 | + """ |
| 168 | + helper function to preprocess interpolation training and test data by filtering data below |
| 169 | + thresholds and converting to log scale if required |
| 170 | + """ |
| 171 | + |
| 172 | + if x_train.shape != y_train.shape: |
| 173 | + raise ValueError(f'Incompatible shapes of input data, x_train {x_train.shape} ' |
| 174 | + f'and y_train {y_train.shape}. Should be the same') |
| 175 | + |
| 176 | + # transform input to float arrays |
| 177 | + x_test, x_train, y_train = (np.array(x_test).astype(float), |
| 178 | + np.array(x_train).astype(float), |
| 179 | + np.array(y_train).astype(float)) |
| 180 | + |
| 181 | + # cut x and y above threshold |
| 182 | + if x_threshold or x_threshold==0: |
| 183 | + x_th = np.asarray(x_train > x_threshold).squeeze() |
| 184 | + x_train = x_train[x_th] |
| 185 | + y_train = y_train[x_th] |
| 186 | + |
| 187 | + if y_threshold or y_threshold==0: |
| 188 | + y_th = np.asarray(y_train > y_threshold).squeeze() |
| 189 | + x_train = x_train[y_th] |
| 190 | + y_train = y_train[y_th] |
| 191 | + |
| 192 | + # convert to log scale |
| 193 | + if logx: |
| 194 | + x_train, x_test = np.log10(x_train), np.log10(x_test) |
| 195 | + if logy: |
| 196 | + y_train = np.log10(y_train) |
| 197 | + |
| 198 | + return (x_test, x_train, y_train) |
| 199 | + |
| 200 | +def _interpolate_small_input(x_test, x_train, y_train, logy, y_asymptotic): |
| 201 | + """ |
| 202 | + helper function to handle if interpolation data is small (empty or one point) |
| 203 | + """ |
| 204 | + # return y_asymptotic if x_train and y_train empty |
| 205 | + if x_train.size == 0: |
| 206 | + return np.full_like(x_test, y_asymptotic) |
| 207 | + |
| 208 | + # reconvert logarithmic y_train to original y_train |
| 209 | + if logy: |
| 210 | + y_train = np.power(10., y_train) |
| 211 | + |
| 212 | + # if only one (x_train, y_train), return stepfunction with |
| 213 | + # y_train if x_test < x_train and y_asymtotic if x_test > x_train |
| 214 | + y_test = np.full_like(x_test, y_train[0]) |
| 215 | + y_test[np.squeeze(x_test) > np.squeeze(x_train)] = y_asymptotic |
| 216 | + return y_test |
| 217 | + |
| 218 | +def group_frequency(frequency, value, n_sig_dig=2): |
| 219 | + """ |
| 220 | + Util function to aggregate (add) frequencies for equal values |
| 221 | +
|
| 222 | + Parameters: |
| 223 | + ------ |
| 224 | + frequency : array_like |
| 225 | + Frequency array |
| 226 | + value : array_like |
| 227 | + Value array in ascending order |
| 228 | + n_sig_dig : int |
| 229 | + number of significant digits for value when grouping frequency. |
| 230 | + Defaults to 2. |
| 231 | +
|
| 232 | + Returns: |
| 233 | + ------ |
| 234 | + tuple |
| 235 | + (frequency array after aggregation, |
| 236 | + unique value array in ascending order) |
| 237 | + """ |
| 238 | + frequency, value = np.array(frequency), np.array(value) |
| 239 | + if frequency.size == 0 and value.size == 0: |
| 240 | + return ([], []) |
| 241 | + |
| 242 | + if len(value) != len(np.unique(sig_dig_list(value, n_sig_dig=n_sig_dig))): |
| 243 | + #check ordering of value |
| 244 | + if not all(sorted(value) == value): |
| 245 | + raise ValueError('Value array must be sorted in ascending order.') |
| 246 | + # add frequency for equal value |
| 247 | + value, start_indices = np.unique( |
| 248 | + sig_dig_list(value, n_sig_dig=n_sig_dig), return_index=True) |
| 249 | + start_indices = np.insert(start_indices, len(value), len(frequency)) |
| 250 | + frequency = np.array([ |
| 251 | + sum(frequency[start_indices[i]:start_indices[i+1]]) |
| 252 | + for i in range(len(value)) |
| 253 | + ]) |
| 254 | + return frequency, value |
0 commit comments