|
1 | 1 | import numpy as np |
| 2 | +import scipy.linalg as la |
| 3 | +from dask import delayed |
| 4 | +from scipy.signal import hilbert |
| 5 | +import xarray as xr |
| 6 | +from dask.distributed import Client, LocalCluster |
2 | 7 |
|
3 | 8 | # functions |
4 | 9 | #========================================= |
@@ -121,3 +126,155 @@ def my_eof_interp(M,nmodes,errmin=1e-15,repmax=None): |
121 | 126 | vi = (M.T+Mmean).T |
122 | 127 |
|
123 | 128 | return vi |
| 129 | + |
| 130 | +#========================================= |
| 131 | +# PERFORM COMPLEX EOF |
| 132 | +#========================================= |
| 133 | +def ceof(lon, lat, data, nkp = 10, parallel = True): |
| 134 | + ''' Complex (Hilbert) EOF to detect propagating features: waves, meanders, etc. |
| 135 | + Note: the mean field in each coordinate is subtracted within the function. |
| 136 | + Do not subtract the time-mean field before inputing. |
| 137 | + NaN values are removed in the algorithm. |
| 138 | + The user can input the data as it is. |
| 139 | + |
| 140 | + First written in MATLAB and found in Prof. Daniel J. Vimont webpage |
| 141 | + (https://www.aos.wisc.edu/~dvimont/matlab/Stat_Tools/complex_eof.html) |
| 142 | + ============================================================================== |
| 143 | + INPUT: |
| 144 | + lon = longitudes (array) |
| 145 | + lat = latitude (array) |
| 146 | + data = original data set [time, lat, lon] |
| 147 | + nkp = number of modes to return (default = 10) |
| 148 | + parallel = create a standard client kernel for parallel computing |
| 149 | + [switch parallel to False, in case you created your own client] |
| 150 | +
|
| 151 | + OUTPUT: |
| 152 | + The variables below return inside a DataArray. |
| 153 | + per = percent variance explained (real eigenvalues) |
| 154 | + modes = first nkp complex loadings or eigenvectors [lat, lon, nkp] |
| 155 | + SpAmp = spatial amplitude [lat, lon, nkp] |
| 156 | + SpPhase = spatial phase [lat, lon, nkp] |
| 157 | + pcs = first nkp complex principal components or amplitudes [time, nkp] |
| 158 | + TAmp = temporal amplitude [time, nkp] |
| 159 | + TPhase = temporal phase [time, nkp] |
| 160 | + ============================================================================== |
| 161 | + ''' |
| 162 | + # Configure client for parallel computing |
| 163 | + if parallel: |
| 164 | + cluster = LocalCluster() |
| 165 | + client = Client(cluster) |
| 166 | + |
| 167 | + # Organizing the data as time vs space |
| 168 | + data_ceof = _org_data_ceof(lon, lat, data) |
| 169 | + # We need to remove the mean field (i.e., the trend) in each coordinate to |
| 170 | + # evaluate the variability |
| 171 | + data_ceof = data_ceof - data_ceof.mean('time') |
| 172 | + |
| 173 | + # The variables below are useful later |
| 174 | + load_real = np.zeros([data_ceof.shape[1], nkp])*np.nan |
| 175 | + load_imag = np.zeros([data_ceof.shape[1], nkp])*np.nan |
| 176 | + # It is necessary to remove the nan values of the matrix to solve the eigenvalue problem |
| 177 | + nan_values = np.isnan(data_ceof[0,:]) # We can just look at each coordinate along a single time |
| 178 | + data_ceof = data_ceof[:,~nan_values] # Then, we remove all these coordinates in all of the occurences |
| 179 | + |
| 180 | + ntim, npt = data_ceof.shape |
| 181 | + |
| 182 | + # Hilbert transform: input sequence x and returns a complex result of the same length |
| 183 | + print('1: Performing Hilbert transform') |
| 184 | + data_hilbert = hilbert(data_ceof) |
| 185 | + # Compute the covariance matrix in the Hilbert transform |
| 186 | + print('2: Computing covariance matrix') |
| 187 | + c = delayed(np.dot)(data_hilbert.conjugate().T, data_hilbert).compute()/ntim |
| 188 | + print('3: Solving the eigenvalue problem') |
| 189 | + lamda, loadings = delayed(la.eig)(c).compute() # lamda: eigenvalue, loadings: eigenvectors |
| 190 | + |
| 191 | + l = lamda.conjugate().T; k = np.argsort(l) |
| 192 | + lamda, loadings = np.flip(l[k]), np.fliplr(loadings[:,k]) |
| 193 | + loadings = loadings[:,:nkp] |
| 194 | + # In case there were nan values in the orginal data, we need to perform the approach below: |
| 195 | + load_real[~nan_values,:] = loadings.real.copy() |
| 196 | + load_imag[~nan_values,:] = loadings.imag.copy() |
| 197 | + load = load_real + 1j*load_imag |
| 198 | + modes = load.reshape((len(lat),len(lon), nkp)) |
| 199 | + |
| 200 | + per = lamda.real*100/np.sum(lamda.real) |
| 201 | + per = per[:nkp].copy() |
| 202 | + pcs = np.dot(data_hilbert,loadings) |
| 203 | + |
| 204 | + sp_amp, sp_phase, t_amp, t_phase = _amplitude_phase(load, pcs) |
| 205 | + sp_amp = sp_amp.reshape((len(lat),len(lon), nkp)) |
| 206 | + sp_phase = sp_phase.reshape((len(lat),len(lon), nkp)) |
| 207 | + |
| 208 | + print('Done! \U0001F600') |
| 209 | + |
| 210 | + dims = ["lat", "lon", "nkp", "time"] |
| 211 | + ds = xr.Dataset({"per":(dims[2], per),"modes":(dims[:-1], modes),"SpAmp":(dims[:-1], sp_amp), |
| 212 | + "SpPhase":(dims[:-1], sp_phase),"pcs":(dims[-2:][::-1], pcs),"TAmp":(dims[-2:][::-1], t_amp), |
| 213 | + "TPhase":(dims[-2:][::-1], t_phase)}, |
| 214 | + coords={"lat":(dims[0], lat), "lon":(dims[1], lon), "nkp":(dims[2], np.arange(nkp)), |
| 215 | + "time":(dims[3], np.arange(len(data_ceof)))}) |
| 216 | + |
| 217 | + return ds |
| 218 | + |
| 219 | +def _org_data_ceof(lon, lat, data): |
| 220 | + dims = ["time", "lat", "lon"] |
| 221 | + datxarray = xr.Dataset({"data_latlon": (dims, data)}, |
| 222 | + coords={'lat':(dims[1], lat), 'lon':(dims[2], lon)}) |
| 223 | + data_ceof = datxarray.stack(lat_lon=("lat", "lon")).data_latlon |
| 224 | + return data_ceof |
| 225 | + |
| 226 | +def _amplitude_phase(evecs, amp): |
| 227 | + ''' Complex (Hilbert) EOF |
| 228 | + First written in MATLAB and found in the webpage below |
| 229 | + (https://www.jsg.utexas.edu/fu/files/GEO391-W11-CEOF.pdf) |
| 230 | + |
| 231 | + =========================================================================== |
| 232 | + INPUT: |
| 233 | + evecs = first nkp complex loadings or eigenvectors [lat, lon, nkp] |
| 234 | + amp = first nkp complex principal components or amplitudes [time, nkp] |
| 235 | +
|
| 236 | + OUTPUT: |
| 237 | + SpAmp = spatial amplitude [lat, lon, nkp] |
| 238 | + SpPhase = spatial phase [lat, lon, nkp] |
| 239 | + TAmp = temporal amplitude [time, nkp] |
| 240 | + TPhase = temporal phase [time, nkp] |
| 241 | + =========================================================================== |
| 242 | + ''' |
| 243 | + # Spatial amplitude |
| 244 | + SpAmp = pow(np.multiply(evecs, np.conj(evecs)),0.5) |
| 245 | + theta = np.arctan2(evecs.imag, evecs.real) |
| 246 | + # Spatial phase |
| 247 | + SpPhase = np.divide(np.multiply(theta, 180), np.pi) |
| 248 | + |
| 249 | + # Temporal amplitude |
| 250 | + TAmp = pow(np.multiply(amp, np.conj(amp)), 0.5) |
| 251 | + # Temporal phase |
| 252 | + phit = np.arctan2(amp.imag, amp.real) |
| 253 | + TPhase = np.divide(np.multiply(phit, 180), np.pi) |
| 254 | + |
| 255 | + return SpAmp, SpPhase, TAmp, TPhase |
| 256 | + |
| 257 | +def reconstruct_ceof(DataMean, amp, modes, n, day): |
| 258 | + ''' Reconstrucion of daily CEOF modes individually similar to Majumder et al. (2019). |
| 259 | + Here, the mean field in each coordinate is added within the function. |
| 260 | + Besides, each mode is reconstructed individually, instead of computing the sum of the |
| 261 | + reconstruction of different modes. |
| 262 | + |
| 263 | + ========================================================================================= |
| 264 | + INPUT: |
| 265 | + DataMean = time-mean of the original data [lat, lon] (e.g., np.nanmean(data,axis=0)) |
| 266 | + amp = principal components or amplitudes [time, nkp] |
| 267 | + mode = eigenvectors or loadings [lat, lon, nkp] |
| 268 | + n = mode of variability to be reconstructed |
| 269 | + day = day to be reconstructed. |
| 270 | +
|
| 271 | + OUTPUT: |
| 272 | + RecCEOF = reconstruction of a CEOF mode on a chosen day [lat, lon] |
| 273 | + ========================================================================================= |
| 274 | + ''' |
| 275 | + |
| 276 | + # Majumder et al (2019) compute the reconstructed CEOF field as the real part of the multiplication between |
| 277 | + # the coefficient of expansion (i.e., amplitude) and the complex conjugate of the loading (i.e., mode) |
| 278 | + Rec_ceof = amp[day,n]*np.conj(modes[:,:,n]) |
| 279 | + RecCEOF = Rec_ceof.real + DataMean |
| 280 | + return RecCEOF |
0 commit comments