Merge pull request #9 from OceanLabPy/complex_eof

vsilvafelipe · web-flow · commit 0e080958d560 · 2021-10-05T12:44:16.000-03:00
Complex eof
diff --git a/OceanLab/__init.py__ b/OceanLab/__init.py__
@@ -1,5 +1,5 @@
 from .dyn import zeta, vmode_amp, psi2uv, eqmodes, vmodes
-from .eof  import my_eof_interp, eoft
+from .eof  import my_eof_interp, eoft, ceof, reconstruct_ceof
 from .oa  import scaloa, vectoa
 from .utils import argdistnear, meaneddy
 
diff --git a/OceanLab/eof.py b/OceanLab/eof.py
@@ -1,4 +1,9 @@
 import numpy as np
+import scipy.linalg as la
+from dask import delayed
+from scipy.signal import hilbert
+import xarray as xr
+from dask.distributed import Client, LocalCluster
 
 # functions
 #=========================================
@@ -121,3 +126,155 @@ def my_eof_interp(M,nmodes,errmin=1e-15,repmax=None):
     vi = (M.T+Mmean).T
 
     return vi
+
+#=========================================
+# PERFORM COMPLEX EOF
+#=========================================
+def ceof(lon, lat, data, nkp = 10, parallel = True):
+    ''' Complex (Hilbert) EOF to detect propagating features: waves, meanders, etc.
+    Note: the mean field in each coordinate is subtracted within the function.
+    Do not subtract the time-mean field before inputting.
+    Grid points that are NaN at the first time step are removed before the
+    decomposition and are filled with NaN in the spatial outputs.
+    The user can input the data as it is.
+
+    First written in MATLAB and found in Prof. Daniel J. Vimont webpage
+    (https://www.aos.wisc.edu/~dvimont/matlab/Stat_Tools/complex_eof.html)
+    ==============================================================================
+    INPUT:
+       lon      = longitudes (array)
+       lat      = latitude (array)
+       data     = original data set [time, lat, lon]
+       nkp      = number of modes to return (default = 10)
+       parallel = create a standard local cluster/client for parallel computing;
+                  both are closed again before the function returns
+                  [switch parallel to False, in case you created your own client]
+
+    OUTPUT:
+       The variables below return inside a Dataset.
+       per      = percent variance explained (real eigenvalues)
+       modes    = first nkp complex loadings or eigenvectors [lat, lon, nkp]
+       SpAmp    = spatial amplitude [lat, lon, nkp]
+       SpPhase  = spatial phase [lat, lon, nkp]
+       pcs      = first nkp complex principal components or amplitudes [time, nkp]
+       TAmp     = temporal amplitude [time, nkp]
+       TPhase   = temporal phase [time, nkp]
+    ==============================================================================
+    '''
+    cluster = None
+    client = None
+    try:
+        # Configure client for parallel computing
+        if parallel:
+            cluster = LocalCluster()
+            client = Client(cluster)
+
+        # Organizing the data as time vs space
+        data_ceof = _org_data_ceof(lon, lat, data)
+        # We need to remove the mean field (i.e., the trend) in each coordinate to
+        # evaluate the variability
+        data_ceof = data_ceof - data_ceof.mean('time')
+
+        # NaN-filled containers so that removed coordinates stay NaN in the output
+        load_real = np.zeros([data_ceof.shape[1], nkp])*np.nan
+        load_imag = np.zeros([data_ceof.shape[1], nkp])*np.nan
+        # It is necessary to remove the nan values of the matrix to solve the eigenvalue problem
+        nan_values = np.isnan(data_ceof[0,:]) # We can just look at each coordinate along a single time
+        data_ceof = data_ceof[:,~nan_values]  # Then, we remove all these coordinates in all of the occurrences
+
+        ntim, npt = data_ceof.shape
+
+        # Hilbert transform of the time series at every coordinate. The matrix is
+        # [time, space], so the transform must run along axis 0; scipy's default
+        # (axis=-1) would transform along space instead of time.
+        print('1: Performing Hilbert transform')
+        data_hilbert = hilbert(data_ceof, axis=0)
+        # Compute the covariance matrix in the Hilbert transform
+        print('2: Computing covariance matrix')
+        c = delayed(np.dot)(data_hilbert.conjugate().T, data_hilbert).compute()/ntim
+        print('3: Solving the eigenvalue problem')
+        lamda, loadings = delayed(la.eig)(c).compute() # lamda: eigenvalue, loadings: eigenvectors
+
+        # Sort eigenvalues/eigenvectors in descending order and keep the first nkp
+        l = lamda.conjugate().T; k = np.argsort(l)
+        lamda, loadings = np.flip(l[k]), np.fliplr(loadings[:,k])
+        loadings = loadings[:,:nkp]
+        # In case there were nan values in the original data, we need to perform the approach below:
+        load_real[~nan_values,:] = loadings.real.copy()
+        load_imag[~nan_values,:] = loadings.imag.copy()
+        load = load_real + 1j*load_imag
+        modes = load.reshape((len(lat),len(lon), nkp))
+
+        per = lamda.real*100/np.sum(lamda.real)
+        per = per[:nkp].copy()
+        pcs = np.dot(data_hilbert,loadings)
+
+        sp_amp, sp_phase, t_amp, t_phase = _amplitude_phase(load, pcs)
+        sp_amp   = sp_amp.reshape((len(lat),len(lon), nkp))
+        sp_phase = sp_phase.reshape((len(lat),len(lon), nkp))
+
+        print('Done! \U0001F600')
+
+        dims = ["lat", "lon", "nkp", "time"]
+        ds = xr.Dataset({"per":(dims[2], per),"modes":(dims[:-1], modes),"SpAmp":(dims[:-1], sp_amp),
+                        "SpPhase":(dims[:-1], sp_phase),"pcs":(dims[-2:][::-1], pcs),"TAmp":(dims[-2:][::-1], t_amp),
+                        "TPhase":(dims[-2:][::-1], t_phase)},
+                        coords={"lat":(dims[0], lat), "lon":(dims[1], lon), "nkp":(dims[2], np.arange(nkp)),
+                               "time":(dims[3], np.arange(len(data_ceof)))})
+
+        return ds
+    finally:
+        # Release the cluster/client created here so repeated calls do not leak workers
+        if client is not None:
+            client.close()
+        if cluster is not None:
+            cluster.close()
+
+def _org_data_ceof(lon, lat, data):
+    # Wrap the [time, lat, lon] array in a Dataset and collapse the two
+    # horizontal dimensions into a single stacked "lat_lon" dimension.
+    axis_names = ["time", "lat", "lon"]
+    grid_coords = {'lat':(axis_names[1], lat), 'lon':(axis_names[2], lon)}
+    gridded = xr.Dataset({"data_latlon": (axis_names, data)}, coords=grid_coords)
+    stacked = gridded.stack(lat_lon=("lat", "lon"))
+    return stacked.data_latlon
+
+def _amplitude_phase(evecs, amp):
+    ''' Amplitude and phase (in degrees) of complex EOF loadings and PCs.
+    First written in MATLAB and found in the webpage below
+    (https://www.jsg.utexas.edu/fu/files/GEO391-W11-CEOF.pdf)
+
+    All operations are element-wise, so each output has the shape of its input.
+    ===========================================================================
+    INPUT:
+       evecs   = first nkp complex loadings or eigenvectors
+                 (ceof passes them flattened as [lat*lon, nkp])
+       amp     = first nkp complex principal components or amplitudes [time, nkp]
+
+    OUTPUT:
+       SpAmp   = spatial amplitude, sqrt(evecs * conj(evecs)) (same shape as evecs)
+       SpPhase = spatial phase in degrees (same shape as evecs)
+       TAmp    = temporal amplitude, sqrt(amp * conj(amp)) [time, nkp]
+       TPhase  = temporal phase in degrees [time, nkp]
+    ===========================================================================
+    '''
+    def _modulus_and_degrees(field):
+        # Modulus via z * conj(z); phase via arctan2(imag, real) converted to degrees
+        modulus = (field * np.conj(field)) ** 0.5
+        radians = np.arctan2(field.imag, field.real)
+        return modulus, radians * 180 / np.pi
+
+    SpAmp, SpPhase = _modulus_and_degrees(evecs)
+    TAmp, TPhase = _modulus_and_degrees(amp)
+
+    return SpAmp, SpPhase, TAmp, TPhase
+
+def reconstruct_ceof(DataMean, amp, modes, n, day):
+    ''' Reconstruction of daily CEOF modes individually similar to Majumder et al. (2019).
+    Here, the mean field in each coordinate is added within the function.
+    Besides, each mode is reconstructed individually, instead of computing the sum of the
+    reconstruction of different modes.
+
+    =========================================================================================
+    INPUT:
+       DataMean = time-mean of the original data [lat, lon] (e.g., np.nanmean(data,axis=0))
+       amp      = principal components or amplitudes [time, nkp]
+       modes    = eigenvectors or loadings [lat, lon, nkp]
+       n        = index of the mode of variability to be reconstructed
+       day      = index along time of the day to be reconstructed.
+
+    OUTPUT:
+       RecCEOF  = reconstruction of a CEOF mode on a chosen day [lat, lon]
+    =========================================================================================
+    '''
+
+    # Majumder et al (2019) compute the reconstructed CEOF field as the real part of the product of
+    # the expansion coefficient (i.e., amplitude) and the complex conjugate of the loading (i.e., mode)
+    coefficient = amp[day, n]
+    pattern = modes[:, :, n]
+    anomaly = (coefficient * np.conj(pattern)).real
+    # Add the time-mean back to obtain the full field
+    return anomaly + DataMean
diff --git a/README.md b/README.md
@@ -24,6 +24,8 @@ Check `examples` folder in our [github repository](github.com/iuryt/OceanLab/exa
 - **EOF**
   - *eoft()*: Calculates the Empirical Orthogonal Functions;
   - *my_eof_interp()*: Fillgaps on matrix based on EOFs (translated from Cesar Rocha Matlab version);
+  - *ceof()*: Performs the Complex (or Hilbert) Empirical Orthogonal Functions decomposition;
+  - *reconstruct_ceof()*: Reconstructs the CEOF modes individually;
 - **UTILS**
   - *argdistnear()*: Searchs the position of the closest points in an array to a reference point;
   - *meaneddy()*: Performs an eddy-mean decomposition with a low-pass filter;