Winterstorm_damage_projections_Europe/functions_projUncWS.py at main · luseverin/Winterstorm_damage_projections_Europe · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
#import matplotlib.pyplot as plt
import xarray as xr
import numpy as np
import copy as cp
import pandas as pd
from scipy import sparse
import cartopy.crs as ccrs
from climada.hazard import Hazard,Centroids

def read_sfc(filepath, datestart, dateend):
    """Read in a time slice of a surface field from datestart to dateend.
    Try using datetime64 and if that doesn't work decode times manually.
    Args:
        filepath (string) = directory where files are located
        datestart (string) = start date for time slice
        dateend (string) = end date for time slice
    """

    #First try opening and doing the select assuming everything is working ok with the time axis
    try:
        dat = \
        xr.open_mfdataset\
        (filepath, coords="minimal", join="override", decode_times=True, use_cftime=True).\
        sel(time=slice(datestart, dateend))
        try:
            dat=dat.rename({"longitude":"lon", "latitude":"lat"}) #problematic coord names
            print("changing longitude --> lon, latitude --> lat")
        except: pass

    except:
        dat = xr.open_mfdataset(filepath, coords="minimal", join="override", decode_times = False)
        try:
            dat=dat.rename({"longitude":"lon", "latitude":"lat"}) #problematic coord names
        except: pass

        dat = xr.decode_cf(dat, use_cftime = True)
        dat = dat.sel(time=slice(datestart, dateend))
        datetimeindex = dat.indexes['time'].to_datetimeindex()
        dat['time'] = datetimeindex
        print("Something's wierd about the time axis, decoding manually")

    return dat

def read_field(filepath, datestart, dateend,latmin,latmax,lonmin,lonmax,plev):
    """Read in a time slice from datestart to dateend and calculate the zonal mean.
    Try using datetime64 and if that doesn't work decode times manually.
    Args:
        filepath (string) = path to files e.g., "/path/to/files/*.nc"
        datestart (string) = start date for time slice
        dateend (string) = end date for time slice
    """

    try:
        dat = xr.open_mfdataset(filepath, coords="minimal", join="override",
                 decode_times=True, use_cftime=True).\
                 sel(time=slice(datestart, dateend),lat=slice(latmin,latmax),lon=slice(lonmin,lonmax))

        if len(plev) == 1:
            dat = dat.sel(plev=plev,method="nearest", tolerance=1) #avoid issue for models with inaccurate plevs
        else:
            dat = dat.sel(plev=slice(plev[0]+1,plev[1]-1)) #manual tolerance of 1 because method="nearest" is not implemented for slices

    except:
        print("Something's wierd about the time axis, decoding manually")
        dat = xr.open_mfdataset(filepath, coords="minimal", join="override",
                   decode_times=False)

        dat=xr.decode_cf(dat, use_cftime=True)

        dat=dat.sel(time=slice(datestart, dateend),lat=slice(latmin,latmax),lon=slice(lonmin,lonmax))
        if len(plev) == 1:
            dat = dat.sel(plev=plev,method="nearest", tolerance=1) #avoid issue for models with inaccurate plevs
        else:
            dat = dat.sel(plev=slice(plev[0]+1,plev[1]-1))  #manual tolerance of 1 because method="nearest" is not implemented for slices

        datetimeindex=dat.indexes['time'].to_datetimeindex()
        dat['time'] = datetimeindex

    return dat

def get_lat_lon_res(ds):
    """Function to obtain the average lat and lon gridspacing from a dataset of a non regular model grid.
    Args:
        ds (xr.dataset or xr.dataarry) = input dataset from which average lat and lon resolutions must be calculated
    Ouputs:
        latres, lonres = average latitudinal and longitudinal resolutions
    """
    lat = ds.coords['lat']
    lon = ds.coords['lon']
    difflat = lat - lat.shift(lat=1)
    latres = difflat.mean().to_numpy()
    difflon = lon - lon.shift(lon=1)
    lonres = difflon.mean().to_numpy()
    return latres, lonres

def def_domain(ds,min_lat,max_lat,min_lon,max_lon):
    """Function that takes xr.dataset or xr.dataarry and box coordinates from a domain and crops the dataset or datarray to
    the domain defined by the box coordinates.
    Args:
        ds (xr.dataset or xr.dataarry) = input dataset which lat and lon needs to be cropped
        min_lat, max_lat = minimum and maximum latitudinal coordinates
        min_lon, max_lon = minimum and maximum longitudinal coordinates
    Ouputs:
        ds = xr.dataset or xr.dataarry cropped to the input domain
    """
    LatIndexer, LonIndexer = 'lat', 'lon'
    ds = ds.loc[{LatIndexer: slice(min_lat, max_lat),
                      LonIndexer: slice(min_lon, max_lon)}]
    return ds

def norm_lon(ds):
    """Function that takes xr.dataset or xr.dataarry and normalizes its longitude coordinate
    Args:
        ds (xr.dataset or xr.dataarry) = input dataset which lon coordinates needs to be normalized

    Ouputs:
        ds = xr.dataset or xr.dataarry with normalized lon coordinates
    """
    ds.coords['lon'] = (ds.coords['lon'] + 180) % 360 - 180
    return ds.sortby(ds.lon)

def get_ONDJFM_day(ds, months=[1,2,3,10,11,12],timedim="day"):
    """Function that takes xr.dataset or xr.dataarry and months of the year and returns a dataset corresponding to
    the specified months
    Args:
        ds (xr.dataset or xr.dataarry) = input dataset from which monthly data needs to be selected
        months = months which are requested, in ordinal format (1=January, 2=February, ...,12=December)
        timedim = name of the temporal coordinate of the dataset

    Ouputs:
        ds = xr.dataset or xr.dataarry corresponding to the months selected
    """
    return ds.isel({timedim:ds[timedim].dt.month.isin(months)})

## Define functions
def set_centroids(da,stack=True,haztype='WS',timeres="day",plot=False):
    '''Function which takes a xarray.DataArray as an input and returns a climada.hazard object, with centroids corresponding to
        latitude and longitude of the DataArray.'''

    period = da.name
    lat = da.lat
    lon = da.lon

    # number of points
    n_lat = len(lat)
    n_lon = len(lon)

    #stack first members and time resolution, and then latitutde and longitude to obtain the event matrix
    if stack:
        events = da.stack(events=("member",timeres)) #need to be of size n_ev * ncent = nmem*180 * ncent
        nmem = len(da["member"])
    else:
        events = da.rename({timeres:"events"})
        nmem = 1
    evmat = events.stack(cent=("lat","lon"))
    #print('Evmat: '+str(evmat.shape))
    n_ev = len(evmat["events"])

    #intiate Hazard object, using Centroids.from_pix_bounds
    haz = Hazard(haztype)

    #initiate lat and lon array for Centroids.from_lat_lon
    lat_ar = lat.values.repeat(len(lon))
    lon_ar = lon.values.reshape(1,n_lon).repeat(n_lat,axis=0).reshape(n_lon*n_lat)

    haz.centroids = Centroids.from_lat_lon(lat_ar,lon_ar)
    haz.intensity = sparse.csr_matrix(evmat)
    haz.units = 'm/s'
    ev_id = np.arange(n_ev, dtype=int)
    haz.event_id = ev_id
    ev_names = events.coords["events"].to_numpy().tolist()
    #ev_names = pd.to_datetime(evmat.events,format='YYYY-MM-DD')
    haz.event_name = ev_names
    haz.orig = np.zeros(n_ev, bool)
    haz.frequency = np.ones(n_ev)/(nmem*30)
    haz.fraction = haz.intensity.copy()
    haz.fraction.data.fill(1)
    haz.centroids.set_meta_to_lat_lon()
    #haz.centroids.set_geometry_points()
    haz.check()
    if plot:
        haz.centroids.plot()

    return haz

def sel_reg_exp(reg_ids,exp):
    """function that takes a list of country ISO codes reg_ids and a CLIMADA LitPop exposure
    object exp as inputs and returns a copy of the exposure object with only the regions selected in reg_ids"""
    sel_exp = cp.deepcopy(exp)
    sel_exp.gdf = sel_exp.gdf.where(sel_exp.gdf['region_id'].isin(reg_ids)).dropna()
    return sel_exp

def make_fn(addlist,basename="",sep="_",filetype=''):
    """Function to facilitate the creation of file names that takes a list of
    strings addlist, a string basename, a string sep and a string filetype
    as input and returns the string consisting of the keywords from addlist separated
    by the sep separator and concatenated with the string basename and the string filetype"""
    return sep.join(addlist)+sep+basename+filetype

#each impact function gets a preprocessing function to prepare the hazard data
def mask_qt(ds,q,mask_abs=None,timeres='day',stack=True,pastname='historical',futname='ssp585',cutarea=1000000):
    '''Function taking a dateset as an input, and returning it with the values below the quantile q at each grid cell
       masked. The mask is computed for each gridcell for the past period. Fields for which less than cutarea is above the
       quantile are dropped'''

    Upast = ds[pastname]
    Ufut = ds[futname]
    if stack:
        Upast = Upast.stack(real=("member",timeres))
        Ufut = Ufut.stack(real=("member",timeres))
        dim="real"
    else:
        dim = timeres

    Upast_qt = Upast.quantile(q,dim=dim)
    U_mask_past = Upast.where(Upast>Upast_qt)
    U_mask_fut = Ufut.where(Ufut>Upast_qt)

    #mask values below threshold
    if mask_abs:
        U_mask_past = U_mask_past.where(U_mask_past>=mask_abs)
        U_mask_fut = U_mask_fut.where(U_mask_fut>=mask_abs)

    latres, lonres = get_lat_lon_res(ds)
    gcarea = latres*lonres*100*100 #gridcell area approximated: 1 deg corresponds to 100km
    threshold = round(cutarea/gcarea)

    U_mask_past = U_mask_past.dropna(dim=dim,thresh=threshold) #keep fields for which at least X values are not NaN
    U_mask_fut = U_mask_fut.dropna(dim=dim,thresh=threshold) #keep fields for which at least X values are not NaN

    #unstack and assemble
    U_mask_past = U_mask_past.unstack().fillna(0)
    U_mask_fut = U_mask_fut.unstack().fillna(0)
    if futname != pastname:
        U_mask_fut.name = futname
        U_mask = xr.combine_by_coords([U_mask_past, U_mask_fut]).fillna(0)
    else:
        U_mask = U_mask_past

    return U_mask

def scale_qt(ds,q,mask_abs=None,timeres='day',stack=True,pastname='historical',futname='ssp585',cutarea=1000000):
    '''Same as mask_qt but scaling by the q quantile.'''
    Upast = ds[pastname]
    Ufut = ds[futname]

    if stack:
        Upast = Upast.stack(real=("member",timeres))
        Ufut = Ufut.stack(real=("member",timeres))
        dim = "real"
    else:
        dim = timeres

    Upast_qt = Upast.quantile(q,dim=dim)
    U_mask_past = Upast.where(Upast>Upast_qt)
    U_mask_fut = Ufut.where(Ufut>Upast_qt)
    #mask values below threshold
    if mask_abs:
        U_mask_past = U_mask_past.where(U_mask_past>=mask_abs)
        U_mask_fut = U_mask_fut.where(U_mask_fut>=mask_abs)

    U_scaled_past = (U_mask_past-Upast_qt)/Upast_qt
    U_scaled_fut = (U_mask_fut-Upast_qt)/Upast_qt

    latres, lonres = get_lat_lon_res(ds)
    gcarea = latres*lonres*100*100 #gridcell area approximated: 1 deg corresponds to 100km
    threshold = round(cutarea/gcarea)

    U_scaled_past = U_scaled_past.dropna(dim=dim,thresh=threshold) #keep fields for which at least X values are not NaN
    U_scaled_fut = U_scaled_fut.dropna(dim=dim,thresh=threshold) #keep fields for which at least X values are not NaN

    #unstack and assemble
    U_scaled_past = U_scaled_past.unstack().fillna(0)
    U_scaled_fut = U_scaled_fut.unstack().fillna(0)
    if futname != pastname:
        U_scaled_fut.name = futname
        U_scaled = xr.combine_by_coords([U_scaled_past, U_scaled_fut]).fillna(0)
    else:
        U_scaled = U_scaled_past
    return U_scaled

#put processing funcs in a dict
pp_func_dic ={}
pp_func_dic['Sw2010'] = mask_qt
pp_func_dic['CubEOT'] = scale_qt