Skip to content

Commit ee25633

Browse files
corrected longitude selection near 0 (#1843)
* corrected longitude selection near 0 * corrected longitude selection near 0 * corrected longitude selection near 0 * streamline a bit * ups and whats new * ups again... --------- Co-authored-by: Fabien Maussion <fabien.maussion@bristol.ac.uk>
1 parent dfdc044 commit ee25633

File tree

5 files changed

+94
-105
lines changed

5 files changed

+94
-105
lines changed

docs/whats-new.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,8 @@ Bug fixes
7373
- `apparent_mb_from_any_mb` no longer computes mass-balance twice (:pull:`1757`).
7474
By `Dan Goldberg <https://github.com/dngoldberg>`_ and
7575
`Fabien Maussion <https://github.com/fmaussion>`_.
76+
- Fixed small bug in which ERA5 data was misattributed for two Pyrenean glaciers (:pull:`1843`).
77+
By `Lilian Schuster <https://github.com/lilianschuster>`_
7678

7779
v1.6.2 (August 25, 2024)
7880
------------------------

oggm/shop/ecmwf.py

Lines changed: 31 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,9 @@ def _check_ds_validity(ds):
100100
ds['time'].data[:] = pd.to_datetime({'year': ds['time.year'],
101101
'month': ds['time.month'],
102102
'day': 1})
103-
assert ds.longitude.min() >= 0
103+
info = utils.climate_file_info(ds)
104+
assert info['lon_range'][0] >= 0
105+
return info
104106

105107

106108
@entity_task(log, writes=['climate_historical'])
@@ -127,18 +129,22 @@ def process_ecmwf_data(gdir, dataset=None, ensemble_member=0,
127129
the entire time period available in the file, but with this kwarg
128130
you can shorten it (to save space or to crop bad data)
129131
output_filesuffix : str
130-
this add a suffix to the output file (useful to avoid overwriting
132+
this adds a suffix to the output file (useful to avoid overwriting
131133
previous experiments)
132134
"""
133135

134136
if dataset is None:
135137
dataset = cfg.PARAMS['baseline_climate']
136138

137-
# Use xarray to read the data
138-
lon = gdir.cenlon + 360 if gdir.cenlon < 0 else gdir.cenlon
139139
lat = gdir.cenlat
140+
141+
# Use xarray to read the data
142+
# lon depends on the dataset and its resolution
140143
with xr.open_dataset(get_ecmwf_file(dataset, 'tmp')) as ds:
141-
_check_ds_validity(ds)
144+
info = _check_ds_validity(ds)
145+
# Longitude correction - we take the grid boundary, not the grid point center
146+
lon = gdir.cenlon + 360 if gdir.cenlon < info['lon_bounds'][0] else gdir.cenlon
147+
142148
yrs = ds['time.year'].data
143149
y0 = yrs[0] if y0 is None else y0
144150
y1 = yrs[-1] if y1 is None else y1
@@ -149,55 +155,57 @@ def process_ecmwf_data(gdir, dataset=None, ensemble_member=0,
149155
ds = ds.sel(time=slice(f'{y0}-01-01', f'{y1}-12-01'))
150156
if dataset == 'CERA':
151157
ds = ds.sel(number=ensemble_member)
152-
try:
153-
ds = ds.sel(longitude=lon, latitude=lat, method='nearest')
154-
except (ValueError, KeyError):
158+
if info['is_flat']:
155159
# Flattened ERA5
156160
c = (ds.longitude - lon)**2 + (ds.latitude - lat)**2
157161
ds = ds.isel(points=np.argmin(c.data))
162+
else:
163+
ds = ds.sel(longitude=lon, latitude=lat, method='nearest')
164+
158165
temp = ds['t2m'].data - 273.15
159166
time = ds.time.data
160167
ref_lon = float(ds['longitude'])
161168
ref_lon = ref_lon - 360 if ref_lon > 180 else ref_lon
162169
ref_lat = float(ds['latitude'])
163170
with xr.open_dataset(get_ecmwf_file(dataset, 'pre')) as ds:
164-
_check_ds_validity(ds)
171+
info = _check_ds_validity(ds)
172+
# Longitude correction - we take the grid boundary, not the grid point center
173+
lon = gdir.cenlon + 360 if gdir.cenlon < info['lon_bounds'][0] else gdir.cenlon
174+
165175
ds = ds.sel(time=slice(f'{y0}-01-01', f'{y1}-12-01'))
166176
if dataset == 'CERA':
167177
ds = ds.sel(number=ensemble_member)
168-
try:
169-
ds = ds.sel(longitude=lon, latitude=lat, method='nearest')
170-
except (ValueError, KeyError):
178+
if info['is_flat']:
171179
# Flattened ERA5
172180
c = (ds.longitude - lon)**2 + (ds.latitude - lat)**2
173181
ds = ds.isel(points=np.argmin(c.data))
182+
else:
183+
ds = ds.sel(longitude=lon, latitude=lat, method='nearest')
174184
prcp = ds['tp'].data * 1000 * ds['time.daysinmonth']
175185
with xr.open_dataset(get_ecmwf_file(dataset, 'inv')) as ds:
176-
_check_ds_validity(ds)
186+
info = _check_ds_validity(ds)
187+
# Longitude correction - we take the grid boundary, not the grid point center
188+
lon = gdir.cenlon + 360 if gdir.cenlon < info['lon_bounds'][0] else gdir.cenlon
177189
try:
178190
ds = ds.isel(time=0)
179191
except (ValueError):
180192
# new inv flattening files do not have any
181193
# time dependencies anymore
182194
pass
183-
try:
184-
ds = ds.sel(longitude=lon, latitude=lat, method='nearest')
185-
except (ValueError, KeyError):
195+
if info['is_flat']:
186196
# Flattened ERA5
187197
c = (ds.longitude - lon)**2 + (ds.latitude - lat)**2
188198
ds = ds.isel(points=np.argmin(c.data))
199+
else:
200+
ds = ds.sel(longitude=lon, latitude=lat, method='nearest')
189201
hgt = ds['z'].data / cfg.G
190202

191203
temp_std = None
192-
193204
if dataset == 'ERA5dr':
194-
with xr.open_dataset(get_ecmwf_file(dataset, 'lapserates')) as ds:
195-
_check_ds_validity(ds)
196-
ds = ds.sel(time=slice(f'{y0}-01-01', f'{y1}-12-01'))
197-
ds = ds.sel(longitude=lon, latitude=lat, method='nearest')
198-
199205
with xr.open_dataset(get_ecmwf_file(dataset, 'tempstd')) as ds:
200-
_check_ds_validity(ds)
206+
info = _check_ds_validity(ds)
207+
# Longitude correction - we take the grid boundary, not the grid point center
208+
lon = gdir.cenlon + 360 if gdir.cenlon < info['lon_bounds'][0] else gdir.cenlon
201209
ds = ds.sel(time=slice(f'{y0}-01-01', f'{y1}-12-01'))
202210
ds = ds.sel(longitude=lon, latitude=lat, method='nearest')
203211
temp_std = ds['t2m_std'].data

oggm/shop/w5e5.py

Lines changed: 24 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,12 @@ def process_gswp3_w5e5_data(gdir, y0=None, y1=None, output_filesuffix=None):
104104
# would go faster with only netCDF -.-, but easier with xarray
105105
# first temperature dataset
106106
with xr.open_dataset(path_tmp) as ds:
107-
assert ds.longitude.min() >= 0
107+
# sanity checks, safeguarding for unwanted future side effects
108+
# we do it only for temp and assume no mistake on the data prep side
109+
info = utils.climate_file_info(ds)
110+
assert info['lon_bounds'][0] >= 0
111+
assert info['is_flat']
112+
108113
yrs = ds['time.year'].data
109114
y0 = yrs[0] if y0 is None else y0
110115
y1 = yrs[-1] if y1 is None else y1
@@ -113,17 +118,14 @@ def process_gswp3_w5e5_data(gdir, y0=None, y1=None, output_filesuffix=None):
113118
text = 'The climate files only go from 1901--2019'
114119
raise InvalidParamsError(text)
115120
ds = ds.sel(time=slice(f'{y0}-01-01', f'{y1}-12-01'))
116-
try:
117-
# computing all the distances and choose the nearest gridpoint
118-
c = (ds.longitude - lon)**2 + (ds.latitude - lat)**2
119-
ds = ds.isel(points=np.argmin(c.data))
120-
except ValueError:
121-
ds = ds.sel(longitude=lon, latitude=lat, method='nearest')
122-
# normally if I do the flattening, this here should not occur
123-
124-
# because of the flattening, there is no time dependence of lon and lat anymore!
125-
ds['longitude'] = ds.longitude # .isel(time=0)
126-
ds['latitude'] = ds.latitude # .isel(time=0)
121+
122+
# computing all the distances and choose the nearest gridpoint
123+
c = (ds.longitude - lon)**2 + (ds.latitude - lat)**2
124+
ds = ds.isel(points=np.argmin(c.data))
125+
126+
# Fetch lon and lat
127+
ds['longitude'] = ds.longitude
128+
ds['latitude'] = ds.latitude
127129

128130
# temperature should be in degree Celsius for the glacier climate files
129131
temp = ds[tvar].data - 273.15
@@ -136,84 +138,33 @@ def process_gswp3_w5e5_data(gdir, y0=None, y1=None, output_filesuffix=None):
136138

137139
# precipitation: similar as temperature
138140
with xr.open_dataset(path_prcp) as ds:
139-
assert ds.longitude.min() >= 0
140141

141142
# here we take the same y0 and y1 as given from the
142143
# tmp dataset
143144
ds = ds.sel(time=slice(f'{y0}-01-01', f'{y1}-12-01'))
144-
try:
145-
# ... prcp is also flattened
146-
c = (ds.longitude - lon)**2 + (ds.latitude - lat)**2
147-
ds = ds.isel(points=np.argmin(c.data))
148-
except ValueError:
149-
# this should not occur
150-
ds = ds.sel(longitude=lon, latitude=lat, method='nearest')
145+
146+
c = (ds.longitude - lon)**2 + (ds.latitude - lat)**2
147+
ds = ds.isel(points=np.argmin(c.data))
151148

152149
# convert kg m-2 s-1 monthly mean into monthly sum!!!
153150
prcp = ds[pvar].data*cfg.SEC_IN_DAY*ds['time.daysinmonth']
154151

155152
# w5e5 invariant file
156153
with xr.open_dataset(path_inv) as ds:
157-
assert ds.longitude.min() >= 0
158154
ds = ds.isel(time=0)
159-
try:
160-
# Flattened (only possibility at the moment)
161-
c = (ds.longitude - lon)**2 + (ds.latitude - lat)**2
162-
ds = ds.isel(points=np.argmin(c.data))
163-
except ValueError:
164-
# this should not occur
165-
ds = ds.sel(longitude=lon, latitude=lat, method='nearest')
155+
156+
c = (ds.longitude - lon)**2 + (ds.latitude - lat)**2
157+
ds = ds.isel(points=np.argmin(c.data))
158+
166159
# w5e5 inv ASurf/hgt is already in hgt coordinates
167160
hgt = ds['ASurf'].data
168161

169-
# here we need to use the ERA5dr data ...
170-
# there are no lapse rates from W5E5 !!!
171-
# TODO: use updated ERA5dr files that goes until end of 2019
172-
# and update the code accordingly !!!
173-
if y0 < 1979:
174-
# just don't compute lapse rates as ERA5 only starts in 1979
175-
# could use the average from 1979-2019, but for the moment
176-
# gradients are not used in OGGM anyway, so just ignore it:
177-
gradient = None
178-
else:
179-
path_lapserates = ecmwf.get_ecmwf_file('ERA5dr', 'lapserates')
180-
with xr.open_dataset(path_lapserates) as ds:
181-
ecmwf._check_ds_validity(ds)
182-
183-
# this has a different time span!
184-
yrs = ds['time.year'].data
185-
y0 = yrs[0] if y0 is None else y0
186-
y1 = yrs[-1] if y1 is None else y1
187-
ds = ds.sel(time=slice(f'{y0}-01-01', f'{y1}-12-01'))
188-
189-
# no flattening done for the ERA5dr gradient dataset
190-
ds = ds.sel(longitude=lon, latitude=lat, method='nearest')
191-
if y1 == 2019:
192-
# missing some months of ERA5dr (which only goes till middle of 2019)
193-
# otherwise it will fill it with large numbers ...
194-
ds = ds.sel(time=slice(f'{y0}-01-01', '2018-12-01'))
195-
# take for hydrological year 2019 just the avg. lapse rates
196-
# this gives in month order Jan-Dec the mean laperates,
197-
mean_grad = ds.groupby('time.month').mean().lapserate
198-
# fill the last year with mean gradients
199-
gradient = np.concatenate((ds['lapserate'].data, mean_grad),
200-
axis=None)
201-
else:
202-
# get the monthly gradient values
203-
gradient = ds['lapserate'].data
204-
205162
path_temp_std = get_gswp3_w5e5_file(dataset, 'temp_std')
206163
with xr.open_dataset(path_temp_std) as ds:
207164
ds = ds.sel(time=slice(f'{y0}-01-01', f'{y1}-12-01'))
208-
try:
209-
# ... prcp is also flattened
210-
c = (ds.longitude - lon)**2 + (ds.latitude - lat)**2
211-
ds = ds.isel(points=np.argmin(c.data))
212-
except ValueError:
213-
# this should not occur
214-
ds = ds.sel(longitude=lon, latitude=lat, method='nearest')
215-
216-
temp_std = ds['temp_std'].data # tas_std for W5E5!!!
165+
c = (ds.longitude - lon)**2 + (ds.latitude - lat)**2
166+
ds = ds.isel(points=np.argmin(c.data))
167+
temp_std = ds['temp_std'].data
217168

218169
# OK, ready to write
219170
gdir.write_monthly_climate_file(time, prcp, temp, hgt, ref_lon, ref_lat,

oggm/tests/test_shop.py

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -310,8 +310,8 @@ def test_glacier_gridpoint_selection(self):
310310
# downloaded for the HEF gridpoints as they would be too large otherwise.
311311
# However, the same test and other tests are done for all files
312312
# (also ISIMIP3b) and all glaciers in this notebook:
313-
# https://nbviewer.org/urls/cluster.klima.uni-bremen.de/
314-
# ~lschuster/example_ipynb/flatten_glacier_gridpoint_tests.ipynb
313+
# https://nbviewer.org/urls/cluster.klima.uni-bremen.de/~oggm/
314+
# climate/notebooks/flatten_glacier_gridpoint_tests.ipynb
315315
if d == 'GSWP3_W5E5':
316316
res = 0.5
317317
with xr.open_dataset(w5e5.get_gswp3_w5e5_file(d, 'inv')) as dinv:
@@ -321,16 +321,28 @@ def test_glacier_gridpoint_selection(self):
321321
with xr.open_dataset(ecmwf.get_ecmwf_file('ERA5', 'inv')) as dinv:
322322
dinv = dinv.load()
323323

324-
# select five glaciers where two failed in the
324+
diffs = np.sort(dinv.longitude)[1:] - np.sort(dinv.longitude)[:-1]
325+
np.testing.assert_allclose(diffs[diffs > 0].min(), res)
325326
# previous gswp3_w5e5 version, and two are only necessary for RGI7
326327
for coord in [(10.7584, 46.8003), # HEF
327-
(-70.8931 + 360, -72.4474), # RGI60-19.00124
328+
(-70.8931, -72.4474), # RGI60-19.00124
328329
(51.495, 30.9010), # RGI60-12.01691
329330
(0, 0), # random gridpoint not near to a glacier
330-
( -141.670274+360, 69.166921), # in RGI7C, not in RGI6
331-
(-66.855668+360, -67.535551) # only in RGI7G, not in RGI6 or in RGI 7C
331+
( -141.670274, 69.166921), # in RGI7C, not in RGI6
332+
(-66.855668, -67.535551), # only in RGI7G, not in RGI6 or in RGI 7C
333+
(-0.039683, 42.695419), # glacier near 0
334+
(-179.915527 + 360, 66.276108) # RGI60-10.05049 (near -180 longitude)
332335
]:
333336
lon, lat = coord
337+
# for ERA5, all glaciers with longitudes (-0.125, 0.125) should take the longitude 0 data
338+
# for glaciers between (-0.125 and 0) longitude, we do not want to transform to +360!!!
339+
lon = lon + 360 if lon < dinv.longitude.min() - res / 2 else lon
340+
if d == 'GSWP3_W5E5':
341+
# in case of GSWP3_W5E5, all glaciers with longitudes (0,1) will use the 0.5 gridpoint,
342+
# and those within (-1,0) use the 359.5 point
343+
# therefore, we only have the 0,360 transformation issue for ERA5
344+
np.testing.assert_allclose(dinv.longitude.min() - res / 2, 0)
345+
334346
# get the distances to the glacier coordinate
335347
c = (dinv.longitude - lon) ** 2 + (dinv.latitude - lat) ** 2
336348
c = c.to_dataframe('distance').sort_values('distance')
@@ -349,8 +361,8 @@ def test_glacier_gridpoint_selection(self):
349361
assert np.abs(lon_near - lon) <= res/2
350362
assert dist <= ((res/2) ** 2 + (res/2) ** 2) ** 0.5
351363

352-
# this only contains data for five glaciers, let's still check some basics
353-
# both glaciers are not at latitude or longitude 0
364+
# this only contains data for a few glaciers, let's still check some basics
365+
# both glaciers are not at latitude or longitude 0, but for one glacier we need to pick longitude 0
354366
if d == 'GSWP3_W5E5':
355367
with xr.open_dataset(w5e5.get_gswp3_w5e5_file(d, 'temp_std')) as dtemp_std:
356368
assert np.all(dtemp_std.latitude != 0)
@@ -361,7 +373,7 @@ def test_glacier_gridpoint_selection(self):
361373
else:
362374
with xr.open_dataset(ecmwf.get_ecmwf_file('ERA5', 'tmp')) as dtemp:
363375
assert np.all(dtemp.latitude != 0)
364-
assert np.all(dtemp.longitude != 0)
376+
assert np.all(dtemp.longitude >= 0)
365377
assert dtemp.isel(time=0).t2m.std() > 0
366378
assert dtemp.longitude.std() > 0
367379
assert dtemp.latitude.std() > 0

oggm/utils/_funcs.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -401,6 +401,22 @@ def clip_scalar(value, vmin, vmax):
401401
return vmin if value < vmin else vmax if value > vmax else value
402402

403403

404+
def climate_file_info(ds, varname='longitude'):
405+
"""Provides simple info on a climate file - useful for shop tools."""
406+
lons = np.unique(np.sort(ds[varname].data))
407+
# Assumption below is that at least two grid points in the file are
408+
# at 1 grid point distance
409+
dx = np.min(lons[1:] - lons[:-1])
410+
lon_range = (np.min(lons), np.max(lons))
411+
lon_bounds = (lon_range[0] - dx/2, lon_range[1] + dx/2)
412+
return {
413+
'dx': dx,
414+
'lon_range': lon_range,
415+
'lon_bounds': lon_bounds,
416+
'is_flat': varname not in ds.dims,
417+
}
418+
419+
404420
def weighted_average_1d(data, weights):
405421
"""A faster weighted average without dimension checks.
406422

0 commit comments

Comments
 (0)