diff --git a/docs/whats-new.rst b/docs/whats-new.rst index 6331f15a9..b91f114fc 100644 --- a/docs/whats-new.rst +++ b/docs/whats-new.rst @@ -73,6 +73,8 @@ Bug fixes - `apparent_mb_from_any_mb` no longer computes mass-balance twice (:pull:`1757`). By `Dan Goldberg `_ and `Fabien Maussion `_. +- Fixed small bug in which ERA5 data was misattributed for two Pyrenean glaciers (:pull:`1843`). + By `Lilian Schuster `_ v1.6.2 (August 25, 2024) ------------------------ diff --git a/oggm/shop/ecmwf.py b/oggm/shop/ecmwf.py index 823c3fee7..b43ef365a 100644 --- a/oggm/shop/ecmwf.py +++ b/oggm/shop/ecmwf.py @@ -100,7 +100,9 @@ def _check_ds_validity(ds): ds['time'].data[:] = pd.to_datetime({'year': ds['time.year'], 'month': ds['time.month'], 'day': 1}) - assert ds.longitude.min() >= 0 + info = utils.climate_file_info(ds) + assert info['lon_range'][0] >= 0 + return info @entity_task(log, writes=['climate_historical']) @@ -127,18 +129,22 @@ def process_ecmwf_data(gdir, dataset=None, ensemble_member=0, the entire time period available in the file, but with this kwarg you can shorten it (to save space or to crop bad data) output_filesuffix : str - this add a suffix to the output file (useful to avoid overwriting + this adds a suffix to the output file (useful to avoid overwriting previous experiments) """ if dataset is None: dataset = cfg.PARAMS['baseline_climate'] - # Use xarray to read the data - lon = gdir.cenlon + 360 if gdir.cenlon < 0 else gdir.cenlon lat = gdir.cenlat + + # Use xarray to read the data + # lon depends on the dataset and its resolution with xr.open_dataset(get_ecmwf_file(dataset, 'tmp')) as ds: - _check_ds_validity(ds) + info = _check_ds_validity(ds) + # Longitude correction - we take the grid boundary, not the grid point center + lon = gdir.cenlon + 360 if gdir.cenlon < info['lon_bounds'][0] else gdir.cenlon + yrs = ds['time.year'].data y0 = yrs[0] if y0 is None else y0 y1 = yrs[-1] if y1 is None else y1 @@ -149,55 +155,57 @@ def process_ecmwf_data(gdir, 
dataset=None, ensemble_member=0, ds = ds.sel(time=slice(f'{y0}-01-01', f'{y1}-12-01')) if dataset == 'CERA': ds = ds.sel(number=ensemble_member) - try: - ds = ds.sel(longitude=lon, latitude=lat, method='nearest') - except (ValueError, KeyError): + if info['is_flat']: # Flattened ERA5 c = (ds.longitude - lon)**2 + (ds.latitude - lat)**2 ds = ds.isel(points=np.argmin(c.data)) + else: + ds = ds.sel(longitude=lon, latitude=lat, method='nearest') + temp = ds['t2m'].data - 273.15 time = ds.time.data ref_lon = float(ds['longitude']) ref_lon = ref_lon - 360 if ref_lon > 180 else ref_lon ref_lat = float(ds['latitude']) with xr.open_dataset(get_ecmwf_file(dataset, 'pre')) as ds: - _check_ds_validity(ds) + info = _check_ds_validity(ds) + # Longitude correction - we take the grid boundary, not the grid point center + lon = gdir.cenlon + 360 if gdir.cenlon < info['lon_bounds'][0] else gdir.cenlon + ds = ds.sel(time=slice(f'{y0}-01-01', f'{y1}-12-01')) if dataset == 'CERA': ds = ds.sel(number=ensemble_member) - try: - ds = ds.sel(longitude=lon, latitude=lat, method='nearest') - except (ValueError, KeyError): + if info['is_flat']: # Flattened ERA5 c = (ds.longitude - lon)**2 + (ds.latitude - lat)**2 ds = ds.isel(points=np.argmin(c.data)) + else: + ds = ds.sel(longitude=lon, latitude=lat, method='nearest') prcp = ds['tp'].data * 1000 * ds['time.daysinmonth'] with xr.open_dataset(get_ecmwf_file(dataset, 'inv')) as ds: - _check_ds_validity(ds) + info = _check_ds_validity(ds) + # Longitude correction - we take the grid boundary, not the grid point center + lon = gdir.cenlon + 360 if gdir.cenlon < info['lon_bounds'][0] else gdir.cenlon try: ds = ds.isel(time=0) except (ValueError): # new inv flattening files do not have any # time dependencies anymore pass - try: - ds = ds.sel(longitude=lon, latitude=lat, method='nearest') - except (ValueError, KeyError): + if info['is_flat']: # Flattened ERA5 c = (ds.longitude - lon)**2 + (ds.latitude - lat)**2 ds = ds.isel(points=np.argmin(c.data)) 
+ else: + ds = ds.sel(longitude=lon, latitude=lat, method='nearest') hgt = ds['z'].data / cfg.G temp_std = None - if dataset == 'ERA5dr': - with xr.open_dataset(get_ecmwf_file(dataset, 'lapserates')) as ds: - _check_ds_validity(ds) - ds = ds.sel(time=slice(f'{y0}-01-01', f'{y1}-12-01')) - ds = ds.sel(longitude=lon, latitude=lat, method='nearest') - with xr.open_dataset(get_ecmwf_file(dataset, 'tempstd')) as ds: - _check_ds_validity(ds) + info = _check_ds_validity(ds) + # Longitude correction - we take the grid boundary, not the grid point center + lon = gdir.cenlon + 360 if gdir.cenlon < info['lon_bounds'][0] else gdir.cenlon ds = ds.sel(time=slice(f'{y0}-01-01', f'{y1}-12-01')) ds = ds.sel(longitude=lon, latitude=lat, method='nearest') temp_std = ds['t2m_std'].data diff --git a/oggm/shop/w5e5.py b/oggm/shop/w5e5.py index 974a895ff..c3487aa43 100644 --- a/oggm/shop/w5e5.py +++ b/oggm/shop/w5e5.py @@ -104,7 +104,12 @@ def process_gswp3_w5e5_data(gdir, y0=None, y1=None, output_filesuffix=None): # would go faster with only netCDF -.-, but easier with xarray # first temperature dataset with xr.open_dataset(path_tmp) as ds: - assert ds.longitude.min() >= 0 + # sanity checks, safeguarding for unwanted future side effects + # we do it only for temp and assume no mistake on the data prep side + info = utils.climate_file_info(ds) + assert info['lon_bounds'][0] >= 0 + assert info['is_flat'] + yrs = ds['time.year'].data y0 = yrs[0] if y0 is None else y0 y1 = yrs[-1] if y1 is None else y1 @@ -113,17 +118,14 @@ def process_gswp3_w5e5_data(gdir, y0=None, y1=None, output_filesuffix=None): text = 'The climate files only go from 1901--2019' raise InvalidParamsError(text) ds = ds.sel(time=slice(f'{y0}-01-01', f'{y1}-12-01')) - try: - # computing all the distances and choose the nearest gridpoint - c = (ds.longitude - lon)**2 + (ds.latitude - lat)**2 - ds = ds.isel(points=np.argmin(c.data)) - except ValueError: - ds = ds.sel(longitude=lon, latitude=lat, method='nearest') - # normally 
if I do the flattening, this here should not occur - - because of the flattening, there is no time dependence of lon and lat anymore! - ds['longitude'] = ds.longitude # .isel(time=0) - ds['latitude'] = ds.latitude # .isel(time=0) + + # computing all the distances and choose the nearest gridpoint + c = (ds.longitude - lon)**2 + (ds.latitude - lat)**2 + ds = ds.isel(points=np.argmin(c.data)) + + # Fetch lon and lat + ds['longitude'] = ds.longitude + ds['latitude'] = ds.latitude # temperature should be in degree Celsius for the glacier climate files temp = ds[tvar].data - 273.15 @@ -136,84 +138,33 @@ def process_gswp3_w5e5_data(gdir, y0=None, y1=None, output_filesuffix=None): # precipitation: similar as temperature with xr.open_dataset(path_prcp) as ds: - assert ds.longitude.min() >= 0 # here we take the same y0 and y1 as given from the # tmp dataset ds = ds.sel(time=slice(f'{y0}-01-01', f'{y1}-12-01')) - try: - # ... prcp is also flattened - c = (ds.longitude - lon)**2 + (ds.latitude - lat)**2 - ds = ds.isel(points=np.argmin(c.data)) - except ValueError: - # this should not occur - ds = ds.sel(longitude=lon, latitude=lat, method='nearest') + + c = (ds.longitude - lon)**2 + (ds.latitude - lat)**2 + ds = ds.isel(points=np.argmin(c.data)) # convert kg m-2 s-1 monthly mean into monthly sum!!! prcp = ds[pvar].data*cfg.SEC_IN_DAY*ds['time.daysinmonth'] # w5e5 invariant file with xr.open_dataset(path_inv) as ds: - assert ds.longitude.min() >= 0 ds = ds.isel(time=0) - try: - # Flattened (only possibility at the moment) - c = (ds.longitude - lon)**2 + (ds.latitude - lat)**2 - ds = ds.isel(points=np.argmin(c.data)) - except ValueError: - # this should not occur - ds = ds.sel(longitude=lon, latitude=lat, method='nearest') + + c = (ds.longitude - lon)**2 + (ds.latitude - lat)**2 + ds = ds.isel(points=np.argmin(c.data)) + # w5e5 inv ASurf/hgt is already in hgt coordinates hgt = ds['ASurf'].data - # here we need to use the ERA5dr data ... 
- # there are no lapse rates from W5E5 !!! - # TODO: use updated ERA5dr files that goes until end of 2019 - # and update the code accordingly !!! - if y0 < 1979: - # just don't compute lapse rates as ERA5 only starts in 1979 - # could use the average from 1979-2019, but for the moment - # gradients are not used in OGGM anyway, so just ignore it: - gradient = None - else: - path_lapserates = ecmwf.get_ecmwf_file('ERA5dr', 'lapserates') - with xr.open_dataset(path_lapserates) as ds: - ecmwf._check_ds_validity(ds) - - # this has a different time span! - yrs = ds['time.year'].data - y0 = yrs[0] if y0 is None else y0 - y1 = yrs[-1] if y1 is None else y1 - ds = ds.sel(time=slice(f'{y0}-01-01', f'{y1}-12-01')) - - # no flattening done for the ERA5dr gradient dataset - ds = ds.sel(longitude=lon, latitude=lat, method='nearest') - if y1 == 2019: - # missing some months of ERA5dr (which only goes till middle of 2019) - # otherwise it will fill it with large numbers ... - ds = ds.sel(time=slice(f'{y0}-01-01', '2018-12-01')) - # take for hydrological year 2019 just the avg. lapse rates - # this gives in month order Jan-Dec the mean laperates, - mean_grad = ds.groupby('time.month').mean().lapserate - # fill the last year with mean gradients - gradient = np.concatenate((ds['lapserate'].data, mean_grad), - axis=None) - else: - # get the monthly gradient values - gradient = ds['lapserate'].data - path_temp_std = get_gswp3_w5e5_file(dataset, 'temp_std') with xr.open_dataset(path_temp_std) as ds: ds = ds.sel(time=slice(f'{y0}-01-01', f'{y1}-12-01')) - try: - # ... prcp is also flattened - c = (ds.longitude - lon)**2 + (ds.latitude - lat)**2 - ds = ds.isel(points=np.argmin(c.data)) - except ValueError: - # this should not occur - ds = ds.sel(longitude=lon, latitude=lat, method='nearest') - - temp_std = ds['temp_std'].data # tas_std for W5E5!!! 
+ c = (ds.longitude - lon)**2 + (ds.latitude - lat)**2 + ds = ds.isel(points=np.argmin(c.data)) + temp_std = ds['temp_std'].data # OK, ready to write gdir.write_monthly_climate_file(time, prcp, temp, hgt, ref_lon, ref_lat, diff --git a/oggm/tests/test_shop.py b/oggm/tests/test_shop.py index 5bd684503..5986e8249 100644 --- a/oggm/tests/test_shop.py +++ b/oggm/tests/test_shop.py @@ -310,8 +310,8 @@ def test_glacier_gridpoint_selection(self): # downloaded for the HEF gridpoints as they would be too large otherwise. # However, the same test and other tests are done for all files # (also ISIMIP3b) and all glaciers in this notebook: - # https://nbviewer.org/urls/cluster.klima.uni-bremen.de/ - # ~lschuster/example_ipynb/flatten_glacier_gridpoint_tests.ipynb + # https://nbviewer.org/urls/cluster.klima.uni-bremen.de/~oggm/ + # climate/notebooks/flatten_glacier_gridpoint_tests.ipynb if d == 'GSWP3_W5E5': res = 0.5 with xr.open_dataset(w5e5.get_gswp3_w5e5_file(d, 'inv')) as dinv: @@ -321,16 +321,28 @@ def test_glacier_gridpoint_selection(self): with xr.open_dataset(ecmwf.get_ecmwf_file('ERA5', 'inv')) as dinv: dinv = dinv.load() - # select five glaciers where two failed in the + diffs = np.sort(dinv.longitude)[1:] - np.sort(dinv.longitude)[:-1] + np.testing.assert_allclose(diffs[diffs > 0].min(), res) # previous gswp3_w5e5 version, and two are only necessary for RGI7 for coord in [(10.7584, 46.8003), # HEF - (-70.8931 + 360, -72.4474), # RGI60-19.00124 + (-70.8931, -72.4474), # RGI60-19.00124 (51.495, 30.9010), # RGI60-12.01691 (0, 0), # random gridpoint not near to a glacier - ( -141.670274+360, 69.166921), # in RGI7C, not in RGI6 - (-66.855668+360, -67.535551) # only in RGI7G, not in RGI6 or in RGI 7C + ( -141.670274, 69.166921), # in RGI7C, not in RGI6 + (-66.855668, -67.535551), # only in RGI7G, not in RGI6 or in RGI 7C + (-0.039683, 42.695419), ## glacier near 0 + (-179.915527 + 360, 66.276108) # RGI60-10.05049 (near -180 longitude) ]: lon, lat = coord + # for ERA5, all 
glaciers with longitudes (-0.125, 0.125) should take the longitude 0 data + # for glaciers between (-0.125 and 0) longitude, we do not want to transform to +360!!! + lon = lon + 360 if lon < dinv.longitude.min() - res / 2 else lon + if d == 'GSWP3_W5E5': + # in case of GSWP3_W5E5, all glaciers with longitudes (0,1) will use the 0.5 gridpoint, + # and those within (-1,0) use the 359.5 point + # therefore, we only have the 0,360 transformation issue for ERA5 + np.testing.assert_allclose(dinv.longitude.min() - res / 2, 0) + # get the distances to the glacier coordinate c = (dinv.longitude - lon) ** 2 + (dinv.latitude - lat) ** 2 c = c.to_dataframe('distance').sort_values('distance') @@ -349,8 +361,8 @@ def test_glacier_gridpoint_selection(self): assert np.abs(lon_near - lon) <= res/2 assert dist <= ((res/2) ** 2 + (res/2) ** 2) ** 0.5 - # this only contains data for five glaciers, let's still check some basics - # both glaciers are not at latitude or longitude 0 + # this only contains data for a few glaciers, let's still check some basics + # both glaciers are not at latitude or longitude 0, but for one glacier we need to pick longitude 0 if d == 'GSWP3_W5E5': with xr.open_dataset(w5e5.get_gswp3_w5e5_file(d, 'temp_std')) as dtemp_std: assert np.all(dtemp_std.latitude != 0) @@ -361,7 +373,7 @@ def test_glacier_gridpoint_selection(self): else: with xr.open_dataset(ecmwf.get_ecmwf_file('ERA5', 'tmp')) as dtemp: assert np.all(dtemp.latitude != 0) - assert np.all(dtemp.longitude != 0) + assert np.all(dtemp.longitude >= 0) assert dtemp.isel(time=0).t2m.std() > 0 assert dtemp.longitude.std() > 0 assert dtemp.latitude.std() > 0 diff --git a/oggm/utils/_funcs.py b/oggm/utils/_funcs.py index 0658a6db3..3a4e540a1 100644 --- a/oggm/utils/_funcs.py +++ b/oggm/utils/_funcs.py @@ -401,6 +401,22 @@ def clip_scalar(value, vmin, vmax): return vmin if value < vmin else vmax if value > vmax else value +def climate_file_info(ds, varname='longitude'): + """Provides simple info on a climate 
file - useful for shop tools.""" + lons = np.unique(np.sort(ds[varname].data)) + # Assumption below is that at least two grid points in the file are + # at 1 grid point distance + dx = np.min(lons[1:] - lons[:-1]) + lon_range = (np.min(lons), np.max(lons)) + lon_bounds = (lon_range[0] - dx/2, lon_range[1] + dx/2) + return { + 'dx': dx, + 'lon_range': lon_range, + 'lon_bounds': lon_bounds, + 'is_flat': varname not in ds.dims, + } + + def weighted_average_1d(data, weights): """A faster weighted average without dimension checks.