Skip to content

Commit ee25633

Browse files
corrected longitude selection near 0 (#1843)
* corrected longitude selection near 0 * corrected longitude selection near 0 * corrected longitude selection near 0 * streamline a bit * ups and whats new * ups again... --------- Co-authored-by: Fabien Maussion <fabien.maussion@bristol.ac.uk>
1 parent dfdc044 commit ee25633

File tree

5 files changed

+94
-105
lines changed

5 files changed

+94
-105
lines changed

docs/whats-new.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,8 @@ Bug fixes
7373
- `apparent_mb_from_any_mb` no longer computes mass-balance twice (:pull:`1757`).
7474
By `Dan Goldberg <https://github.com/dngoldberg>`_ and
7575
`Fabien Maussion <https://github.com/fmaussion>`_.
76+
- Fixed small bug in which ERA5 data was misattributed for two Pyrenean glaciers (:pull:`1843`).
77+
By `Lilian Schuster <https://github.com/lilianschuster>`_
7678

7779
v1.6.2 (August 25, 2024)
7880
------------------------

oggm/shop/ecmwf.py

Lines changed: 31 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,9 @@ def _check_ds_validity(ds):
100100
ds['time'].data[:] = pd.to_datetime({'year': ds['time.year'],
101101
'month': ds['time.month'],
102102
'day': 1})
103-
assert ds.longitude.min() >= 0
103+
info = utils.climate_file_info(ds)
104+
assert info['lon_range'][0] >= 0
105+
return info
104106

105107

106108
@entity_task(log, writes=['climate_historical'])
@@ -127,18 +129,22 @@ def process_ecmwf_data(gdir, dataset=None, ensemble_member=0,
127129
the entire time period available in the file, but with this kwarg
128130
you can shorten it (to save space or to crop bad data)
129131
output_filesuffix : str
130-
this add a suffix to the output file (useful to avoid overwriting
132+
this adds a suffix to the output file (useful to avoid overwriting
131133
previous experiments)
132134
"""
133135

134136
if dataset is None:
135137
dataset = cfg.PARAMS['baseline_climate']
136138

137-
# Use xarray to read the data
138-
lon = gdir.cenlon + 360 if gdir.cenlon < 0 else gdir.cenlon
139139
lat = gdir.cenlat
140+
141+
# Use xarray to read the data
142+
# lon depends on the dataset and its resolution
140143
with xr.open_dataset(get_ecmwf_file(dataset, 'tmp')) as ds:
141-
_check_ds_validity(ds)
144+
info = _check_ds_validity(ds)
145+
# Longitude correction - we take the grid boundary, not the grid point center
146+
lon = gdir.cenlon + 360 if gdir.cenlon < info['lon_bounds'][0] else gdir.cenlon
147+
142148
yrs = ds['time.year'].data
143149
y0 = yrs[0] if y0 is None else y0
144150
y1 = yrs[-1] if y1 is None else y1
@@ -149,55 +155,57 @@ def process_ecmwf_data(gdir, dataset=None, ensemble_member=0,
149155
ds = ds.sel(time=slice(f'{y0}-01-01', f'{y1}-12-01'))
150156
if dataset == 'CERA':
151157
ds = ds.sel(number=ensemble_member)
152-
try:
153-
ds = ds.sel(longitude=lon, latitude=lat, method='nearest')
154-
except (ValueError, KeyError):
158+
if info['is_flat']:
155159
# Flattened ERA5
156160
c = (ds.longitude - lon)**2 + (ds.latitude - lat)**2
157161
ds = ds.isel(points=np.argmin(c.data))
162+
else:
163+
ds = ds.sel(longitude=lon, latitude=lat, method='nearest')
164+
158165
temp = ds['t2m'].data - 273.15
159166
time = ds.time.data
160167
ref_lon = float(ds['longitude'])
161168
ref_lon = ref_lon - 360 if ref_lon > 180 else ref_lon
162169
ref_lat = float(ds['latitude'])
163170
with xr.open_dataset(get_ecmwf_file(dataset, 'pre')) as ds:
164-
_check_ds_validity(ds)
171+
info = _check_ds_validity(ds)
172+
# Longitude correction - we take the grid boundary, not the grid point center
173+
lon = gdir.cenlon + 360 if gdir.cenlon < info['lon_bounds'][0] else gdir.cenlon
174+
165175
ds = ds.sel(time=slice(f'{y0}-01-01', f'{y1}-12-01'))
166176
if dataset == 'CERA':
167177
ds = ds.sel(number=ensemble_member)
168-
try:
169-
ds = ds.sel(longitude=lon, latitude=lat, method='nearest')
170-
except (ValueError, KeyError):
178+
if info['is_flat']:
171179
# Flattened ERA5
172180
c = (ds.longitude - lon)**2 + (ds.latitude - lat)**2
173181
ds = ds.isel(points=np.argmin(c.data))
182+
else:
183+
ds = ds.sel(longitude=lon, latitude=lat, method='nearest')
174184
prcp = ds['tp'].data * 1000 * ds['time.daysinmonth']
175185
with xr.open_dataset(get_ecmwf_file(dataset, 'inv')) as ds:
176-
_check_ds_validity(ds)
186+
info = _check_ds_validity(ds)
187+
# Longitude correction - we take the grid boundary, not the grid point center
188+
lon = gdir.cenlon + 360 if gdir.cenlon < info['lon_bounds'][0] else gdir.cenlon
177189
try:
178190
ds = ds.isel(time=0)
179191
except (ValueError):
180192
# new inv flattening files do not have any
181193
# time dependencies anymore
182194
pass
183-
try:
184-
ds = ds.sel(longitude=lon, latitude=lat, method='nearest')
185-
except (ValueError, KeyError):
195+
if info['is_flat']:
186196
# Flattened ERA5
187197
c = (ds.longitude - lon)**2 + (ds.latitude - lat)**2
188198
ds = ds.isel(points=np.argmin(c.data))
199+
else:
200+
ds = ds.sel(longitude=lon, latitude=lat, method='nearest')
189201
hgt = ds['z'].data / cfg.G
190202

191203
temp_std = None
192-
193204
if dataset == 'ERA5dr':
194-
with xr.open_dataset(get_ecmwf_file(dataset, 'lapserates')) as ds:
195-
_check_ds_validity(ds)
196-
ds = ds.sel(time=slice(f'{y0}-01-01', f'{y1}-12-01'))
197-
ds = ds.sel(longitude=lon, latitude=lat, method='nearest')
198-
199205
with xr.open_dataset(get_ecmwf_file(dataset, 'tempstd')) as ds:
200-
_check_ds_validity(ds)
206+
info = _check_ds_validity(ds)
207+
# Longitude correction - we take the grid boundary, not the grid point center
208+
lon = gdir.cenlon + 360 if gdir.cenlon < info['lon_bounds'][0] else gdir.cenlon
201209
ds = ds.sel(time=slice(f'{y0}-01-01', f'{y1}-12-01'))
202210
ds = ds.sel(longitude=lon, latitude=lat, method='nearest')
203211
temp_std = ds['t2m_std'].data

oggm/shop/w5e5.py

Lines changed: 24 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,12 @@ def process_gswp3_w5e5_data(gdir, y0=None, y1=None, output_filesuffix=None):
104104
# would go faster with only netCDF -.-, but easier with xarray
105105
# first temperature dataset
106106
with xr.open_dataset(path_tmp) as ds:
107-
assert ds.longitude.min() >= 0
107+
# sanity checks, safeguarding for unwanted future side effects
108+
# we do it only for temp and assume no mistake on the data prep side
109+
info = utils.climate_file_info(ds)
110+
assert info['lon_bounds'][0] >= 0
111+
assert info['is_flat']
112+
108113
yrs = ds['time.year'].data
109114
y0 = yrs[0] if y0 is None else y0
110115
y1 = yrs[-1] if y1 is None else y1
@@ -113,17 +118,14 @@ def process_gswp3_w5e5_data(gdir, y0=None, y1=None, output_filesuffix=None):
113118
text = 'The climate files only go from 1901--2019'
114119
raise InvalidParamsError(text)
115120
ds = ds.sel(time=slice(f'{y0}-01-01', f'{y1}-12-01'))
116-
try:
117-
# computing all the distances and choose the nearest gridpoint
118-
c = (ds.longitude - lon)**2 + (ds.latitude - lat)**2
119-
ds = ds.isel(points=np.argmin(c.data))
120-
except ValueError:
121-
ds = ds.sel(longitude=lon, latitude=lat, method='nearest')
122-
# normally if I do the flattening, this here should not occur
123-
124-
# because of the flattening, there is no time dependence of lon and lat anymore!
125-
ds['longitude'] = ds.longitude # .isel(time=0)
126-
ds['latitude'] = ds.latitude # .isel(time=0)
121+
122+
# computing all the distances and choose the nearest gridpoint
123+
c = (ds.longitude - lon)**2 + (ds.latitude - lat)**2
124+
ds = ds.isel(points=np.argmin(c.data))
125+
126+
# Fetch lon and lat
127+
ds['longitude'] = ds.longitude
128+
ds['latitude'] = ds.latitude
127129

128130
# temperature should be in degree Celsius for the glacier climate files
129131
temp = ds[tvar].data - 273.15
@@ -136,84 +138,33 @@ def process_gswp3_w5e5_data(gdir, y0=None, y1=None, output_filesuffix=None):
136138

137139
# precipitation: similar as temperature
138140
with xr.open_dataset(path_prcp) as ds:
139-
assert ds.longitude.min() >= 0
140141

141142
# here we take the same y0 and y1 as given from the
142143
# tmp dataset
143144
ds = ds.sel(time=slice(f'{y0}-01-01', f'{y1}-12-01'))
144-
try:
145-
# ... prcp is also flattened
146-
c = (ds.longitude - lon)**2 + (ds.latitude - lat)**2
147-
ds = ds.isel(points=np.argmin(c.data))
148-
except ValueError:
149-
# this should not occur
150-
ds = ds.sel(longitude=lon, latitude=lat, method='nearest')
145+
146+
c = (ds.longitude - lon)**2 + (ds.latitude - lat)**2
147+
ds = ds.isel(points=np.argmin(c.data))
151148

152149
# convert kg m-2 s-1 monthly mean into monthly sum!!!
153150
prcp = ds[pvar].data*cfg.SEC_IN_DAY*ds['time.daysinmonth']
154151

155152
# w5e5 invariant file
156153
with xr.open_dataset(path_inv) as ds:
157-
assert ds.longitude.min() >= 0
158154
ds = ds.isel(time=0)
159-
try:
160-
# Flattened (only possibility at the moment)
161-
c = (ds.longitude - lon)**2 + (ds.latitude - lat)**2
162-
ds = ds.isel(points=np.argmin(c.data))
163-
except ValueError:
164-
# this should not occur
165-
ds = ds.sel(longitude=lon, latitude=lat, method='nearest')
155+
156+
c = (ds.longitude - lon)**2 + (ds.latitude - lat)**2
157+
ds = ds.isel(points=np.argmin(c.data))
158+
166159
# w5e5 inv ASurf/hgt is already in hgt coordinates
167160
hgt = ds['ASurf'].data
168161

169-
# here we need to use the ERA5dr data ...
170-
# there are no lapse rates from W5E5 !!!
171-
# TODO: use updated ERA5dr files that goes until end of 2019
172-
# and update the code accordingly !!!
173-
if y0 < 1979:
174-
# just don't compute lapse rates as ERA5 only starts in 1979
175-
# could use the average from 1979-2019, but for the moment
176-
# gradients are not used in OGGM anyway, so just ignore it:
177-
gradient = None
178-
else:
179-
path_lapserates = ecmwf.get_ecmwf_file('ERA5dr', 'lapserates')
180-
with xr.open_dataset(path_lapserates) as ds:
181-
ecmwf._check_ds_validity(ds)
182-
183-
# this has a different time span!
184-
yrs = ds['time.year'].data
185-
y0 = yrs[0] if y0 is None else y0
186-
y1 = yrs[-1] if y1 is None else y1
187-
ds = ds.sel(time=slice(f'{y0}-01-01', f'{y1}-12-01'))
188-
189-
# no flattening done for the ERA5dr gradient dataset
190-
ds = ds.sel(longitude=lon, latitude=lat, method='nearest')
191-
if y1 == 2019:
192-
# missing some months of ERA5dr (which only goes till middle of 2019)
193-
# otherwise it will fill it with large numbers ...
194-
ds = ds.sel(time=slice(f'{y0}-01-01', '2018-12-01'))
195-
# take for hydrological year 2019 just the avg. lapse rates
196-
# this gives in month order Jan-Dec the mean laperates,
197-
mean_grad = ds.groupby('time.month').mean().lapserate
198-
# fill the last year with mean gradients
199-
gradient = np.concatenate((ds['lapserate'].data, mean_grad),
200-
axis=None)
201-
else:
202-
# get the monthly gradient values
203-
gradient = ds['lapserate'].data
204-
205162
path_temp_std = get_gswp3_w5e5_file(dataset, 'temp_std')
206163
with xr.open_dataset(path_temp_std) as ds:
207164
ds = ds.sel(time=slice(f'{y0}-01-01', f'{y1}-12-01'))
208-
try:
209-
# ... prcp is also flattened
210-
c = (ds.longitude - lon)**2 + (ds.latitude - lat)**2
211-
ds = ds.isel(points=np.argmin(c.data))
212-
except ValueError:
213-
# this should not occur
214-
ds = ds.sel(longitude=lon, latitude=lat, method='nearest')
215-
216-
temp_std = ds['temp_std'].data # tas_std for W5E5!!!
165+
c = (ds.longitude - lon)**2 + (ds.latitude - lat)**2
166+
ds = ds.isel(points=np.argmin(c.data))
167+
temp_std = ds['temp_std'].data
217168

218169
# OK, ready to write
219170
gdir.write_monthly_climate_file(time, prcp, temp, hgt, ref_lon, ref_lat,

oggm/tests/test_shop.py

Lines changed: 21 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -310,8 +310,8 @@ def test_glacier_gridpoint_selection(self):
310310
# downloaded for the HEF gridpoints as they would be too large otherwise.
311311
# However, the same test and other tests are done for all files
312312
# (also ISIMIP3b) and all glaciers in this notebook:
313-
# https://nbviewer.org/urls/cluster.klima.uni-bremen.de/
314-
# ~lschuster/example_ipynb/flatten_glacier_gridpoint_tests.ipynb
313+
# https://nbviewer.org/urls/cluster.klima.uni-bremen.de/~oggm/
314+
# climate/notebooks/flatten_glacier_gridpoint_tests.ipynb
315315
if d == 'GSWP3_W5E5':
316316
res = 0.5
317317
with xr.open_dataset(w5e5.get_gswp3_w5e5_file(d, 'inv')) as dinv:
@@ -321,16 +321,28 @@ def test_glacier_gridpoint_selection(self):
321321
with xr.open_dataset(ecmwf.get_ecmwf_file('ERA5', 'inv')) as dinv:
322322
dinv = dinv.load()
323323

324-
# select five glaciers where two failed in the
324+
diffs = np.sort(dinv.longitude)[1:] - np.sort(dinv.longitude)[:-1]
325+
np.testing.assert_allclose(diffs[diffs > 0].min(), res)
325326
# previous gswp3_w5e5 version, and two are only necessary for RGI7
326327
for coord in [(10.7584, 46.8003), # HEF
327-
(-70.8931 + 360, -72.4474), # RGI60-19.00124
328+
(-70.8931, -72.4474), # RGI60-19.00124
328329
(51.495, 30.9010), # RGI60-12.01691
329330
(0, 0), # random gridpoint not near to a glacier
330-
( -141.670274+360, 69.166921), # in RGI7C, not in RGI6
331-
(-66.855668+360, -67.535551) # only in RGI7G, not in RGI6 or in RGI 7C
331+
( -141.670274, 69.166921), # in RGI7C, not in RGI6
332+
(-66.855668, -67.535551), # only in RGI7G, not in RGI6 or in RGI 7C
333+
(-0.039683, 42.695419), # glacier near 0
334+
(-179.915527 + 360, 66.276108) # RGI60-10.05049 (near -180 longitude)
332335
]:
333336
lon, lat = coord
337+
# for ERA5, all glaciers with longitudes (-0.125, 0.125) should take the longitude 0 data
338+
# for glaciers between (-0.125 and 0) longitude, we do not want to transform to +360!!!
339+
lon = lon + 360 if lon < dinv.longitude.min() - res / 2 else lon
340+
if d == 'GSWP3_W5E5':
341+
# in case of GSWP3_W5E5, all glaciers with longitudes (0,1) will use the 0.5 gridpoint,
342+
# and those within (-1,0) use the 359.5 point
343+
# therefore, we only have the 0,360 transformation issue for ERA5
344+
np.testing.assert_allclose(dinv.longitude.min() - res / 2, 0)
345+
334346
# get the distances to the glacier coordinate
335347
c = (dinv.longitude - lon) ** 2 + (dinv.latitude - lat) ** 2
336348
c = c.to_dataframe('distance').sort_values('distance')
@@ -349,8 +361,8 @@ def test_glacier_gridpoint_selection(self):
349361
assert np.abs(lon_near - lon) <= res/2
350362
assert dist <= ((res/2) ** 2 + (res/2) ** 2) ** 0.5
351363

352-
# this only contains data for five glaciers, let's still check some basics
353-
# both glaciers are not at latitude or longitude 0
364+
# this only contains data for a few glaciers, let's still check some basics
365+
# both glaciers are not at latitude or longitude 0, but for one glacier we need to pick longitude 0
354366
if d == 'GSWP3_W5E5':
355367
with xr.open_dataset(w5e5.get_gswp3_w5e5_file(d, 'temp_std')) as dtemp_std:
356368
assert np.all(dtemp_std.latitude != 0)
@@ -361,7 +373,7 @@ def test_glacier_gridpoint_selection(self):
361373
else:
362374
with xr.open_dataset(ecmwf.get_ecmwf_file('ERA5', 'tmp')) as dtemp:
363375
assert np.all(dtemp.latitude != 0)
364-
assert np.all(dtemp.longitude != 0)
376+
assert np.all(dtemp.longitude >= 0)
365377
assert dtemp.isel(time=0).t2m.std() > 0
366378
assert dtemp.longitude.std() > 0
367379
assert dtemp.latitude.std() > 0

oggm/utils/_funcs.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -401,6 +401,22 @@ def clip_scalar(value, vmin, vmax):
401401
return vmin if value < vmin else vmax if value > vmax else value
402402

403403

404+
def climate_file_info(ds, varname='longitude'):
405+
"""Provides simple info on a climate file - useful for shop tools."""
406+
lons = np.unique(np.sort(ds[varname].data))
407+
# Assumption below is that at least two grid points in the file are
408+
# at 1 grid point distance
409+
dx = np.min(lons[1:] - lons[:-1])
410+
lon_range = (np.min(lons), np.max(lons))
411+
lon_bounds = (lon_range[0] - dx/2, lon_range[1] + dx/2)
412+
return {
413+
'dx': dx,
414+
'lon_range': lon_range,
415+
'lon_bounds': lon_bounds,
416+
'is_flat': varname not in ds.dims,
417+
}
418+
419+
404420
def weighted_average_1d(data, weights):
405421
"""A faster weighted average without dimension checks.
406422

0 commit comments

Comments
 (0)