Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 21 additions & 3 deletions oggm/shop/ecmwf.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,18 +127,23 @@ def process_ecmwf_data(gdir, dataset=None, ensemble_member=0,
the entire time period available in the file, but with this kwarg
you can shorten it (to save space or to crop bad data)
output_filesuffix : str
this add a suffix to the output file (useful to avoid overwriting
this adds a suffix to the output file (useful to avoid overwriting
previous experiments)
"""

if dataset is None:
dataset = cfg.PARAMS['baseline_climate']

# Use xarray to read the data
lon = gdir.cenlon + 360 if gdir.cenlon < 0 else gdir.cenlon
lat = gdir.cenlat
# Use xarray to read the data
# lon depends on the dataset and its resolution
with xr.open_dataset(get_ecmwf_file(dataset, 'tmp')) as ds:
_check_ds_validity(ds)
diffs = np.sort(ds.longitude)[1:] - np.sort(ds.longitude)[:-1]
# resolution: diffs[diffs > 0].min() (necessary approach due to flattened files)
# thresh should not be above 0 (could be because of some regional data...)
thresh = max(ds.longitude.min() - diffs[diffs > 0].min() / 2, 0)
Copy link

Copilot AI Feb 18, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thresh = max(ds.longitude.min() - res/2, 0) is inverted relative to the comment and will also keep the original ERA5 bug: for datasets with longitude starting at 0 (ERA5), this forces thresh to 0 so glaciers in (-res/2, 0) still get shifted to +360 and can select the 359.75 gridpoint instead of 0. It also makes thresh positive for regional datasets with ds.longitude.min() > 0, which would incorrectly shift many positive longitudes. Consider computing res once, then using a cap of 0 (i.e. min(..., 0)) and ensure ds.longitude.min() is converted to a Python float before applying min/max.

Suggested change
thresh = max(ds.longitude.min() - diffs[diffs > 0].min() / 2, 0)
res = float(diffs[diffs > 0].min())
min_lon = float(ds.longitude.min())
thresh = min(min_lon - res / 2.0, 0.0)

Copilot uses AI. Check for mistakes.
lon = gdir.cenlon + 360 if gdir.cenlon < thresh else gdir.cenlon
yrs = ds['time.year'].data
y0 = yrs[0] if y0 is None else y0
y1 = yrs[-1] if y1 is None else y1
Expand All @@ -162,6 +167,9 @@ def process_ecmwf_data(gdir, dataset=None, ensemble_member=0,
ref_lat = float(ds['latitude'])
with xr.open_dataset(get_ecmwf_file(dataset, 'pre')) as ds:
_check_ds_validity(ds)
diffs = np.sort(ds.longitude)[1:] - np.sort(ds.longitude)[:-1]
thresh = max(ds.longitude.min() - diffs[diffs > 0].min() / 2, 0)
lon = gdir.cenlon + 360 if gdir.cenlon < thresh else gdir.cenlon
Copy link

Copilot AI Feb 18, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same issue as above: the thresh = max(...) logic will transform longitudes that should not be transformed (including the (-res/2, 0) ERA5 case), and can become positive when ds.longitude.min() > 0 (regional files), shifting valid positive longitudes to +360. Use a 0-cap with min(..., 0) (not max) and cast ds.longitude.min() to float.

Copilot uses AI. Check for mistakes.
ds = ds.sel(time=slice(f'{y0}-01-01', f'{y1}-12-01'))
if dataset == 'CERA':
ds = ds.sel(number=ensemble_member)
Expand All @@ -174,6 +182,9 @@ def process_ecmwf_data(gdir, dataset=None, ensemble_member=0,
prcp = ds['tp'].data * 1000 * ds['time.daysinmonth']
with xr.open_dataset(get_ecmwf_file(dataset, 'inv')) as ds:
_check_ds_validity(ds)
diffs = np.sort(ds.longitude)[1:] - np.sort(ds.longitude)[:-1]
thresh = max(ds.longitude.min() - diffs[diffs > 0].min() / 2, 0)
lon = gdir.cenlon + 360 if gdir.cenlon < thresh else gdir.cenlon
Copy link

Copilot AI Feb 18, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same issue as above: thresh = max(ds.longitude.min() - res/2, 0) is the wrong direction for capping at 0 and does not prevent the ERA5 near-0 longitude mis-selection. Use min(..., 0) (and cast ds.longitude.min() to float) so that small negative longitudes in (-res/2, 0) are not shifted to +360 for datasets with a 0° gridpoint.

Copilot uses AI. Check for mistakes.
try:
ds = ds.isel(time=0)
except (ValueError):
Expand All @@ -192,12 +203,19 @@ def process_ecmwf_data(gdir, dataset=None, ensemble_member=0,

if dataset == 'ERA5dr':
with xr.open_dataset(get_ecmwf_file(dataset, 'lapserates')) as ds:
# todo: lapse rate code could be removed, as it is not included in monthly_climate_file anyways ...
diffs = np.sort(ds.longitude)[1:] - np.sort(ds.longitude)[:-1]
thresh = max(ds.longitude.min() - diffs[diffs > 0].min() / 2, 0)
lon = gdir.cenlon + 360 if gdir.cenlon < thresh else gdir.cenlon
_check_ds_validity(ds)
Copy link

Copilot AI Feb 18, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In addition to the near-0 longitude issue, using Python's max() with ds.longitude.min() (an xarray scalar) can raise type/boolean ambiguity errors. Cast the minimum longitude to a Python float (e.g. float(ds.longitude.min())) before applying the threshold logic, and cap at 0 with min(..., 0) rather than max(..., 0).

Copilot uses AI. Check for mistakes.
ds = ds.sel(time=slice(f'{y0}-01-01', f'{y1}-12-01'))
ds = ds.sel(longitude=lon, latitude=lat, method='nearest')

with xr.open_dataset(get_ecmwf_file(dataset, 'tempstd')) as ds:
_check_ds_validity(ds)
diffs = np.sort(ds.longitude)[1:] - np.sort(ds.longitude)[:-1]
thresh = max(ds.longitude.min() - diffs[diffs > 0].min() / 2, 0)
lon = gdir.cenlon + 360 if gdir.cenlon < thresh else gdir.cenlon
ds = ds.sel(time=slice(f'{y0}-01-01', f'{y1}-12-01'))
Copy link

Copilot AI Feb 18, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same threshold bug here: max(..., 0) does not match the intent of “thresh should not be above 0” and can both (1) keep the ERA5 near-0 longitude mis-selection and (2) incorrectly shift positive longitudes for datasets whose minimum longitude is > 0. Prefer min(float(ds.longitude.min()) - res/2, 0).

Copilot uses AI. Check for mistakes.
ds = ds.sel(longitude=lon, latitude=lat, method='nearest')
temp_std = ds['t2m_std'].data
Expand Down
4 changes: 4 additions & 0 deletions oggm/shop/w5e5.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,10 @@ def process_gswp3_w5e5_data(gdir, y0=None, y1=None, output_filesuffix=None):
# first temperature dataset
with xr.open_dataset(path_tmp) as ds:
assert ds.longitude.min() >= 0
# check that W5E5 gridpoints do not cross the "0" line (gridpoint center should be at res/2)
diffs = np.sort(ds.longitude)[1:] - np.sort(ds.longitude)[:-1]
res_lon = diffs[diffs > 0].min()
np.testing.assert_allclose(ds.longitude.min(), res_lon / 2)
Copy link

Copilot AI Feb 18, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This adds a full sort of all longitudes and an np.testing.assert_allclose inside the per-glacier processing path. Sorting can be expensive for large flattened datasets and np.testing.* raises AssertionError in production rather than a domain-specific error. Consider moving this grid-origin/resolution check into tests (or a one-time validation/cached value), and avoid per-call sorting in process_gswp3_w5e5_data.

Suggested change
assert ds.longitude.min() >= 0
# check that W5E5 gridpoints do not cross the "0" line (gridpoint center should be at res/2)
diffs = np.sort(ds.longitude)[1:] - np.sort(ds.longitude)[:-1]
res_lon = diffs[diffs > 0].min()
np.testing.assert_allclose(ds.longitude.min(), res_lon / 2)
lon_min = float(ds.longitude.min())
if lon_min < 0:
raise InvalidParamsError(
'W5E5 longitude coordinates must be non-negative (0–360). '
f'Found minimum longitude {lon_min}.'
)

Copilot uses AI. Check for mistakes.
yrs = ds['time.year'].data
y0 = yrs[0] if y0 is None else y0
y1 = yrs[-1] if y1 is None else y1
Expand Down
30 changes: 21 additions & 9 deletions oggm/tests/test_shop.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,8 +310,8 @@ def test_glacier_gridpoint_selection(self):
# downloaded for the HEF gridpoints as they would be too large otherwise.
# However, the same test and other tests are done for all files
# (also ISIMIP3b) and all glaciers in this notebook:
# https://nbviewer.org/urls/cluster.klima.uni-bremen.de/
# ~lschuster/example_ipynb/flatten_glacier_gridpoint_tests.ipynb
# https://nbviewer.org/urls/cluster.klima.uni-bremen.de/~oggm/
# climate/notebooks/flatten_glacier_gridpoint_tests.ipynb
if d == 'GSWP3_W5E5':
res = 0.5
with xr.open_dataset(w5e5.get_gswp3_w5e5_file(d, 'inv')) as dinv:
Expand All @@ -321,16 +321,28 @@ def test_glacier_gridpoint_selection(self):
with xr.open_dataset(ecmwf.get_ecmwf_file('ERA5', 'inv')) as dinv:
dinv = dinv.load()

# select five glaciers where two failed in the
diffs = np.sort(dinv.longitude)[1:] - np.sort(dinv.longitude)[:-1]
np.testing.assert_allclose(diffs[diffs > 0].min(), res)
# previous gswp3_w5e5 version, and two are only necessary for RGI7
for coord in [(10.7584, 46.8003), # HEF
(-70.8931 + 360, -72.4474), # RGI60-19.00124
(-70.8931, -72.4474), # RGI60-19.00124
(51.495, 30.9010), # RGI60-12.01691
(0, 0), # random gridpoint not near to a glacier
( -141.670274+360, 69.166921), # in RGI7C, not in RGI6
(-66.855668+360, -67.535551) # only in RGI7G, not in RGI6 or in RGI 7C
( -141.670274, 69.166921), # in RGI7C, not in RGI6
(-66.855668, -67.535551), # only in RGI7G, not in RGI6 or in RGI 7C
(-0.039683, 42.695419), ## glacier near 0
(-179.915527 + 360, 66.276108) # RGI60-10.05049 (near -180 longitude)
]:
lon, lat = coord
# for ERA5, all glaciers with longitudes (-0.125, 0.125) should take the longitude 0 data
# for glaciers between (-0.125 and 0) longitude, we do not want to transform to +360!!!
lon = lon + 360 if lon < dinv.longitude.min() - res / 2 else lon
if d == 'GSWP3_W5E5':
# in case of GSWP3_W5E5, all glaciers with longitudes (0,1) will use the 0.5 gridpoint,
# and those within (-1,0) use the 359.5 point
# therefore, we only have the 0,360 transformation issue for ERA5
np.testing.assert_allclose(dinv.longitude.min() - res / 2, 0)

# get the distances to the glacier coordinate
c = (dinv.longitude - lon) ** 2 + (dinv.latitude - lat) ** 2
c = c.to_dataframe('distance').sort_values('distance')
Expand All @@ -349,8 +361,8 @@ def test_glacier_gridpoint_selection(self):
assert np.abs(lon_near - lon) <= res/2
assert dist <= ((res/2) ** 2 + (res/2) ** 2) ** 0.5

# this only contains data for five glaciers, let's still check some basics
# both glaciers are not at latitude or longitude 0
# this only contains data for a few glaciers, let's still check some basics
# both glaciers are not at latitude or longitude 0, but for one glacier we need to pick longitude 0
if d == 'GSWP3_W5E5':
with xr.open_dataset(w5e5.get_gswp3_w5e5_file(d, 'temp_std')) as dtemp_std:
assert np.all(dtemp_std.latitude != 0)
Expand All @@ -361,7 +373,7 @@ def test_glacier_gridpoint_selection(self):
else:
with xr.open_dataset(ecmwf.get_ecmwf_file('ERA5', 'tmp')) as dtemp:
assert np.all(dtemp.latitude != 0)
assert np.all(dtemp.longitude != 0)
assert np.all(dtemp.longitude >= 0)
assert dtemp.isel(time=0).t2m.std() > 0
assert dtemp.longitude.std() > 0
assert dtemp.latitude.std() > 0
Expand Down
Loading