Skip to content

Commit 646ac66

Browse files
authored
Merge pull request #42 from dgergel/add_cmip6_model_cleanup_service
add new leapday removal service and cmip6 cleanup service
2 parents cbdf90b + 1035d99 commit 646ac66

File tree

4 files changed

+211
-2
lines changed

4 files changed

+211
-2
lines changed

dodola/cli.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,27 @@ def dodola_cli(debug):
5151
logging.basicConfig(level=loglevel)
5252

5353

54+
@dodola_cli.command(help="Clean up and standardize GCM")
55+
@click.argument("x", required=True)
56+
@click.argument("out", required=True)
57+
@click.option(
58+
"--drop-leapdays/--no-drop-leapdays",
59+
default=True,
60+
help="Whether to remove leap days",
61+
)
62+
def cleancmip6(x, out, drop_leapdays):
63+
"""Clean and standardize CMIP6 GCM to 'out'. If drop-leapdays option is set, remove leap days"""
64+
services.clean_cmip6(x, out, drop_leapdays, storage=_authenticate_storage())
65+
66+
67+
@dodola_cli.command(help="Remove leap days and update calendar")
68+
@click.argument("x", required=True)
69+
@click.argument("out", required=True)
70+
def removeleapdays(x, out):
71+
""" Remove leap days and update calendar attribute"""
72+
services.remove_leapdays(x, out, storage=_authenticate_storage())
73+
74+
5475
@dodola_cli.command(help="Bias-correct GCM on observations")
5576
@click.argument("x", required=True)
5677
@click.argument("xtrain", required=True)

dodola/core.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from skdownscale.pointwise_models import PointWiseDownscaler, BcsdTemperature
77
import xarray as xr
88
from xclim import sdba
9+
from xclim.core.calendar import convert_calendar
910
import xesmf as xe
1011

1112
# Break this down into a submodule(s) if needed.
@@ -117,3 +118,58 @@ def xesmf_regrid(x, domain, method, weights_path=None):
117118
filename=weights_path,
118119
)
119120
return regridder(x)
121+
122+
123+
def standardize_gcm(ds, leapday_removal=True):
124+
"""
125+
126+
Parameters
127+
----------
128+
x : xr.Dataset
129+
leapday_removal : bool, optional
130+
131+
Returns
132+
-------
133+
xr.Dataset
134+
"""
135+
dims_to_drop = []
136+
if "height" in ds.dims:
137+
dims_to_drop.append("height")
138+
if "member_id" in ds.dims:
139+
dims_to_drop.append("member_id")
140+
if "time_bnds" in ds.dims:
141+
dims_to_drop.append("time_bnds")
142+
143+
if "member_id" in ds.dims:
144+
ds_cleaned = ds.isel(member_id=0).drop(dims_to_drop)
145+
else:
146+
ds_cleaned = ds.isel.drop(dims_to_drop)
147+
148+
if leapday_removal:
149+
# if calendar is just integers, xclim cannot understand it
150+
if ds.time.dtype == "int64":
151+
ds_cleaned["time"] = xr.decode_cf(ds_cleaned).time
152+
# remove leap days and update calendar
153+
ds_noleap = xclim_remove_leapdays(ds_cleaned)
154+
155+
# rechunk, otherwise chunks are different sizes
156+
ds_out = ds_noleap.chunk(730, len(ds.lat), len(ds.lon))
157+
else:
158+
ds_out = ds_cleaned
159+
160+
return ds_out
161+
162+
163+
def xclim_remove_leapdays(ds):
164+
"""
165+
166+
Parameters
167+
----------
168+
ds : xr.Dataset
169+
170+
Returns
171+
-------
172+
xr.Dataset
173+
"""
174+
ds_noleap = convert_calendar(ds, target="noleap")
175+
return ds_noleap

dodola/services.py

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,13 @@
55
import os
66
from tempfile import TemporaryDirectory
77
from rechunker import rechunk as rechunker_rechunk
8-
from dodola.core import apply_bias_correction, build_xesmf_weights_file, xesmf_regrid
8+
from dodola.core import (
9+
apply_bias_correction,
10+
build_xesmf_weights_file,
11+
xesmf_regrid,
12+
standardize_gcm,
13+
xclim_remove_leapdays,
14+
)
915

1016
logger = logging.getLogger(__name__)
1117

@@ -158,6 +164,44 @@ def regrid(x, out, method, storage, domain_file, weights_path=None):
158164
storage.write(out, regridded_ds)
159165

160166

167+
@log_service
168+
def clean_cmip6(x, out, leapday_removal, storage):
169+
"""Cleans and standardizes CMIP6 GCM
170+
171+
Parameters
172+
----------
173+
x : str
174+
Storage URL to input xr.Dataset that will be cleaned.
175+
out : str
176+
Storage URL to write cleaned GCM output to.
177+
leapday_removal : bool
178+
Whether or not to remove leap days.
179+
storage : dodola.repository._ZarrRepo
180+
Storage abstraction for data IO.
181+
"""
182+
ds = storage.read(x)
183+
cleaned_ds = standardize_gcm(ds, leapday_removal)
184+
storage.write(out, cleaned_ds)
185+
186+
187+
@log_service
188+
def remove_leapdays(x, out, storage):
189+
"""Removes leap days and updates calendar attribute
190+
191+
Parameters
192+
----------
193+
x : str
194+
Storage URL to input xr.Dataset that will be regridded.
195+
out : str
196+
Storage URL to write regridded output to.
197+
storage : dodola.repository._ZarrRepo
198+
Storage abstraction for data IO.
199+
"""
200+
ds = storage.read(x)
201+
noleap_ds = xclim_remove_leapdays(ds)
202+
storage.write(out, noleap_ds)
203+
204+
161205
@log_service
162206
def disaggregate(x, weights, out, repo):
163207
"""This is just an example. Please replace or delete."""

dodola/tests/test_services.py

Lines changed: 89 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,15 @@
66
import xarray as xr
77
from xesmf.data import wave_smooth
88
from xesmf.util import grid_global
9-
from dodola.services import bias_correct, build_weights, rechunk, regrid
9+
from xclim.core.calendar import convert_calendar
10+
from dodola.services import (
11+
bias_correct,
12+
build_weights,
13+
rechunk,
14+
regrid,
15+
remove_leapdays,
16+
clean_cmip6,
17+
)
1018
from dodola.repository import memory_repository
1119

1220

@@ -32,6 +40,39 @@ def _datafactory(x, start_time="1950-01-01"):
3240
return out
3341

3442

43+
def _gcmfactory(x, start_time="1950-01-01"):
44+
"""Populate xr.Dataset with synthetic GCM data for testing
45+
that includes extra dimensions and leap days to be removed.
46+
"""
47+
start_time = str(start_time)
48+
if x.ndim != 1:
49+
raise ValueError("'x' needs dim of one")
50+
51+
time = xr.cftime_range(
52+
start=start_time, freq="D", periods=len(x), calendar="standard"
53+
)
54+
55+
out = xr.Dataset(
56+
{
57+
"fakevariable": (
58+
["time", "lon", "lat", "member_id"],
59+
x[:, np.newaxis, np.newaxis, np.newaxis],
60+
)
61+
},
62+
coords={
63+
"index": time,
64+
"time": time,
65+
"lon": (["lon"], [1.0]),
66+
"lat": (["lat"], [1.0]),
67+
"member_id": (["member_id"], [1.0]),
68+
"height": (["height"], [1.0]),
69+
"time_bnds": (["time_bnds"], [1.0]),
70+
},
71+
)
72+
# out['time'] = out['time'].assign_attrs({'calendar': 'standard'})
73+
return out
74+
75+
3576
@pytest.fixture
3677
def domain_file(request):
3778
""" Creates a fake domain Dataset for testing"""
@@ -299,3 +340,50 @@ def test_regrid_weights_integration(domain_file, tmpdir):
299340
)
300341
actual_shape = fakestorage.read("an/output/path.zarr")["fakevariable"].shape
301342
assert actual_shape == expected_shape
343+
344+
345+
def test_clean_cmip6():
346+
""" Tests that cmip6 cleanup removes extra dimensions on dataset """
347+
# Setup input data
348+
n = 1500 # need over four years of daily data
349+
ts = np.sin(np.linspace(-10 * np.pi, 10 * np.pi, n)) * 0.5
350+
ds_gcm = _gcmfactory(ts, start_time="1950-01-01")
351+
352+
fakestorage = memory_repository(
353+
{
354+
"an/input/path.zarr": ds_gcm,
355+
}
356+
)
357+
358+
clean_cmip6(
359+
"an/input/path.zarr",
360+
"an/output/path.zarr",
361+
storage=fakestorage,
362+
leapday_removal=True,
363+
)
364+
ds_cleaned = fakestorage.read("an/output/path.zarr")
365+
366+
assert "height" not in ds_cleaned.dims
367+
assert "member_id" not in ds_cleaned.dims
368+
assert "time_bnds" not in ds_cleaned.dims
369+
370+
371+
def test_remove_leapdays():
372+
""" Test that leapday removal service removes leap days """
373+
# Setup input data
374+
n = 1500 # need over four years of daily data
375+
ts = np.sin(np.linspace(-10 * np.pi, 10 * np.pi, n)) * 0.5
376+
ds_leap = _gcmfactory(ts, start_time="1950-01-01")
377+
378+
fakestorage = memory_repository(
379+
{
380+
"an/input/path.zarr": ds_leap,
381+
}
382+
)
383+
384+
remove_leapdays("an/input/path.zarr", "an/output/path.zarr", storage=fakestorage)
385+
ds_noleap = fakestorage.read("an/output/path.zarr")
386+
ds_leapyear = ds_noleap.loc[dict(time=slice("1952-01-01", "1952-12-31"))]
387+
388+
# check to be sure that leap days have been removed
389+
assert len(ds_leapyear.time) == 365

0 commit comments

Comments
 (0)