66import xarray as xr
77from xesmf .data import wave_smooth
88from xesmf .util import grid_global
9- from dodola .services import bias_correct , build_weights , rechunk , regrid
9+ from xclim .core .calendar import convert_calendar
10+ from dodola .services import (
11+ bias_correct ,
12+ build_weights ,
13+ rechunk ,
14+ regrid ,
15+ remove_leapdays ,
16+ clean_cmip6 ,
17+ )
1018from dodola .repository import memory_repository
1119
1220
@@ -32,6 +40,39 @@ def _datafactory(x, start_time="1950-01-01"):
3240 return out
3341
3442
def _gcmfactory(x, start_time="1950-01-01"):
    """Build a synthetic, GCM-like xr.Dataset from a 1-D series for tests.

    The series ``x`` becomes the variable ``fakevariable`` laid out on
    ``(time, lon, lat, member_id)``, where every axis except ``time`` has
    length one. The time axis is daily on the "standard" calendar, beginning
    at ``start_time``, so leap days are present whenever the span covers one.
    Extra ``index``, ``height`` and ``time_bnds`` coordinates are attached so
    that cleanup code has something to strip.

    Parameters
    ----------
    x : array-like with ``ndim`` and ``len``
        One-dimensional data; one value per day.
    start_time : str or str-convertible, optional
        First date of the daily time axis.

    Returns
    -------
    xr.Dataset

    Raises
    ------
    ValueError
        If ``x`` is not one-dimensional.
    """
    first_day = str(start_time)
    if x.ndim != 1:
        raise ValueError("'x' needs dim of one")

    timestamps = xr.cftime_range(
        start=first_day, periods=len(x), freq="D", calendar="standard"
    )

    # Append three singleton axes so the data matches the four declared dims.
    values = x[:, None, None, None]
    data_vars = {
        "fakevariable": (["time", "lon", "lat", "member_id"], values),
    }

    # "index" repeats the time values; the remaining coordinates each hold a
    # single 1.0 along a dimension named after themselves.
    coords = {"index": timestamps, "time": timestamps}
    for axis_name in ("lon", "lat", "member_id", "height", "time_bnds"):
        coords[axis_name] = ([axis_name], [1.0])

    return xr.Dataset(data_vars, coords=coords)
74+
75+
3576@pytest .fixture
3677def domain_file (request ):
3778 """ Creates a fake domain Dataset for testing"""
@@ -299,3 +340,50 @@ def test_regrid_weights_integration(domain_file, tmpdir):
299340 )
300341 actual_shape = fakestorage .read ("an/output/path.zarr" )["fakevariable" ].shape
301342 assert actual_shape == expected_shape
343+
344+
def test_clean_cmip6():
    """Check that the CMIP6 cleanup service drops the extra dataset dimensions."""
    # Synthetic input: 1500 daily values, i.e. a little over four years.
    n_days = 1500
    signal = 0.5 * np.sin(np.linspace(-10 * np.pi, 10 * np.pi, n_days))

    in_path = "an/input/path.zarr"
    out_path = "an/output/path.zarr"
    fakestorage = memory_repository(
        {in_path: _gcmfactory(signal, start_time="1950-01-01")}
    )

    clean_cmip6(in_path, out_path, storage=fakestorage, leapday_removal=True)

    cleaned_dims = fakestorage.read(out_path).dims
    for extra_dim in ("height", "member_id", "time_bnds"):
        assert extra_dim not in cleaned_dims
369+
370+
def test_remove_leapdays():
    """Check that the leap-day removal service leaves 365 days in a leap year."""
    # Synthetic input: 1500 daily values from 1950-01-01, which spans all of
    # 1952 (a leap year on the standard calendar).
    n_days = 1500
    signal = 0.5 * np.sin(np.linspace(-10 * np.pi, 10 * np.pi, n_days))

    in_path = "an/input/path.zarr"
    out_path = "an/output/path.zarr"
    fakestorage = memory_repository(
        {in_path: _gcmfactory(signal, start_time="1950-01-01")}
    )

    remove_leapdays(in_path, out_path, storage=fakestorage)

    # Restrict the output to calendar year 1952 and count the remaining days.
    year_1952 = fakestorage.read(out_path).sel(
        time=slice("1952-01-01", "1952-12-31")
    )
    assert len(year_1952.time) == 365
0 commit comments