diff --git a/CHANGES.md b/CHANGES.md
index 9c10486d6..3922075a6 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -2,7 +2,27 @@
 * Changed the `xcube gen` tool to extract metadata for pre-sorting inputs
   from other than NetCDF inputs, e.g. GeoTIFF.
-
+* The cube generator `xcube gen2` now supports temporal resampling. To use
+  it, set the parameter `time_period` (a pandas-interpretable frequency
+  string, e.g. '4D') together with the newly introduced parameter
+  `temporal_resampling`, which takes a dictionary with entries for
+  upsampling and/or downsampling. Both can be used with or without
+  parameters, depending on the selected method. To resample to a coarser
+  temporal resolution, specify a downsampling method (any of `['count',
+  'first', 'last', 'min', 'max', 'sum', 'prod', 'mean', 'median', 'std',
+  'var', 'percentile']`). To pass additional parameters, provide a tuple
+  of the method name and a dictionary with parameters. Analogously, you
+  can resample to a finer temporal resolution using any of `['asfreq',
+  'ffill', 'bfill', 'pad', 'nearest', 'interpolate']`.
+  Example:
+  ```python
+  temporal_resampling=dict(
+      downsampling=('percentile', {'threshold': 75}),
+      upsampling='pad'
+  ),
+  ```
+  (#523)
+
 ## Changes in 0.9.2
 
 ### Fixes
diff --git a/test/core/gen2/local/test_resamplert.py b/test/core/gen2/local/test_resamplert.py
new file mode 100644
index 000000000..343a18582
--- /dev/null
+++ b/test/core/gen2/local/test_resamplert.py
@@ -0,0 +1,293 @@
+from xcube.core.new import new_cube
+from xcube.core.gen2 import CubeConfig
+from xcube.core.gen2.local.resamplert import CubeResamplerT
+from xcube.core.gridmapping import GridMapping
+
+import cftime
+import numpy as np
+import unittest
+
+
+class CubeResamplerTTest(unittest.TestCase):
+
+    @staticmethod
+    def _get_cube(time_freq: str, time_periods: int, use_cftime: bool = False):
+
+        def b3(index1, index2, index3):
+            return index1 + index2 * 0.1 + index3 * 0.01
+
+        return new_cube(variables=dict(B03=b3),
+                        time_periods=time_periods,
+                        time_freq=time_freq,
+                        use_cftime=use_cftime,
+                        time_dtype='datetime64[s]' if not use_cftime else None,
+                        width=10, height=5, time_start='2010-08-04')
+
+    def test_transform_cube_no_time_period(self):
+        cube_config = CubeConfig(time_range=('2010-01-01', '2012-12-31'))
+        temporal_resampler = CubeResamplerT()
+
+        cube = self._get_cube(time_freq='M', time_periods=12)
+
+        resampled_cube, grid_mapping, cube_config = temporal_resampler.\
+            transform_cube(cube,
+                           GridMapping.from_dataset(cube),
+                           cube_config)
+        self.assertEqual(cube, resampled_cube)
+
+    def test_transform_cube_downsample_to_years(self):
+        cube_config = CubeConfig(time_range=('2010-01-01', '2014-12-31'),
+                                 time_period='2Y',
+                                 temporal_resampling=dict(
+                                     downsampling=('min', {}))
+                                 )
+        temporal_resampler = CubeResamplerT()
+
+        cube = self._get_cube(time_freq='M', time_periods=24)
+
+        resampled_cube, grid_mapping, cube_config = temporal_resampler.\
+            transform_cube(cube,
+                           GridMapping.from_dataset(cube),
+                           cube_config)
+        self.assertIsNotNone(resampled_cube)
+        np.testing.assert_equal(
+            resampled_cube.time.values,
+            np.array(['2011-01-01T00:00:00', '2013-01-01T00:00:00'],
+                     dtype=np.datetime64))
+        np.testing.assert_equal(
+            resampled_cube.time_bnds.values,
+            np.array([['2010-01-01T00:00:00', '2012-01-01T00:00:00'],
+                      ['2012-01-01T00:00:00', '2014-01-01T00:00:00']],
+                     dtype=np.datetime64))
+        self.assertEqual((2, 5, 10), resampled_cube.B03.shape)
+        self.assertAlmostEqual(0.0,
resampled_cube.B03[0].values.min(), 8) + self.assertAlmostEqual(16.0, resampled_cube.B03[1].values.min(), 8) + + def test_transform_cube_downsample_to_months(self): + cube_config = CubeConfig(time_range=('2010-08-01', '2010-11-30'), + time_period='2M', + temporal_resampling=dict( + downsampling=('min', {})) + ) + temporal_resampler = CubeResamplerT() + + cube = self._get_cube(time_freq='W', time_periods=12) + + resampled_cube, grid_mapping, cube_config = temporal_resampler.\ + transform_cube(cube, + GridMapping.from_dataset(cube), + cube_config) + self.assertIsNotNone(resampled_cube) + np.testing.assert_equal( + resampled_cube.time.values, + np.array(['2010-09-01T00:00:00', '2010-11-01T00:00:00'], + dtype=np.datetime64)) + np.testing.assert_equal( + resampled_cube.time_bnds.values, + np.array([['2010-08-01T00:00:00', '2010-10-01T00:00:00'], + ['2010-10-01T00:00:00', '2010-12-01T00:00:00']], + dtype=np.datetime64)) + self.assertEqual((2, 5, 10), resampled_cube.B03.shape) + self.assertAlmostEqual(0.0, resampled_cube.B03[0].values.min(), 8) + self.assertAlmostEqual(8.0, resampled_cube.B03[1].values.min(), 8) + + def test_transform_cube_downsample_to_weeks(self): + cube_config = CubeConfig(time_range=('2010-08-03', '2010-09-10'), + time_period='2W', + temporal_resampling=dict( + downsampling=('max', {})) + ) + temporal_resampler = CubeResamplerT() + + cube = self._get_cube(time_freq='D', time_periods=32) + + resampled_cube, grid_mapping, cube_config = temporal_resampler.\ + transform_cube(cube, + GridMapping.from_dataset(cube), + cube_config) + self.assertIsNotNone(resampled_cube) + np.testing.assert_equal( + resampled_cube.time.values, + np.array(['2010-08-08T00:00:00', '2010-08-22T00:00:00', + '2010-09-05T00:00:00'], + dtype=np.datetime64)) + np.testing.assert_equal( + resampled_cube.time_bnds.values, + np.array([['2010-08-01T00:00:00', '2010-08-15T00:00:00'], + ['2010-08-15T00:00:00', '2010-08-29T00:00:00'], + ['2010-08-29T00:00:00', '2010-09-12T00:00:00']], + dtype=np.datetime64)) + self.assertEqual((3, 5, 10), resampled_cube.B03.shape) + self.assertAlmostEqual(10.0, resampled_cube.B03[0].values.min(), 8) + self.assertAlmostEqual(24.0, resampled_cube.B03[1].values.min(), 8) + self.assertAlmostEqual(31.0, resampled_cube.B03[2].values.min(), 8) + + def test_transform_cube_upsample_to_months(self): + cube_config = CubeConfig(time_range=('2011-10-01', '2012-03-31'), + time_period='2M', + temporal_resampling=dict( + upsampling=('interpolate', + {'kind': 'linear'}) + )) + temporal_resampler = CubeResamplerT() + + cube = self._get_cube(time_freq='Y', time_periods=2) + + resampled_cube, grid_mapping, cube_config = temporal_resampler.\ + transform_cube(cube, + GridMapping.from_dataset(cube), + cube_config) + self.assertIsNotNone(resampled_cube) + np.testing.assert_equal( + resampled_cube.time.values, + np.array(['2011-11-01T00:00:00', '2012-01-01T00:00:00', + '2012-03-01T00:00:00'], + dtype=np.datetime64)) + np.testing.assert_equal( + resampled_cube.time_bnds.values, + np.array([['2011-10-01T00:00:00', '2011-12-01T00:00:00'], + ['2011-12-01T00:00:00', '2012-02-01T00:00:00'], + ['2012-02-01T00:00:00', '2012-04-01T00:00:00']], + dtype=np.datetime64)) + self.assertEqual((3, 5, 10), resampled_cube.B03.shape) + self.assertAlmostEqual(0.33561644, + resampled_cube.B03[0].values.min(), 8) + self.assertAlmostEqual(0.50273973, + resampled_cube.B03[1].values.min(), 8) + self.assertAlmostEqual(0.66712329, + resampled_cube.B03[2].values.min(), 8) + + def test_transform_cube_upsample_to_weeks(self): + 
cube_config = CubeConfig(time_range=('2010-09-01', '2010-10-10'), + time_period='4W', + temporal_resampling=dict( + upsampling=('nearest', {})) + ) + temporal_resampler = CubeResamplerT() + + cube = self._get_cube(time_freq='M', time_periods=4) + + resampled_cube, grid_mapping, cube_config = temporal_resampler.\ + transform_cube(cube, + GridMapping.from_dataset(cube), + cube_config) + self.assertIsNotNone(resampled_cube) + np.testing.assert_equal( + resampled_cube.time.values, + np.array(['2010-09-12T00:00:00', '2010-10-10T00:00:00'], + dtype=np.datetime64)) + np.testing.assert_equal( + resampled_cube.time_bnds.values, + np.array([['2010-08-29T00:00:00', '2010-09-26T00:00:00'], + ['2010-09-26T00:00:00', '2010-10-24T00:00:00']], + dtype=np.datetime64)) + self.assertEqual((2, 5, 10), resampled_cube.B03.shape) + self.assertAlmostEqual(0.0, resampled_cube.B03[0].values.min(), 8) + self.assertAlmostEqual(1.0, resampled_cube.B03[1].values.min(), 8) + + def test_transform_cube_upsample_to_days(self): + cube_config = CubeConfig(time_range=('2010-08-14', '2010-08-24'), + time_period='2D', + temporal_resampling=dict( + upsampling=('interpolate', + {'kind': 'linear'}) + )) + temporal_resampler = CubeResamplerT() + + cube = self._get_cube(time_freq='W', time_periods=3) + + resampled_cube, grid_mapping, cube_config = temporal_resampler.\ + transform_cube(cube, + GridMapping.from_dataset(cube), + cube_config) + self.assertIsNotNone(resampled_cube) + np.testing.assert_equal( + resampled_cube.time.values, + np.array(['2010-08-15T00:00:00', '2010-08-17T00:00:00', + '2010-08-19T00:00:00', '2010-08-21T00:00:00', + '2010-08-23T00:00:00'], + dtype=np.datetime64)) + np.testing.assert_equal( + resampled_cube.time_bnds.values, + np.array([['2010-08-14T00:00:00', '2010-08-16T00:00:00'], + ['2010-08-16T00:00:00', '2010-08-18T00:00:00'], + ['2010-08-18T00:00:00', '2010-08-20T00:00:00'], + ['2010-08-20T00:00:00', '2010-08-22T00:00:00'], + ['2010-08-22T00:00:00', '2010-08-24T00:00:00']], + dtype=np.datetime64)) + self.assertEqual((5, 5, 10), resampled_cube.B03.shape) + self.assertAlmostEqual(0.5, + resampled_cube.B03[0].values.min(), 8) + self.assertAlmostEqual(0.78571429, + resampled_cube.B03[1].values.min(), 8) + self.assertAlmostEqual(1.07142857, + resampled_cube.B03[2].values.min(), 8) + self.assertAlmostEqual(1.35714286, + resampled_cube.B03[3].values.min(), 8) + self.assertAlmostEqual(1.64285714, + resampled_cube.B03[4].values.min(), 8) + + def test_transform_cube_downsample_to_years_cftimes(self): + cube_config = CubeConfig(time_range=('2010-01-01', '2014-12-31'), + time_period='2Y', + temporal_resampling=dict( + downsampling=('min', {})) + ) + temporal_resampler = CubeResamplerT() + + cube = self._get_cube(time_freq='M', time_periods=24, use_cftime=True) + + resampled_cube, grid_mapping, cube_config = temporal_resampler.\ + transform_cube(cube, + GridMapping.from_dataset(cube), + cube_config) + self.assertIsNotNone(resampled_cube) + np.testing.assert_equal(resampled_cube.time.values, + [cftime.DatetimeProlepticGregorian(2011, 1, 1), + cftime.DatetimeProlepticGregorian(2013, 1, 1)]) + np.testing.assert_equal( + resampled_cube.time_bnds.values, + [[cftime.DatetimeProlepticGregorian(2010, 1, 1), + cftime.DatetimeProlepticGregorian(2012, 1, 1)], + [cftime.DatetimeProlepticGregorian(2012, 1, 1), + cftime.DatetimeProlepticGregorian(2014, 1, 1)]]) + self.assertEqual((2, 5, 10), resampled_cube.B03.shape) + self.assertAlmostEqual(0.0, resampled_cube.B03[0].values.min(), 8) + self.assertAlmostEqual(16.0, 
resampled_cube.B03[1].values.min(), 8) + + def test_transform_cube_upsample_to_months_cftimes(self): + cube_config = CubeConfig(time_range=('2011-10-01', '2012-03-31'), + time_period='2M', + temporal_resampling=dict( + upsampling=('interpolate', + {'kind': 'linear'}) + )) + temporal_resampler = CubeResamplerT() + + cube = self._get_cube(time_freq='Y', time_periods=2, use_cftime=True) + + resampled_cube, grid_mapping, cube_config = temporal_resampler.\ + transform_cube(cube, + GridMapping.from_dataset(cube), + cube_config) + self.assertIsNotNone(resampled_cube) + np.testing.assert_equal( + resampled_cube.time.values, + [cftime.DatetimeProlepticGregorian(2011, 11, 1), + cftime.DatetimeProlepticGregorian(2012, 1, 1), + cftime.DatetimeProlepticGregorian(2012, 3, 1)]) + np.testing.assert_equal( + resampled_cube.time_bnds.values, + [[cftime.DatetimeProlepticGregorian(2011, 10, 1), + cftime.DatetimeProlepticGregorian(2011, 12, 1)], + [cftime.DatetimeProlepticGregorian(2011, 12, 1), + cftime.DatetimeProlepticGregorian(2012, 2, 1)], + [cftime.DatetimeProlepticGregorian(2012, 2, 1), + cftime.DatetimeProlepticGregorian(2012, 4, 1)]]) + self.assertEqual((3, 5, 10), resampled_cube.B03.shape) + self.assertAlmostEqual(0.33561644, + resampled_cube.B03[0].values.min(), 8) + self.assertAlmostEqual(0.50273973, + resampled_cube.B03[1].values.min(), 8) + self.assertAlmostEqual(0.66712329, + resampled_cube.B03[2].values.min(), 8) diff --git a/test/core/gen2/test_config.py b/test/core/gen2/test_config.py index 35e015b8d..8600c7349 100644 --- a/test/core/gen2/test_config.py +++ b/test/core/gen2/test_config.py @@ -56,6 +56,9 @@ def test_from_dict(self): spatial_res=0.05, time_range=['2018-01-01', None], time_period='4D', + temporal_resampling=dict( + upsampling=('interpolate', {'kind': 'slinear'}) + ), metadata=dict(title='S2L2A subset'), variable_metadata=dict( B03=dict(long_name='Band 3'), @@ -69,6 +72,8 @@ def test_from_dict(self): self.assertEqual(0.05, cube_config.spatial_res) self.assertEqual(('2018-01-01', None), cube_config.time_range) self.assertEqual('4D', cube_config.time_period) + self.assertEqual(dict(upsampling=('interpolate', {'kind': 'slinear'})), + cube_config.temporal_resampling) self.assertEqual(dict(title='S2L2A subset'), cube_config.metadata) self.assertEqual( @@ -86,6 +91,10 @@ def test_to_dict(self): spatial_res=0.05, time_range=['2018-01-01', None], time_period='4D', + temporal_resampling=dict( + downsampling=('percentile', {'threshold': 75}), + upsampling='pad' + ), metadata=dict(title='S2L2A subset'), variable_metadata=dict( B03=dict(long_name='Band 3'), diff --git a/test/core/gen2/test_request.py b/test/core/gen2/test_request.py index c3ed5e9d8..520a14fe0 100644 --- a/test/core/gen2/test_request.py +++ b/test/core/gen2/test_request.py @@ -49,7 +49,11 @@ def test_from_dict(self): bbox=[12.2, 52.1, 13.9, 54.8], spatial_res=0.05, time_range=['2018-01-01', None], - time_period='4D'), + time_period='4D', + temporal_resampling=dict( + upsampling=('interpolate', + {'kind': 'slinear'})) + ), output_config=dict(store_id='memory', data_id='CHL') ) @@ -69,6 +73,8 @@ def test_from_dict(self): self.assertEqual(0.05, gen_config.cube_config.spatial_res) self.assertEqual(('2018-01-01', None), gen_config.cube_config.time_range) self.assertEqual('4D', gen_config.cube_config.time_period) + self.assertEqual(dict(upsampling=('interpolate', {'kind': 'slinear'})), + gen_config.cube_config.temporal_resampling) def test_to_dict(self): expected_dict = dict( @@ -79,7 +85,11 @@ def test_to_dict(self): bbox=[12.2, 
52.1, 13.9, 54.8], spatial_res=0.05, time_range=['2018-01-01', None], - time_period='4D'), + time_period='4D', + temporal_resampling=dict( + downsampling=('percentile', {'threshold': 70}), + upsampling='pad' + )), output_config=dict(store_id='memory', replace=False, data_id='CHL') diff --git a/test/core/resampling/test_temporal.py b/test/core/resampling/test_temporal.py index 28d4ffb9f..43b97df75 100644 --- a/test/core/resampling/test_temporal.py +++ b/test/core/resampling/test_temporal.py @@ -36,28 +36,46 @@ def test_resample_in_time_min_max(self): self.assertIn('precipitation_min', resampled_cube) self.assertIn('precipitation_max', resampled_cube) self.assertEqual(('time',), resampled_cube.time.dims) - self.assertEqual(('time', 'lat', 'lon'), resampled_cube.temperature_min.dims) - self.assertEqual(('time', 'lat', 'lon'), resampled_cube.temperature_max.dims) - self.assertEqual(('time', 'lat', 'lon'), resampled_cube.precipitation_min.dims) - self.assertEqual(('time', 'lat', 'lon'), resampled_cube.precipitation_max.dims) + self.assertEqual(('time', 'lat', 'lon'), + resampled_cube.temperature_min.dims) + self.assertEqual(('time', 'lat', 'lon'), + resampled_cube.temperature_max.dims) + self.assertEqual(('time', 'lat', 'lon'), + resampled_cube.precipitation_min.dims) + self.assertEqual(('time', 'lat', 'lon'), + resampled_cube.precipitation_max.dims) self.assertEqual((6,), resampled_cube.time.shape) self.assertEqual((6, 180, 360), resampled_cube.temperature_min.shape) self.assertEqual((6, 180, 360), resampled_cube.temperature_max.shape) self.assertEqual((6, 180, 360), resampled_cube.precipitation_min.shape) self.assertEqual((6, 180, 360), resampled_cube.precipitation_max.shape) - np.testing.assert_equal(resampled_cube.time.values, - np.array( - ['2017-06-25T00:00:00', '2017-07-09T00:00:00', - '2017-07-23T00:00:00', '2017-08-06T00:00:00', - '2017-08-20T00:00:00', '2017-09-03T00:00:00'], dtype=np.datetime64)) - np.testing.assert_allclose(resampled_cube.temperature_min.values[..., 0, 0], - np.array([272.0, 272.4, 273.0, 273.8, 274.4, 274.9])) - np.testing.assert_allclose(resampled_cube.temperature_max.values[..., 0, 0], - np.array([272.3, 272.9, 273.7, 274.3, 274.8, 274.9])) - np.testing.assert_allclose(resampled_cube.precipitation_min.values[..., 0, 0], - np.array([119.4, 118.2, 116.6, 115.4, 114.4, 114.2])) - np.testing.assert_allclose(resampled_cube.precipitation_max.values[..., 0, 0], - np.array([120.0, 119.2, 118.0, 116.4, 115.2, 114.2])) + np.testing.assert_equal( + resampled_cube.time.values, + np.array(['2017-07-02T00:00:00', '2017-07-16T00:00:00', + '2017-07-30T00:00:00', '2017-08-13T00:00:00', + '2017-08-27T00:00:00', '2017-09-10T00:00:00'], + dtype=np.datetime64)) + np.testing.assert_equal( + resampled_cube.time_bnds.values, + np.array([['2017-06-25T00:00:00', '2017-07-09T00:00:00'], + ['2017-07-09T00:00:00', '2017-07-23T00:00:00'], + ['2017-07-23T00:00:00', '2017-08-06T00:00:00'], + ['2017-08-06T00:00:00', '2017-08-20T00:00:00'], + ['2017-08-20T00:00:00', '2017-09-03T00:00:00'], + ['2017-09-03T00:00:00', '2017-09-17T00:00:00']], + dtype=np.datetime64)) + np.testing.assert_allclose( + resampled_cube.temperature_min.values[..., 0, 0], + np.array([272.0, 272.4, 273.0, 273.8, 274.4, 274.9])) + np.testing.assert_allclose( + resampled_cube.temperature_max.values[..., 0, 0], + np.array([272.3, 272.9, 273.7, 274.3, 274.8, 274.9])) + np.testing.assert_allclose( + resampled_cube.precipitation_min.values[..., 0, 0], + np.array([119.4, 118.2, 116.6, 115.4, 114.4, 114.2])) + 
np.testing.assert_allclose( + resampled_cube.precipitation_max.values[..., 0, 0], + np.array([120.0, 119.2, 118.0, 116.4, 115.2, 114.2])) schema = CubeSchema.new(resampled_cube) self.assertEqual(3, schema.ndim) @@ -144,3 +162,17 @@ def test_resample_f_all(self): self.assertEqual(3, schema.ndim) self.assertEqual(('time', 'lat', 'lon'), schema.dims) self.assertEqual((1, 180, 360), schema.shape) + + def test_resample_in_time_resample_to_quarter(self): + resampled_cube = resample_in_time(self.input_cube, '1Q', ['min']) + self.assertIsNot(resampled_cube, self.input_cube) + self.assertIn('time', resampled_cube) + self.assertEqual(1, resampled_cube.time.size) + self.assertEqual(np.datetime64('2017-08-16T00:00:00'), + resampled_cube.time[0].values) + self.assertIn('time_bnds', resampled_cube) + self.assertEqual((1, 2), resampled_cube.time_bnds.shape) + self.assertEqual(np.datetime64('2017-07-01'), + resampled_cube.time_bnds[0, 0].values) + self.assertEqual(np.datetime64('2017-10-01'), + resampled_cube.time_bnds[0, 1].values) diff --git a/test/util/test_jsonschema.py b/test/util/test_jsonschema.py index a77587306..c0c57c2cc 100644 --- a/test/util/test_jsonschema.py +++ b/test/util/test_jsonschema.py @@ -1,5 +1,6 @@ import unittest from collections import namedtuple +from jsonschema import ValidationError from typing import Dict, Any from xcube.util.jsonschema import JsonArraySchema @@ -47,6 +48,66 @@ def test_to_dict(self): JsonComplexSchema(all_of=[JsonIntegerSchema(multiple_of=5), JsonIntegerSchema(multiple_of=3)]).to_dict()) + def test_to_instance_one_of(self): + schema = JsonComplexSchema(one_of=[JsonIntegerSchema(multiple_of=5), + JsonIntegerSchema(multiple_of=3)]) + self.assertEqual(5, schema.to_instance(5)) + self.assertEqual(6, schema.to_instance(6)) + with self.assertRaises(ValidationError) as cm: + schema.to_instance(7) + with self.assertRaises(ValidationError) as cm: + schema.to_instance(15) + + def test_to_instance_any_of(self): + schema = JsonComplexSchema(any_of=[JsonIntegerSchema(multiple_of=5), + JsonIntegerSchema(multiple_of=3)]) + self.assertEqual(5, schema.to_instance(5)) + self.assertEqual(6, schema.to_instance(6)) + self.assertEqual(15, schema.to_instance(15)) + with self.assertRaises(ValidationError) as cm: + schema.to_instance(7) + + def test_to_instance_all_of(self): + schema = JsonComplexSchema(all_of=[JsonIntegerSchema(multiple_of=5), + JsonIntegerSchema(multiple_of=3)]) + self.assertEqual(15, schema.to_instance(15)) + with self.assertRaises(ValidationError) as cm: + schema.to_instance(5) + with self.assertRaises(ValidationError) as cm: + schema.to_instance(6) + with self.assertRaises(ValidationError) as cm: + schema.to_instance(7) + + def test_from_instance_one_of(self): + schema = JsonComplexSchema(one_of=[JsonIntegerSchema(multiple_of=5), + JsonIntegerSchema(multiple_of=3)]) + self.assertEqual(5, schema.from_instance(5)) + self.assertEqual(6, schema.from_instance(6)) + with self.assertRaises(ValidationError) as cm: + schema.from_instance(7) + with self.assertRaises(ValidationError) as cm: + schema.from_instance(15) + + def test_from_instance_any_of(self): + schema = JsonComplexSchema(any_of=[JsonIntegerSchema(multiple_of=5), + JsonIntegerSchema(multiple_of=3)]) + self.assertEqual(5, schema.from_instance(5)) + self.assertEqual(6, schema.from_instance(6)) + self.assertEqual(15, schema.from_instance(15)) + with self.assertRaises(ValidationError) as cm: + schema.from_instance(7) + + def test_from_instance_all_of(self): + schema = 
JsonComplexSchema(all_of=[JsonIntegerSchema(multiple_of=5), + JsonIntegerSchema(multiple_of=3)]) + self.assertEqual(15, schema.from_instance(15)) + with self.assertRaises(ValidationError) as cm: + schema.from_instance(5) + with self.assertRaises(ValidationError) as cm: + schema.from_instance(6) + with self.assertRaises(ValidationError) as cm: + schema.from_instance(7) + class JsonSimpleSchemaTest(unittest.TestCase): diff --git a/xcube/cli/resample.py b/xcube/cli/resample.py index fa9ba6ab7..e9cf69721 100644 --- a/xcube/cli/resample.py +++ b/xcube/cli/resample.py @@ -130,7 +130,7 @@ def resample(cube, config['frequency'] = frequency if offset: config['offset'] = offset - if offset: + if base: config['base'] = base if kind: config['interp_kind'] = kind diff --git a/xcube/core/gen2/config.py b/xcube/core/gen2/config.py index 7a8489198..6bedc45db 100644 --- a/xcube/core/gen2/config.py +++ b/xcube/core/gen2/config.py @@ -25,11 +25,14 @@ import pyproj +from xcube.core.resampling.temporal import DOWNSAMPLING_METHODS +from xcube.core.resampling.temporal import UPSAMPLING_METHODS from xcube.util.assertions import assert_given from xcube.util.assertions import assert_instance from xcube.util.assertions import assert_true from xcube.util.jsonschema import JsonArraySchema from xcube.util.jsonschema import JsonBooleanSchema +from xcube.util.jsonschema import JsonComplexSchema from xcube.util.jsonschema import JsonDateSchema from xcube.util.jsonschema import JsonIntegerSchema from xcube.util.jsonschema import JsonNumberSchema @@ -142,6 +145,8 @@ def __init__(self, tile_size: Union[int, Tuple[int, int]] = None, time_range: Tuple[str, Optional[str]] = None, time_period: str = None, + temporal_resampling: + Mapping[str, Union[str, Tuple[str, Mapping[str, Any]]]] = None, chunks: Mapping[str, Optional[int]] = None, metadata: Mapping[str, Any] = None, variable_metadata: Mapping[str, Mapping[str, Any]] = None,): @@ -196,6 +201,25 @@ def __init__(self, assert_instance(time_period, str, 'time_period') self.time_period = time_period + self.temporal_resampling = None + if temporal_resampling is not None: + assert_instance(temporal_resampling, collections.Mapping, + 'temporal_resampling') + for resampling_direction, resampling_method \ + in temporal_resampling.items(): + assert_instance(resampling_direction, str, + 'resampling type name') + if not isinstance(resampling_method, str): + assert_true(len(resampling_method) == 2, + 'Resampling method must consist of a method ' + 'and a dictionary with additional parameters') + assert_instance(resampling_method[0], str) + assert_instance(resampling_method[1], collections.Mapping, + 'resampling params') + temporal_resampling[resampling_direction] = \ + tuple(resampling_method) + self.temporal_resampling = temporal_resampling + self.chunks = None if chunks is not None: assert_instance(chunks, collections.Mapping, 'chunks') @@ -271,6 +295,43 @@ def get_schema(cls): nullable=True, pattern=r'^([1-9][0-9]*)?[DWMY]$' ), + temporal_resampling=JsonObjectSchema( + nullable=True, + properties=dict( + upsampling=JsonComplexSchema( + one_of=[ + JsonArraySchema( + nullable=True, + items=[ + JsonStringSchema( + enum=UPSAMPLING_METHODS + ), + JsonObjectSchema( + additional_properties=True + ) + ] + ), + JsonStringSchema(enum=UPSAMPLING_METHODS) + ] + ), + downsampling=JsonComplexSchema( + one_of=[ + JsonArraySchema( + nullable=True, + items=[ + JsonStringSchema( + enum=DOWNSAMPLING_METHODS + ), + JsonObjectSchema( + additional_properties=True + ) + ] + ), + 
+                                JsonStringSchema(enum=DOWNSAMPLING_METHODS)
+                            ]
+                        )
+                    )
+                ),
                 chunks=JsonObjectSchema(
                     nullable=True,
                     additional_properties=JsonIntegerSchema(nullable=True,
diff --git a/xcube/core/gen2/local/resamplert.py b/xcube/core/gen2/local/resamplert.py
index 25b573641..57da9105e 100644
--- a/xcube/core/gen2/local/resamplert.py
+++ b/xcube/core/gen2/local/resamplert.py
@@ -19,12 +19,28 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
+import cftime
+import numpy as np
+import pandas as pd
 import xarray as xr
 
 from xcube.core.gridmapping import GridMapping
+from xcube.core.resampling import resample_in_time
+from xcube.core.resampling.temporal import adjust_metadata_and_chunking
+from xcube.core.resampling.temporal import INTERPOLATION_KINDS
+from xcube.core.timecoord import get_time_range_from_data
 from .transformer import CubeTransformer
 from .transformer import TransformedCube
 from ..config import CubeConfig
+from ..error import CubeGeneratorError
+
+MIN_MAX_DELTAS = dict(
+    H=(1, 1, 'H'),
+    D=(1, 1, 'D'),
+    W=(7, 7, 'D'),
+    M=(28, 31, 'D'),
+    Y=(365, 366, 'D')
+)
 
 
 class CubeResamplerT(CubeTransformer):
@@ -33,5 +49,242 @@ def transform_cube(self,
                        cube: xr.Dataset,
                        gm: GridMapping,
                        cube_config: CubeConfig) -> TransformedCube:
-        # TODO (forman): implement me
-        return cube, gm, cube_config
+        to_drop = []
+        if cube_config.time_range is not None:
+            start_time, end_time = cube_config.time_range
+            to_drop.append('time_range')
+        else:
+            start_time, end_time = \
+                get_time_range_from_data(cube, maybe_consider_metadata=False)
+        if cube_config.time_period is None:
+            resampled_cube = cube
+        else:
+            to_drop.append('time_period')
+            time_resample_params = dict()
+            time_resample_params['frequency'] = cube_config.time_period
+            time_resample_params['method'] = 'first'
+            import re
+            time_unit = re.findall('[A-Z]+', cube_config.time_period)[0]
+            time_frequency = int(cube_config.time_period.split(time_unit)[0])
+            if time_unit in ['H', 'D']:
+                if start_time is not None:
+                    start_time_as_datetime = pd.to_datetime(start_time)
+                    dataset_start_time = pd.Timestamp(cube.time[0].values)
+                    time_delta = _normalize_time(dataset_start_time) \
+                        - start_time_as_datetime
+                    _adjust_time_resample_params(time_resample_params,
+                                                 cube_config.time_period,
+                                                 time_delta,
+                                                 time_unit)
+                elif end_time is not None:
+                    end_time_as_datetime = pd.to_datetime(end_time)
+                    dataset_end_time = pd.Timestamp(cube.time[-1].values)
+                    time_delta = end_time_as_datetime - \
+                        _normalize_time(dataset_end_time)
+                    _adjust_time_resample_params(time_resample_params,
+                                                 cube_config.time_period,
+                                                 time_delta,
+                                                 time_unit)
+            if cube_config.temporal_resampling is not None:
+                to_drop.append('temporal_resampling')
+                min_data_delta, max_data_delta = \
+                    get_min_max_timedeltas_from_data(cube)
+                min_period_delta, max_period_delta = \
+                    get_min_max_timedeltas_for_time_period(time_frequency,
+                                                           time_unit)
+                if max_data_delta < min_period_delta:
+                    if 'downsampling' not in cube_config.temporal_resampling:
+                        raise ValueError('Data must be sampled down to a '
+                                         'coarser temporal resolution, '
+                                         'but no temporal downsampling '
+                                         'method is set')
+                    try:
+                        method, method_args = \
+                            cube_config.temporal_resampling['downsampling']
+                    except ValueError:
+                        method = cube_config.temporal_resampling['downsampling']
+                        method_args = {}
+                elif max_period_delta < min_data_delta:
+                    if 'upsampling' not in cube_config.temporal_resampling:
+                        raise ValueError('Data must be sampled up to a '
+                                         'finer temporal resolution, '
+                                         'but no temporal upsampling '
+                                         'method is set')
+                    try:
+                        method, method_args = \
+                            cube_config.temporal_resampling['upsampling']
+                    except ValueError:
+                        method = cube_config.temporal_resampling['upsampling']
+                        method_args = {}
+                else:
+                    if 'downsampling' not in cube_config.temporal_resampling \
+                            and 'upsampling' not in \
+                            cube_config.temporal_resampling:
+                        raise ValueError('Please specify a method for '
+                                         'temporal resampling.')
+                    if 'downsampling' in cube_config.temporal_resampling and \
+                            'upsampling' in cube_config.temporal_resampling:
+                        raise ValueError('Cannot determine unambiguously '
+                                         'whether data needs to be sampled up '
+                                         'or down temporally. Please only '
+                                         'specify one method for temporal '
+                                         'resampling.')
+                    try:
+                        method, method_args = cube_config.temporal_resampling.\
+                            get('downsampling',
+                                cube_config.temporal_resampling.
+                                get('upsampling'))
+                    except ValueError:
+                        method = cube_config.temporal_resampling.get(
+                            'downsampling',
+                            cube_config.temporal_resampling.get('upsampling'))
+                        method_args = {}
+                if method == 'interpolate':
+                    time_resample_params['method'] = method
+                    if 'kind' not in method_args:
+                        interpolation_kinds = \
+                            ', '.join(map(repr, INTERPOLATION_KINDS))
+                        raise ValueError(f"To use 'interpolate' as "
+                                         f"upsampling method, the "
+                                         f"interpolation kind must be set. "
+                                         f"Use any of the following: "
+                                         f"{interpolation_kinds}.")
+                    if method_args['kind'] not in INTERPOLATION_KINDS:
+                        interpolation_kinds = \
+                            ', '.join(map(repr, INTERPOLATION_KINDS))
+                        raise ValueError(f'Interpolation kind must be one of '
+                                         f'the following: '
+                                         f'{interpolation_kinds}. Was: '
+                                         f'"{method_args["kind"]}".')
+                    time_resample_params['interp_kind'] = method_args['kind']
+                elif method == 'percentile':
+                    if 'threshold' not in method_args:
+                        raise ValueError(f"To use 'percentile' as "
+                                         f"downsampling method, a "
+                                         f"threshold must be set.")
+                    method = f'percentile_{method_args["threshold"]}'
+                    time_resample_params['method'] = method
+                else:
+                    time_resample_params['method'] = method
+            # we set cube_asserted to true so the resampling can deal with
+            # cftime data
+            resampled_cube = resample_in_time(
+                cube,
+                rename_variables=False,
+                cube_asserted=True,
+                **time_resample_params
+            )
+            if start_time is not None or end_time is not None:
+                # cut possible overlapping time steps
+                is_cf_time = isinstance(resampled_cube.time_bnds[0].values[0],
+                                        cftime.datetime)
+                if is_cf_time:
+                    resampled_cube = _get_temporal_subset_cf(resampled_cube,
+                                                             start_time,
+                                                             end_time)
+                else:
+                    resampled_cube = _get_temporal_subset(resampled_cube,
+                                                          start_time,
+                                                          end_time)
+            resampled_cube = adjust_metadata_and_chunking(resampled_cube,
+                                                          time_chunk_size=1)
+
+        cube_config = cube_config.drop_props(to_drop)
+
+        return resampled_cube, gm, cube_config
+
+
+def _adjust_time_resample_params(time_resample_params,
+                                 time_period,
+                                 time_delta,
+                                 time_unit):
+    period_delta = pd.Timedelta(time_period)
+    if time_delta > period_delta:
+        if time_unit == 'H':
+            # pd.Timedelta has no 'hours' attribute;
+            # derive total hours instead
+            time_resample_params['base'] = \
+                (time_delta.total_seconds() / 3600) \
+                / (period_delta.total_seconds() / 3600)
+        elif time_unit == 'D':
+            time_resample_params['base'] = \
+                time_delta.days / period_delta.days
+
+
+def _get_temporal_subset_cf(resampled_cube, start_time, end_time):
+    data_start_index = 0
+    data_end_index = resampled_cube.time.size
+    if start_time:
+        try:
+            data_start_index = resampled_cube.time_bnds[:, 0].to_index().\
+                get_loc(start_time, method='bfill')
+            if isinstance(data_start_index, slice):
+                data_start_index = data_start_index.start
+        except KeyError:
+            pass
+    if end_time:
+        try:
+            data_end_index = resampled_cube.time_bnds[:,
1].to_index().\ + get_loc(end_time, method='ffill') + if isinstance(data_end_index, slice): + data_end_index = data_end_index.stop + 1 + except KeyError: + pass + return resampled_cube.isel(time=slice(data_start_index, data_end_index)) + + +def _get_temporal_subset(resampled_cube, start_time, end_time): + data_start_time = resampled_cube.time_bnds[0, 0] + data_end_time = resampled_cube.time_bnds[-1, 1] + if start_time: + try: + data_start_time = resampled_cube.time_bnds[:, 0]. \ + sel(time=start_time, method='bfill') + if data_start_time.size < 1: + data_start_time = resampled_cube.time_bnds[0, 0] + except KeyError: + pass + if end_time: + try: + data_end_time = resampled_cube.time_bnds[:, 1]. \ + sel(time=end_time, method='ffill') + if data_end_time.size < 1: + data_end_time = resampled_cube.time_bnds[-1, 1] + except KeyError: + pass + return resampled_cube.sel(time=slice(data_start_time, data_end_time)) + + +def get_min_max_timedeltas_from_data(data: xr.Dataset): + time_diff = data['time'].diff(dim=data['time'].dims[0])\ + .values.astype(np.float64) + return pd.Timedelta(min(time_diff)), pd.Timedelta(max(time_diff)) + + +def get_min_max_timedeltas_for_time_period(time_frequency: int, time_unit: str): + min_freq = MIN_MAX_DELTAS[time_unit][0] * time_frequency + max_freq = MIN_MAX_DELTAS[time_unit][1] * time_frequency + delta_unit = MIN_MAX_DELTAS[time_unit][2] + return pd.Timedelta(f'{min_freq}{delta_unit}'), \ + pd.Timedelta(f'{max_freq}{delta_unit}') + + +def _normalize_time(time, normalize_hour=True): + if normalize_hour: + return time.replace(hour=0, minute=0, second=0, microsecond=0, + nanosecond=0) + return time.replace(minute=0, second=0, microsecond=0, nanosecond=0) + + +def _get_expected_start_time(dataset_start_time, time_unit): + if time_unit == 'H': + return _normalize_time(dataset_start_time, normalize_hour=False) + if time_unit == 'D': + return _normalize_time(dataset_start_time) + if time_unit == 'W': + delta = pd.Timedelta(-dataset_start_time.day_of_week) + return _normalize_time(dataset_start_time) - delta + if time_unit == 'M': + return _normalize_time(dataset_start_time).replace(day=1) + if time_unit == 'Q': + delta = pd.Timedelta(-(dataset_start_time.month - 1) % 3) + return _normalize_time(dataset_start_time).replace(day=1) - delta + if time_unit == 'Y': + return _normalize_time(dataset_start_time).replace(month=1, day=1) + raise CubeGeneratorError(f'Unsupported time unit "{time_unit}"') diff --git a/xcube/core/resampling/temporal.py b/xcube/core/resampling/temporal.py index 454d4caea..9601c2037 100644 --- a/xcube/core/resampling/temporal.py +++ b/xcube/core/resampling/temporal.py @@ -19,15 +19,37 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
+from enum import Enum from typing import Dict, Any, Sequence, Union +import cftime +from datetime import timedelta import numpy as np import xarray as xr +from xarray.coding.cftime_offsets import Day +from xarray.coding.cftime_offsets import Hour +from xarray.coding.cftime_offsets import MonthBegin +from xarray.coding.cftime_offsets import QuarterBegin +from xarray.coding.cftime_offsets import YearBegin from xcube.core.schema import CubeSchema from xcube.core.select import select_variables_subset from xcube.core.verify import assert_cube +UPSAMPLING_METHODS = ['asfreq', 'ffill', 'bfill', 'pad', 'nearest', + 'interpolate'] +DOWNSAMPLING_METHODS = ['count', 'first', 'last', 'min', 'max', 'sum', 'prod', + 'mean', 'median', 'std', 'var', 'percentile'] +SPLINE_INTERPOLATION_KINDS = ['zero', 'slinear', 'quadratic', 'cubic'] +OTHER_INTERPOLATION_KINDS = ['linear', 'nearest', 'previous', 'next'] +INTERPOLATION_KINDS = SPLINE_INTERPOLATION_KINDS + OTHER_INTERPOLATION_KINDS + + +class Offset(Enum): + PREVIOUS = 'previous' + NONE = 'none' + NEXT = 'next' + def resample_in_time(dataset: xr.Dataset, frequency: str, @@ -39,7 +61,8 @@ def resample_in_time(dataset: xr.Dataset, time_chunk_size=None, var_names: Sequence[str] = None, metadata: Dict[str, Any] = None, - cube_asserted: bool = False) -> xr.Dataset: + cube_asserted: bool = False, + rename_variables: bool = True) -> xr.Dataset: """ Resample a dataset in the time dimension. @@ -49,7 +72,9 @@ def resample_in_time(dataset: xr.Dataset, ``'first'``, ``'last'``, ``'max'``, ``'min'``, ``'mean'``, ``'median'``, ``'percentile_
<p>'``,
-                           ``'std'``, ``'sum'``, ``'var'``.
+                           ``'std'``, ``'sum'``, ``'var'``,
+                           ``'interpolate'``.
 
     In value ``'percentile_<p>'`` is a placeholder,
     where ``'<p>
'`` must be replaced by an integer percentage @@ -82,6 +107,8 @@ def resample_in_time(dataset: xr.Dataset, :param metadata: Output metadata. :param cube_asserted: If False, *cube* will be verified, otherwise it is expected to be a valid cube. + :param rename_variables: Whether the dataset's variables shall be renamed by + extending the resampling method to the original name. :return: A new xcube dataset resampled in time. """ if not cube_asserted: @@ -93,6 +120,13 @@ def resample_in_time(dataset: xr.Dataset, / np.timedelta64(1, 'D')) + 1) frequency = f'{days}D' + frequency_is_irregular = frequency.endswith('Y') or \ + frequency.endswith('M') or \ + frequency.endswith('Q') + # resample to start of period + if frequency_is_irregular: + frequency = f'{frequency}S' + if var_names: dataset = select_variables_subset(dataset, var_names) @@ -127,32 +161,209 @@ def resample_in_time(dataset: xr.Dataset, tolerance) resampled_cube = resampling_method(*method_args, **method_kwargs) - resampled_cube = resampled_cube.rename( - {var_name: f'{var_name}_{method_postfix}' - for var_name in resampled_cube.data_vars}) + if rename_variables: + resampled_cube = resampled_cube.rename( + {var_name: f'{var_name}_{method_postfix}' + for var_name in resampled_cube.data_vars}) resampled_cubes.append(resampled_cube) if len(resampled_cubes) == 1: resampled_cube = resampled_cubes[0] else: resampled_cube = xr.merge(resampled_cubes) + if method in UPSAMPLING_METHODS: + resampled_cube = _adjust_upsampled_cube(resampled_cube, + frequency, + base, + frequency_is_irregular) + else: + resampled_cube = _adjust_downsampled_cube(resampled_cube, + frequency, + base, + frequency_is_irregular) + return adjust_metadata_and_chunking(resampled_cube, + metadata=metadata, + time_chunk_size=time_chunk_size) + + +def _adjust_upsampled_cube(resampled_cube, frequency, base, frequency_is_irregular): + # Times of upsampled cube are correct, we need to determine time bounds + # Get times with negative offset + times = resampled_cube.time.values + previous_times = _get_resampled_times( + resampled_cube, frequency, 'time', Offset.PREVIOUS, base + ) + # Get centers between times and previous_times as start bounds + center_times = _get_centers_between_times( + previous_times, + times, + frequency_is_irregular, + resampled_cube + ) + # we need to add this as intermediate data array so we can retrieve + # resampled times from it + resampled_cube = resampled_cube.assign_coords( + intermediate_time=center_times + ) + stop_times = _get_resampled_times( + resampled_cube, frequency, 'intermediate_time', Offset.NEXT, base + ) + resampled_cube = resampled_cube.drop_vars('intermediate_time') + resampled_cube = _add_time_bounds_to_resampled_cube(center_times, + stop_times, + resampled_cube) + return resampled_cube + - # TODO: add time_bnds to resampled_ds - time_coverage_start = '%s' % dataset.time[0] - time_coverage_end = '%s' % dataset.time[-1] +def _adjust_downsampled_cube(resampled_cube, + frequency, + base, + frequency_is_irregular): + # times of resampled_cube are actually start bounding times. 
+ # We need to determine times and end bounding times + start_times = resampled_cube.time.values + stop_times = _get_resampled_times( + resampled_cube, frequency, 'time', Offset.NEXT, base + ) + resampled_cube = _add_time_bounds_to_resampled_cube(start_times, + stop_times, + resampled_cube) + # Get centers between start and stop bounding times + center_times = _get_centers_between_times( + start_times, + stop_times, + frequency_is_irregular, + resampled_cube + ) + resampled_cube = resampled_cube.assign_coords(time=center_times) + return resampled_cube - resampled_cube.attrs.update(metadata or {}) + +def _get_resampled_times(cube: xr.Dataset, + frequency: str, + name_of_time_dim: str, + offset: Offset, + base=None): + if offset == Offset.PREVIOUS: + offset = _invert_frequency(frequency, + cube[name_of_time_dim].values[0]) + elif offset == Offset.NONE: + offset = None + elif offset == Offset.NEXT: + offset = frequency + args = dict(skipna=True, + closed='left', + label='left', + loffset=offset, + base=base) + args[name_of_time_dim] = frequency + return np.array(list(cube[name_of_time_dim].resample(**args).groups.keys())) + + +def _add_time_bounds_to_resampled_cube(start_times, stop_times, resampled_cube): + time_bounds = xr.DataArray( + np.array([start_times, stop_times]).transpose(), + dims=['time', 'bnds'] + ) + return resampled_cube.assign_coords( + time_bnds=time_bounds + ) + + +def _get_centers_between_times(earlier_times, + later_times, + frequency_is_irregular, + resampled_cube): + """ + Determines the center between two time arrays. + In case the frequency is irregular and the centers are close to the + beginning of a month, the centers are snapped to it + """ + time_deltas = later_times - earlier_times + center_times = later_times - time_deltas * 0.5 + if frequency_is_irregular: + # In case of 'M', 'Q' or 'Y' frequencies, add a small time delta + # so we move a little closer to the later time + time_delta = _get_time_delta(earlier_times[0]) + center_times_plus_delta = center_times + time_delta + resampled_cube = resampled_cube.assign_coords( + intermediate_time=center_times_plus_delta + ) + # snap center times to beginnings of months when they are close + starts_of_month = _get_resampled_times( + resampled_cube, '1MS', 'intermediate_time', Offset.NONE + ) + center_time_deltas = center_times_plus_delta - starts_of_month + snapped_times = np.where(center_time_deltas < time_delta * 2, + starts_of_month, + center_times) + resampled_cube.drop_vars('intermediate_time') + return snapped_times + return center_times + + +def _get_time_delta(time_value): + if _is_cf(time_value): + return timedelta(hours=42) + return np.timedelta64(42, 'h') + + +def _invert_frequency(frequency, time_value): + if not _is_cf(time_value): + return f'-{frequency}' + if frequency.endswith('H'): + frequency_value = frequency.split('H')[0] + return Hour(-int(frequency_value)) + if frequency.endswith('D'): + frequency_value = frequency.split('D')[0] + return Day(-int(frequency_value)) + if frequency.endswith('W'): + frequency_value = frequency.split('W')[0] + return Day(-int(frequency_value) * 7) + if frequency.endswith('MS'): + frequency_value = frequency.split('MS')[0] + return MonthBegin(-int(frequency_value)) + if frequency.endswith('QS'): + frequency_value = frequency.split('QS')[0] + return QuarterBegin(-int(frequency_value)) + frequency_value = frequency.split('YS')[0] + return YearBegin(-int(frequency_value)) + + +def _is_cf(time_value): + return isinstance(time_value, cftime.datetime) + + +def 
adjust_metadata_and_chunking(dataset, metadata=None, time_chunk_size=None): + time_coverage_start = '%s' % dataset.time_bnds[0][0] + time_coverage_end = '%s' % dataset.time_bnds[-1][1] + + dataset.attrs.update(metadata or {}) # TODO: add other time_coverage_ attributes - resampled_cube.attrs.update(time_coverage_start=time_coverage_start, - time_coverage_end=time_coverage_end) + dataset.attrs.update(time_coverage_start=time_coverage_start, + time_coverage_end=time_coverage_end) + try: + schema = CubeSchema.new(dataset) + except ValueError: + return _adjust_chunk_sizes_without_schema(dataset, time_chunk_size) + if schema.chunks is None: + return _adjust_chunk_sizes_without_schema(dataset, time_chunk_size) - schema = CubeSchema.new(dataset) chunk_sizes = {schema.dims[i]: schema.chunks[i] for i in range(schema.ndim)} if isinstance(time_chunk_size, int) and time_chunk_size >= 0: chunk_sizes['time'] = time_chunk_size - return resampled_cube.chunk(chunk_sizes) + return dataset.chunk(chunk_sizes) + + +def _adjust_chunk_sizes_without_schema(dataset, time_chunk_size=None): + chunk_sizes = dict(dataset.chunks) + if isinstance(time_chunk_size, int) and time_chunk_size >= 0: + chunk_sizes['time'] = time_chunk_size + else: + chunk_sizes['time'] = 1 + return dataset.chunk(chunk_sizes) def get_method_kwargs(method, frequency, interp_kind, tolerance): @@ -160,10 +371,12 @@ def get_method_kwargs(method, frequency, interp_kind, tolerance): kwargs = {'kind': interp_kind or 'linear'} elif method in {'nearest', 'bfill', 'ffill', 'pad'}: kwargs = {'tolerance': tolerance or frequency} - elif method in {'first', 'last', 'sum', + elif method in {'last', 'sum', 'min', 'max', 'mean', 'median', 'std', 'var'}: kwargs = {'dim': 'time', 'keep_attrs': True, 'skipna': True} + elif method == 'first': + kwargs = {'keep_attrs': True, 'skipna': False} elif method == 'prod': kwargs = {'dim': 'time', 'skipna': True} elif method == 'count':
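
---

For reviewers, here is a minimal usage sketch distilled from `test_resamplert.py` above, showing how the new `temporal_resampling` setting drives `CubeResamplerT`. The cube contents and the `new_cube` arguments are illustrative only and are not part of this change:

```python
from xcube.core.gen2 import CubeConfig
from xcube.core.gen2.local.resamplert import CubeResamplerT
from xcube.core.gridmapping import GridMapping
from xcube.core.new import new_cube

# A small demo cube with 24 monthly time steps and one band variable.
cube = new_cube(variables=dict(B03=0.5),
                time_periods=24, time_freq='M',
                width=10, height=5, time_start='2010-01-01')

# Downsample to two-year periods, taking the minimum of each period.
# Parameterised methods are passed as (name, params) tuples, e.g.
# downsampling=('percentile', {'threshold': 75}).
cube_config = CubeConfig(time_range=('2010-01-01', '2014-12-31'),
                         time_period='2Y',
                         temporal_resampling=dict(downsampling=('min', {})))

resampled_cube, gm, cube_config = CubeResamplerT().transform_cube(
    cube, GridMapping.from_dataset(cube), cube_config)

# 'time' now holds the period centers, 'time_bnds' the period boundaries.
print(resampled_cube.time.values)
print(resampled_cube.time_bnds.values)
```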