diff --git a/esmvalcore/preprocessor/_time.py b/esmvalcore/preprocessor/_time.py
index 5aeef8d9c4..d93fe96760 100644
--- a/esmvalcore/preprocessor/_time.py
+++ b/esmvalcore/preprocessor/_time.py
@@ -131,9 +131,7 @@ def extract_time(
             f"Currently, start_year is {start_year} "
             f"and end_year is {end_year}."
         )
-        raise ValueError(
-            msg,
-        )
+        raise ValueError(msg)
 
     t_1 = PartialDateTime(
         year=start_year,
@@ -282,9 +280,7 @@ def dt2str(time: PartialDateTime) -> str:
             f"cube time bounds {time_coord.cell(0).point} to "
             f"{time_coord.cell(-1).point}."
         )
-        raise ValueError(
-            msg,
-        )
+        raise ValueError(msg)
 
     return cube_slice
 
@@ -374,12 +370,10 @@ def extract_season(cube: Cube, season: str) -> Cube:
     allmonths = "JFMAMJJASOND" * 2
     if season not in allmonths:
         msg = (
-            f"Unable to extract Season {season} "
-            f"combination of months not possible."
-        )
-        raise ValueError(
-            msg,
+            f"Unable to extract Season '{season}': combination of months not "
+            f"possible"
         )
+        raise ValueError(msg)
     sstart = allmonths.index(season)
     res_season = allmonths[sstart + len(season) : sstart + 12]
     seasons = [season, res_season]
@@ -493,6 +487,7 @@ def hourly_statistics(
     cube: Cube,
     hours: int,
     operator: str = "mean",
+    keep_group_coordinates: bool = False,
     **operator_kwargs,
 ) -> Cube:
     """Compute hourly statistics.
@@ -510,6 +505,9 @@
         The operation. Used to determine the :class:`iris.analysis.Aggregator`
         object used to calculate the statistics. Allowed options are given in
         :ref:`this table <supported_stat_operator>`.
+    keep_group_coordinates:
+        If ``True``, keep temporal group coordinates (i.e., ``hour_group``,
+        ``day_of_year``, ``year``) in the result. If ``False``, remove them.
     **operator_kwargs:
         Optional keyword arguments for the :class:`iris.analysis.Aggregator`
         object defined by `operator`.
@@ -540,9 +538,10 @@
             **agg_kwargs,
         )
 
-    result.remove_coord("hour_group")
-    result.remove_coord("day_of_year")
-    result.remove_coord("year")
+    if not keep_group_coordinates:
+        result.remove_coord("hour_group")
+        result.remove_coord("day_of_year")
+        result.remove_coord("year")
 
     return result
 
@@ -551,6 +550,7 @@ def hourly_statistics(
 def daily_statistics(
     cube: Cube,
     operator: str = "mean",
+    keep_group_coordinates: bool = False,
     **operator_kwargs,
 ) -> Cube:
     """Compute daily statistics.
@@ -565,6 +565,9 @@
         The operation. Used to determine the :class:`iris.analysis.Aggregator`
         object used to calculate the statistics. Allowed options are given in
         :ref:`this table <supported_stat_operator>`.
+    keep_group_coordinates:
+        If ``True``, keep temporal group coordinates (i.e., ``day_of_year``,
+        ``year``) in the result. If ``False``, remove them.
     **operator_kwargs:
         Optional keyword arguments for the :class:`iris.analysis.Aggregator`
         object defined by `operator`.
@@ -583,8 +586,10 @@
     with ignore_iris_vague_metadata_warnings():
         result = cube.aggregated_by(["day_of_year", "year"], agg, **agg_kwargs)
 
-    result.remove_coord("day_of_year")
-    result.remove_coord("year")
+    if not keep_group_coordinates:
+        result.remove_coord("day_of_year")
+        result.remove_coord("year")
+
     return result
 
 
@@ -592,6 +597,7 @@ def daily_statistics(
 def monthly_statistics(
     cube: Cube,
     operator: str = "mean",
+    keep_group_coordinates: bool = True,
     **operator_kwargs,
 ) -> Cube:
     """Compute monthly statistics.
@@ -606,6 +612,9 @@
         The operation. Used to determine the :class:`iris.analysis.Aggregator`
         object used to calculate the statistics. Allowed options are given in
         :ref:`this table <supported_stat_operator>`.
+    keep_group_coordinates:
+        If ``True``, keep temporal group coordinates (i.e., ``month_number``,
+        ``year``) in the result. If ``False``, remove them.
     **operator_kwargs:
         Optional keyword arguments for the :class:`iris.analysis.Aggregator`
         object defined by `operator`.
@@ -628,6 +637,11 @@ def monthly_statistics(
             **agg_kwargs,
         )
     _aggregate_time_fx(result, cube)
+
+    if not keep_group_coordinates:
+        result.remove_coord("month_number")
+        result.remove_coord("year")
+
     return result
 
 
@@ -636,6 +650,7 @@ def seasonal_statistics(
     cube: Cube,
     operator: str = "mean",
     seasons: Iterable[str] = ("DJF", "MAM", "JJA", "SON"),
+    keep_group_coordinates: bool = True,
     **operator_kwargs,
 ) -> Cube:
     """Compute seasonal statistics.
@@ -655,6 +670,9 @@
         and all sequentially correct combinations holding every month of a
         year: e.g. ('JJAS','ONDJFMAM'), or less in case of prior season
         extraction.
+    keep_group_coordinates:
+        If ``True``, keep temporal group coordinates (i.e., ``clim_season``,
+        ``season_year``) in the result. If ``False``, remove them.
     **operator_kwargs:
         Optional keyword arguments for the :class:`iris.analysis.Aggregator`
         object defined by `operator`.
@@ -667,10 +685,8 @@
     seasons = tuple(sea.upper() for sea in seasons)
 
     if any(len(sea) < 2 for sea in seasons):
-        msg = f"Minimum of 2 month is required per Seasons: {seasons}."
-        raise ValueError(
-            msg,
-        )
+        msg = f"Minimum of 2 months is required per season in {seasons}"
+        raise ValueError(msg)
 
     if not cube.coords("clim_season"):
         iris.coord_categorisation.add_season(
@@ -688,9 +704,7 @@
             f"Seasons {seasons} do not match prior season extraction "
             f"{old_seasons}."
         )
-        raise ValueError(
-            msg,
-        )
+        raise ValueError(msg)
 
     if not cube.coords("season_year"):
         iris.coord_categorisation.add_season_year(
@@ -739,6 +753,11 @@ def spans_full_season(cube: Cube) -> list[bool]:
     full_seasons = spans_full_season(result)
     result = result[full_seasons]
     _aggregate_time_fx(result, cube)
+
+    if not keep_group_coordinates:
+        result.remove_coord("clim_season")
+        result.remove_coord("season_year")
+
     return result
 
 
@@ -746,6 +765,7 @@ def spans_full_season(cube: Cube) -> list[bool]:
 def annual_statistics(
     cube: Cube,
     operator: str = "mean",
+    keep_group_coordinates: bool = True,
     **operator_kwargs,
 ) -> Cube:
     """Compute annual statistics.
@@ -762,6 +782,9 @@
         The operation. Used to determine the :class:`iris.analysis.Aggregator`
         object used to calculate the statistics. Allowed options are given in
         :ref:`this table <supported_stat_operator>`.
+    keep_group_coordinates:
+        If ``True``, keep temporal group coordinates (i.e., ``year``) in the
+        result. If ``False``, remove them.
     **operator_kwargs:
         Optional keyword arguments for the :class:`iris.analysis.Aggregator`
         object defined by `operator`.
@@ -781,6 +804,10 @@
     with ignore_iris_vague_metadata_warnings():
         result = cube.aggregated_by("year", agg, **agg_kwargs)
     _aggregate_time_fx(result, cube)
+
+    if not keep_group_coordinates:
+        result.remove_coord("year")
+
     return result
 
 
@@ -788,6 +815,7 @@ def annual_statistics(
 def decadal_statistics(
     cube: Cube,
     operator: str = "mean",
+    keep_group_coordinates: bool = True,
     **operator_kwargs,
 ) -> Cube:
     """Compute decadal statistics.
@@ -804,6 +832,9 @@
         The operation. Used to determine the :class:`iris.analysis.Aggregator`
         object used to calculate the statistics. Allowed options are given in
         :ref:`this table <supported_stat_operator>`.
+    keep_group_coordinates:
+        If ``True``, keep temporal group coordinates (i.e., ``decade``) in the
+        result. If ``False``, remove them.
     **operator_kwargs:
         Optional keyword arguments for the :class:`iris.analysis.Aggregator`
         object defined by `operator`.
@@ -834,6 +865,10 @@ def get_decade(coord, value):
     with ignore_iris_vague_metadata_warnings():
         result = cube.aggregated_by("decade", agg, **agg_kwargs)
     _aggregate_time_fx(result, cube)
+
+    if not keep_group_coordinates:
+        result.remove_coord("decade")
+
     return result
 
 
@@ -1016,13 +1051,11 @@ def anomalies(
         reps = cube.shape[tdim] / cube_stddev.shape[tdim]
         if reps % 1 != 0:
             msg = (
-                "Cannot safely apply preprocessor to this dataset, "
-                "since the full time period of this dataset is not "
-                f"a multiple of the period '{period}'"
-            )
-            raise ValueError(
-                msg,
+                f"Cannot safely apply preprocessor to this dataset since the "
+                f"full time period of this dataset is not a multiple of the "
+                f"period '{period}'"
             )
+            raise ValueError(msg)
         cube.data = cube.core_data() / da.concatenate(
             [cube_stddev.core_data() for _ in range(int(reps))],
             axis=tdim,
@@ -1171,9 +1204,7 @@ def regrid_time(
             f"Setting a fixed calendar is not supported for frequency "
             f"'{frequency}'"
         )
-        raise NotImplementedError(
-            msg,
-        )
+        raise NotImplementedError(msg)
 
     # Setup new time coordinate
     new_dates = _get_new_dates(frequency, coord)
@@ -1246,9 +1277,7 @@ def _get_new_dates(frequency: str, coord: Coord) -> list[datetime.datetime]:
                 f"For `n`-hourly data, `n` must be a divisor of 24, got "
                 f"'{frequency}'"
             )
-            raise NotImplementedError(
-                msg,
-            )
+            raise NotImplementedError(msg)
         half_interval = datetime.timedelta(hours=n_hours / 2.0)
         dates = [
             datetime.datetime(
@@ -1376,9 +1405,7 @@ def timeseries_filter(
             f"Filter type {filter_type} not implemented, "
             f"please choose one of {', '.join(supported_filters)}"
         )
-        raise NotImplementedError(
-            msg,
-        )
+        raise NotImplementedError(msg)
 
     # Apply filter
     (agg, agg_kwargs) = get_iris_aggregator(filter_stats, **operator_kwargs)
@@ -1439,17 +1466,13 @@ def resample_hours(
     allowed_intervals = (1, 2, 3, 4, 6, 12)
     if interval not in allowed_intervals:
         msg = f"The number of hours must be one of {allowed_intervals}"
-        raise ValueError(
-            msg,
-        )
+        raise ValueError(msg)
     if offset >= interval:
         msg = (
             f"The offset ({offset}) must be lower than "
             f"the interval ({interval})"
         )
-        raise ValueError(
-            msg,
-        )
+        raise ValueError(msg)
     time = cube.coord("time")
     cube_period = time.cell(1).point - time.cell(0).point
     if cube_period.total_seconds() / 3600 > interval:
@@ -1457,9 +1480,7 @@
             f"Data period ({cube_period}) should be lower than "
             f"the interval ({interval})"
         )
-        raise ValueError(
-            msg,
-        )
+        raise ValueError(msg)
     dates = time.units.num2date(time.points)
 
     # Interpolate input time to requested hours if desired
@@ -1473,9 +1494,7 @@
             f"Expected `None`, 'nearest' or 'linear' for `interpolate`, "
             f"got '{interpolate}'"
         )
-        raise ValueError(
-            msg,
-        )
+        raise ValueError(msg)
     new_dates = sorted(
         [
             cf_datetime(y, m, d, h, calendar=time.units.calendar)
@@ -1497,9 +1516,7 @@
             msg = (
                 f"Time coordinate {dates} does not contain {hours} for {cube}"
             )
-            raise ValueError(
-                msg,
-            )
+            raise ValueError(msg)
 
     return cube
 
@@ -1556,9 +1573,7 @@ def resample_time(
         msg = (
             f"Time coordinate {dates} does not contain {requested} for {cube}"
         )
-        raise ValueError(
-            msg,
-        )
+        raise ValueError(msg)
 
     return cube
 
@@ -1888,9 +1903,7 @@ def _check_cube_coords(cube):
             f"Input cube {cube.summary(shorten=True)} needs a dimensional "
dimensional " f"coordinate `time`" ) - raise CoordinateNotFoundError( - msg, - ) + raise CoordinateNotFoundError(msg) time_coord = cube.coord("time", dim_coords=True) # The following works since DimCoords are always 1D and monotonic if time_coord.points[0] > time_coord.points[-1]: @@ -1902,18 +1915,14 @@ def _check_cube_coords(cube): f"Input cube {cube.summary(shorten=True)} needs a coordinate " f"`longitude`" ) - raise CoordinateNotFoundError( - msg, - ) + raise CoordinateNotFoundError(msg) lon_ndim = len(cube.coord_dims("longitude")) if lon_ndim != 1: msg = ( f"Input cube {cube.summary(shorten=True)} needs a 1D coordinate " f"`longitude`, got {lon_ndim:d}D" ) - raise CoordinateMultiDimError( - msg, - ) + raise CoordinateMultiDimError(msg) @preserve_float_dtype diff --git a/tests/unit/preprocessor/_time/test_time.py b/tests/unit/preprocessor/_time/test_time.py index a1c4df0103..befcdf1674 100644 --- a/tests/unit/preprocessor/_time/test_time.py +++ b/tests/unit/preprocessor/_time/test_time.py @@ -9,6 +9,7 @@ import iris import iris.coord_categorisation import iris.coords +import iris.cube import iris.exceptions import iris.fileformats import isodate @@ -664,6 +665,15 @@ def test_get_jf(self): with assert_raises(iris.exceptions.CoordinateNotFoundError): self.cube.coord("season_year") + def test_invalid_month_combination_fail(self): + """Test function for custom seasons.""" + msg = ( + r"Unable to extract Season 'FJ': combination of months not " + r"possible" + ) + with pytest.raises(ValueError, match=msg): + extract_season(self.cube, "FJ") + class TestClimatology(tests.Test): """Test class for :func:`esmvalcore.preprocessor._time.climatology`.""" @@ -988,6 +998,26 @@ def test_season_mean(self): expected = np.array([3.0, 6.0, 9.0]) assert_array_equal(result.data, expected) + assert result.coords("clim_season") + assert result.coords("season_year") + + def test_season_mean_no_keep_group_coordinates(self): + """Test for season average of a 1D field.""" + data = np.arange(12) + times = np.arange(15, 360, 30) + cube = self._create_cube(data, times) + + result = seasonal_statistics( + cube, + "mean", + keep_group_coordinates=False, + ) + expected = np.array([3.0, 6.0, 9.0]) + assert_array_equal(result.data, expected) + + assert not result.coords("clim_season") + assert not result.coords("season_year") + def test_season_median(self): """Test for season median of a 1D field.""" data = np.arange(12) @@ -1110,6 +1140,14 @@ def test_season_not_available(self): with pytest.raises(ValueError, match=re.escape(msg)): seasonal_statistics(cube, "mean") + def test_invalid_season_fail(self): + data = np.arange(12) + times = np.arange(15, 360, 30) + cube = self._create_cube(data, times) + msg = "Minimum of 2 months is required per season in ('J', 'F')" + with pytest.raises(ValueError, match=re.escape(msg)): + seasonal_statistics(cube, "mean", seasons=["J", "F"]) + class TestMonthlyStatistics(tests.Test): """Test :func:`esmvalcore.preprocessor._time.monthly_statistics`.""" @@ -1149,6 +1187,37 @@ def test_mean(self): ) assert_array_equal(result.data, expected) + assert result.coords("month_number") + assert result.coords("year") + + def test_mean_no_keep_group_coordinates(self): + """Test average of a 1D field.""" + data = np.arange(24) + times = np.arange(7, 360, 15) + cube = self._create_cube(data, times) + + result = monthly_statistics(cube, "mean", keep_group_coordinates=False) + expected = np.array( + [ + 0.5, + 2.5, + 4.5, + 6.5, + 8.5, + 10.5, + 12.5, + 14.5, + 16.5, + 18.5, + 20.5, + 22.5, + ], + ) + 
+        assert_array_equal(result.data, expected)
+
+        assert not result.coords("month_number")
+        assert not result.coords("year")
+
     def test_median(self):
         """Test median of a 1D field."""
         data = np.arange(24)
@@ -1264,6 +1333,29 @@ def test_mean(self):
         expected = np.array([0.5, 2.5, 4.5, 6.5])
         assert_array_equal(result.data, expected)
 
+        assert not result.coords("hour_group")
+        assert not result.coords("day_of_year")
+        assert not result.coords("year")
+
+    def test_mean_keep_group_coordinates(self):
+        """Test average of a 1D field with group coordinates kept."""
+        data = np.arange(8)
+        times = np.arange(0, 48, 6)
+        cube = self._create_cube(data, times)
+
+        result = hourly_statistics(
+            cube,
+            12,
+            "mean",
+            keep_group_coordinates=True,
+        )
+        expected = np.array([0.5, 2.5, 4.5, 6.5])
+        assert_array_equal(result.data, expected)
+
+        assert result.coords("hour_group")
+        assert result.coords("day_of_year")
+        assert result.coords("year")
+
     def test_median(self):
         """Test median of a 1D field."""
         data = np.arange(8)
@@ -1306,7 +1398,7 @@ def test_sum(self):
 
 
 class TestDailyStatistics(tests.Test):
-    """Test :func:`esmvalcore.preprocessor._time.monthly_statistics`."""
+    """Test :func:`esmvalcore.preprocessor._time.daily_statistics`."""
 
     @staticmethod
     def _create_cube(data, times):
@@ -1328,6 +1420,22 @@ def test_mean(self):
         expected = np.array([1.5, 5.5])
         assert_array_equal(result.data, expected)
 
+        assert not result.coords("day_of_year")
+        assert not result.coords("year")
+
+    def test_mean_keep_group_coordinates(self):
+        """Test average of a 1D field with group coordinates kept."""
+        data = np.arange(8)
+        times = np.arange(0, 48, 6)
+        cube = self._create_cube(data, times)
+
+        result = daily_statistics(cube, "mean", keep_group_coordinates=True)
+        expected = np.array([1.5, 5.5])
+        assert_array_equal(result.data, expected)
+
+        assert result.coords("day_of_year")
+        assert result.coords("year")
+
     def test_median(self):
         """Test median of a 1D field."""
         data = np.arange(8)
@@ -1679,36 +1787,48 @@ def make_time_series(number_years=2):
     return iris.cube.Cube(data, dim_coords_and_dims=[(time, 0)])
 
 
+@pytest.mark.parametrize("keep_group_coordinates", [True, False])
 @pytest.mark.parametrize("existing_coord", [True, False])
-def test_annual_average(existing_coord):
+def test_annual_average(existing_coord, keep_group_coordinates):
     """Test for annual average."""
     cube = make_time_series(number_years=2)
     if existing_coord:
         iris.coord_categorisation.add_year(cube, "time")
 
-    result = annual_statistics(cube)
+    result = annual_statistics(
+        cube,
+        keep_group_coordinates=keep_group_coordinates,
+    )
     expected = np.array([1.0, 1.0])
     assert_array_equal(result.data, expected)
     expected_time = np.array([180.0, 540.0])
     assert_array_equal(result.coord("time").points, expected_time)
+    assert bool(result.coords("year")) is keep_group_coordinates
 
 
+@pytest.mark.parametrize("keep_group_coordinates", [True, False])
 @pytest.mark.parametrize("existing_coord", [True, False])
-def test_annual_sum(existing_coord):
+def test_annual_sum(existing_coord, keep_group_coordinates):
     """Test for annual sum."""
     cube = make_time_series(number_years=2)
     if existing_coord:
         iris.coord_categorisation.add_year(cube, "time")
 
-    result = annual_statistics(cube, "sum")
+    result = annual_statistics(
+        cube,
+        "sum",
+        keep_group_coordinates=keep_group_coordinates,
+    )
     expected = np.array([12.0, 12.0])
     assert_array_equal(result.data, expected)
     expected_time = np.array([180.0, 540.0])
     assert_array_equal(result.coord("time").points, expected_time)
+    assert bool(result.coords("year")) is keep_group_coordinates
 
 
+@pytest.mark.parametrize("keep_group_coordinates", [True, False])
 @pytest.mark.parametrize("existing_coord", [True, False])
-def test_decadal_average(existing_coord):
+def test_decadal_average(existing_coord, keep_group_coordinates):
     """Test for decadal average."""
     cube = make_time_series(number_years=20)
     if existing_coord:
@@ -1725,15 +1845,23 @@ def get_decade(coord, value):
             get_decade,
         )
 
-    result = decadal_statistics(cube)
+    result = decadal_statistics(
+        cube,
+        keep_group_coordinates=keep_group_coordinates,
+    )
     expected = np.array([1.0, 1.0])
     assert_array_equal(result.data, expected)
     expected_time = np.array([1800.0, 5400.0])
     assert_array_equal(result.coord("time").points, expected_time)
+    assert bool(result.coords("decade")) is keep_group_coordinates
 
 
+@pytest.mark.parametrize("keep_group_coordinates", [True, False])
 @pytest.mark.parametrize("existing_coord", [True, False])
-def test_decadal_average_time_dependent_fx(existing_coord):
+def test_decadal_average_time_dependent_fx(
+    existing_coord,
+    keep_group_coordinates,
+):
     """Test for decadal average."""
     cube = make_time_series(number_years=20)
     measure = iris.coords.CellMeasure(
@@ -1764,15 +1892,20 @@ def get_decade(coord, value):
             "time",
             get_decade,
         )
-    result = decadal_statistics(cube)
+    result = decadal_statistics(
+        cube,
+        keep_group_coordinates=keep_group_coordinates,
+    )
     assert result.cell_measure("ocean_volume").data.shape == (1,)
     assert result.ancillary_variable("land_ice_area_fraction").data.shape == (
         1,
     )
+    assert bool(result.coords("decade")) is keep_group_coordinates
 
 
+@pytest.mark.parametrize("keep_group_coordinates", [True, False])
 @pytest.mark.parametrize("existing_coord", [True, False])
-def test_decadal_sum(existing_coord):
+def test_decadal_sum(existing_coord, keep_group_coordinates):
     """Test for decadal average."""
     cube = make_time_series(number_years=20)
     if existing_coord:
@@ -1789,11 +1922,16 @@ def get_decade(coord, value):
             get_decade,
         )
 
-    result = decadal_statistics(cube, "sum")
+    result = decadal_statistics(
+        cube,
+        "sum",
+        keep_group_coordinates=keep_group_coordinates,
+    )
     expected = np.array([120.0, 120.0])
     assert_array_equal(result.data, expected)
     expected_time = np.array([1800.0, 5400.0])
     assert_array_equal(result.coord("time").points, expected_time)
+    assert bool(result.coords("decade")) is keep_group_coordinates
 
 
 def make_map_data(number_years=2):
@@ -1879,6 +2017,21 @@ def test_standardized_anomalies(period):
     assert result.units == "1"
 
 
+def test_standardized_anomalies_invalid_period():
+    time_coord = iris.coords.DimCoord(
+        [15, 100, 380],
+        standard_name="time",
+        units="days since 2000-01-01",
+    )
+    cube = iris.cube.Cube([1, 2, 3], dim_coords_and_dims=[(time_coord, 0)])
+    msg = (
+        r"Cannot safely apply preprocessor to this dataset since the full "
+        r"time period of this dataset is not a multiple of the period 'month'"
+    )
+    with pytest.raises(ValueError, match=msg):
+        anomalies(cube, period="month", standardize=True)
+
+
 @pytest.mark.parametrize(("period", "reference"), PARAMETERS)
 def test_anomalies_preserve_metadata(period, reference):
     """Test that ``anomalies`` preserves metadata."""
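
Usage note (not part of the diff): a minimal sketch of how the new `keep_group_coordinates` flag behaves once this patch is applied. The defaults are deliberately asymmetric to preserve prior behaviour: `hourly_statistics` and `daily_statistics` default to `False` (they previously removed the grouping coordinates unconditionally), while `monthly_statistics`, `seasonal_statistics`, `annual_statistics`, and `decadal_statistics` default to `True` (they previously kept them). The cube construction below mirrors the `_create_cube` helpers in the tests.

```python
"""Sketch of ``keep_group_coordinates``; assumes this patch is installed."""

import iris.coords
import iris.cube
import numpy as np

from esmvalcore.preprocessor import daily_statistics

# Two days of 6-hourly samples, mirroring the TestDailyStatistics fixture.
time = iris.coords.DimCoord(
    np.arange(0, 48, 6),
    standard_name="time",
    units="hours since 2000-01-01",
)
cube = iris.cube.Cube(np.arange(8.0), dim_coords_and_dims=[(time, 0)])

# Default (keep_group_coordinates=False) matches the old behaviour: the
# "day_of_year" and "year" coordinates used for grouping are removed.
result = daily_statistics(cube, "mean")
print(result.data)  # [1.5 5.5]
assert not result.coords("day_of_year")
assert not result.coords("year")

# Opting in retains the auxiliary coordinates used for grouping.
result = daily_statistics(cube, "mean", keep_group_coordinates=True)
assert result.coords("day_of_year")
assert result.coords("year")
```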