diff --git a/esmvalcore/cmor/_fixes/native6/era5.py b/esmvalcore/cmor/_fixes/native6/era5.py index 6d8f1bb7a5..26f2bc9e66 100644 --- a/esmvalcore/cmor/_fixes/native6/era5.py +++ b/esmvalcore/cmor/_fixes/native6/era5.py @@ -3,7 +3,6 @@ import datetime import logging -import iris import numpy as np from iris.cube import CubeList from iris.util import reverse @@ -20,60 +19,51 @@ logger = logging.getLogger(__name__) -def get_frequency(cube): - """Determine time frequency of input cube.""" - try: - time = cube.coord(axis="T") - except iris.exceptions.CoordinateNotFoundError: - return "fx" - - time.convert_units("days since 1850-1-1 00:00:00.0") - if len(time.points) == 1: - acceptable_long_names = ( - "Geopotential", - "Percentage of the Grid Cell Occupied by Land (Including Lakes)", - ) - if cube.long_name not in acceptable_long_names: - msg = ( - "Unable to infer frequency of cube " - f"with length 1 time dimension: {cube}" - ) - raise ValueError( - msg, - ) - return "fx" - - interval = time.points[1] - time.points[0] - - if interval - 1 / 24 < 1e-4: - return "hourly" - if interval - 1.0 < 1e-4: - return "daily" - return "monthly" - - -def fix_hourly_time_coordinate(cube): +def fix_hourly_time_coordinate(cube, frequency): """Shift aggregated variables 30 minutes back in time.""" - if get_frequency(cube) == "hourly": + # While the frequency for aggregated variables is "1hr", the most common frequency + # in the CMIP6 E1hr table is "1hrPt" and in the E1hrClimMon table is "1hrCM". + # We could set the frequency to "1hr" using the extra_facets_native6.yml configuration + # file, but this would be backward incompatible for users who have already + # stored the data under a directory with the name 1hrPt. Therefore, apply + # this fix to any frequency starting with "1hr". + # + # Note that comparing instantaneous variables from CMIP6 to averaged + # variables from ERA5 may lead to some differences. + if frequency.startswith("1hr"): time = cube.coord(axis="T") - time.points = time.points - 1 / 48 + if str(time.units).startswith("hours since"): + shift = 0.5 + elif str(time.units).startswith("days since"): + shift = 1.0 / 48.0 + else: + msg = f"Unexpected time units {time.units} encountered for ERA5 data." + raise ValueError(msg) + time.points = time.points - shift return cube -def fix_accumulated_units(cube): +def fix_accumulated_units(cube, frequency): """Convert accumulations to fluxes.""" - if get_frequency(cube) == "monthly": + # While the frequency for aggregated variables is "1hr", the most common frequency + # in the CMIP6 E1hr table is "1hrPt" and in the E1hrClimMon table is "1hrCM". + # We could set the frequency to "1hr" using the extra_facets_native6.yml configuration + # file, but this would be backward incompatible for users who have already + # stored the data under a directory with the name 1hrPt. Therefore, apply + # this fix to any frequency starting with "1hr". + # + # Note that comparing instantaneous variables from CMIP6 to averaged + # variables from ERA5 may lead to some differences. + if frequency == "mon": cube.units = cube.units * "d-1" - elif get_frequency(cube) == "hourly": + elif frequency.startswith("1hr"): cube.units = cube.units * "h-1" - elif get_frequency(cube) == "daily": + elif frequency == "day": msg = ( f"Fixing of accumulated units of cube " f"{cube.summary(shorten=True)} is not implemented for daily data" ) - raise NotImplementedError( - msg, - ) + raise NotImplementedError(msg) return cube @@ -163,8 +153,8 @@ def fix_metadata(self, cubes): for cube in cubes: # Set input cube units for invalid units were ignored on load cube.units = "m" - fix_hourly_time_coordinate(cube) - fix_accumulated_units(cube) + fix_hourly_time_coordinate(cube, self.frequency) + fix_accumulated_units(cube, self.frequency) multiply_with_density(cube) # Correct sign to align with CMOR standards cube.data = cube.core_data() * -1.0 @@ -180,8 +170,8 @@ def fix_metadata(self, cubes): for cube in cubes: # Set input cube units for invalid units were ignored on load cube.units = "m" - fix_hourly_time_coordinate(cube) - fix_accumulated_units(cube) + fix_hourly_time_coordinate(cube, self.frequency) + fix_accumulated_units(cube, self.frequency) multiply_with_density(cube) # Correct sign to align with CMOR standards cube.data = cube.core_data() * -1.0 @@ -205,8 +195,8 @@ class Mrro(Fix): def fix_metadata(self, cubes): """Fix metadata.""" for cube in cubes: - fix_hourly_time_coordinate(cube) - fix_accumulated_units(cube) + fix_hourly_time_coordinate(cube, self.frequency) + fix_accumulated_units(cube, self.frequency) multiply_with_density(cube) return cubes @@ -246,8 +236,8 @@ class Pr(Fix): def fix_metadata(self, cubes): """Fix metadata.""" for cube in cubes: - fix_hourly_time_coordinate(cube) - fix_accumulated_units(cube) + fix_hourly_time_coordinate(cube, self.frequency) + fix_accumulated_units(cube, self.frequency) multiply_with_density(cube) return cubes @@ -265,8 +255,8 @@ def fix_metadata(self, cubes): for cube in cubes: # Set input cube units for invalid units were ignored on load cube.units = "m" - fix_hourly_time_coordinate(cube) - fix_accumulated_units(cube) + fix_hourly_time_coordinate(cube, self.frequency) + fix_accumulated_units(cube, self.frequency) multiply_with_density(cube) return cubes @@ -319,8 +309,8 @@ class Rlds(Fix): def fix_metadata(self, cubes): """Fix metadata.""" for cube in cubes: - fix_hourly_time_coordinate(cube) - fix_accumulated_units(cube) + fix_hourly_time_coordinate(cube, self.frequency) + fix_accumulated_units(cube, self.frequency) cube.attributes["positive"] = "down" return cubes @@ -332,8 +322,8 @@ class Rlns(Fix): def fix_metadata(self, cubes): """Fix metadata.""" for cube in cubes: - fix_hourly_time_coordinate(cube) - fix_accumulated_units(cube) + fix_hourly_time_coordinate(cube, self.frequency) + fix_accumulated_units(cube, self.frequency) cube.attributes["positive"] = "down" return cubes @@ -345,7 +335,7 @@ class Rls(Fix): def fix_metadata(self, cubes): """Fix metadata.""" for cube in cubes: - fix_hourly_time_coordinate(cube) + fix_hourly_time_coordinate(cube, self.frequency) cube.attributes["positive"] = "down" return cubes @@ -357,8 +347,8 @@ class Rlus(Fix): def fix_metadata(self, cubes): """Fix metadata.""" for cube in cubes: - fix_hourly_time_coordinate(cube) - fix_accumulated_units(cube) + fix_hourly_time_coordinate(cube, self.frequency) + fix_accumulated_units(cube, self.frequency) cube.attributes["positive"] = "up" return cubes @@ -396,8 +386,8 @@ class Rsds(Fix): def fix_metadata(self, cubes): """Fix metadata.""" for cube in cubes: - fix_hourly_time_coordinate(cube) - fix_accumulated_units(cube) + fix_hourly_time_coordinate(cube, self.frequency) + fix_accumulated_units(cube, self.frequency) cube.attributes["positive"] = "down" return cubes @@ -409,8 +399,8 @@ class Rsns(Fix): def fix_metadata(self, cubes): """Fix metadata.""" for cube in cubes: - fix_hourly_time_coordinate(cube) - fix_accumulated_units(cube) + fix_hourly_time_coordinate(cube, self.frequency) + fix_accumulated_units(cube, self.frequency) cube.attributes["positive"] = "down" return cubes @@ -422,8 +412,8 @@ class Rsus(Fix): def fix_metadata(self, cubes): """Fix metadata.""" for cube in cubes: - fix_hourly_time_coordinate(cube) - fix_accumulated_units(cube) + fix_hourly_time_coordinate(cube, self.frequency) + fix_accumulated_units(cube, self.frequency) cube.attributes["positive"] = "up" return cubes @@ -435,8 +425,8 @@ class Rsdt(Fix): def fix_metadata(self, cubes): """Fix metadata.""" for cube in cubes: - fix_hourly_time_coordinate(cube) - fix_accumulated_units(cube) + fix_hourly_time_coordinate(cube, self.frequency) + fix_accumulated_units(cube, self.frequency) cube.attributes["positive"] = "down" return cubes @@ -448,8 +438,8 @@ class Rss(Fix): def fix_metadata(self, cubes): """Fix metadata.""" for cube in cubes: - fix_hourly_time_coordinate(cube) - fix_accumulated_units(cube) + fix_hourly_time_coordinate(cube, self.frequency) + fix_accumulated_units(cube, self.frequency) cube.attributes["positive"] = "down" return cubes @@ -482,7 +472,7 @@ class Tasmax(Fix): def fix_metadata(self, cubes): """Fix metadata.""" for cube in cubes: - fix_hourly_time_coordinate(cube) + fix_hourly_time_coordinate(cube, self.frequency) return cubes @@ -492,7 +482,7 @@ class Tasmin(Fix): def fix_metadata(self, cubes): """Fix metadata.""" for cube in cubes: - fix_hourly_time_coordinate(cube) + fix_hourly_time_coordinate(cube, self.frequency) return cubes @@ -567,7 +557,7 @@ def _fix_coordinates( # noqa: C901 ): coord.guess_bounds() - self._fix_monthly_time_coord(cube) + self._fix_monthly_time_coord(cube, self.frequency) # Fix coordinate increasing direction if cube.coords("latitude") and not has_unstructured_grid(cube): @@ -582,9 +572,9 @@ def _fix_coordinates( # noqa: C901 return cube @staticmethod - def _fix_monthly_time_coord(cube): + def _fix_monthly_time_coord(cube, frequency): """Set the monthly time coordinates to the middle of the month.""" - if get_frequency(cube) == "monthly": + if frequency in ("monthly", "mon"): coord = cube.coord(axis="T") end = [] for cell in coord.cells(): diff --git a/tests/integration/cmor/_fixes/native6/test_era5.py b/tests/integration/cmor/_fixes/native6/test_era5.py index 82c380383c..b183bd811b 100644 --- a/tests/integration/cmor/_fixes/native6/test_era5.py +++ b/tests/integration/cmor/_fixes/native6/test_era5.py @@ -15,10 +15,10 @@ Evspsbl, Zg, fix_accumulated_units, - get_frequency, ) from esmvalcore.cmor.fix import fix_metadata from esmvalcore.cmor.table import CMOR_TABLES, get_var_info +from esmvalcore.dataset import Dataset from esmvalcore.preprocessor import cmor_check_metadata COMMENT = ( @@ -41,85 +41,6 @@ def test_get_zg_fix(): assert fix == [Zg(vardef), AllVars(vardef), GenericFix(vardef)] -def test_get_frequency_hourly(): - """Test cubes with hourly frequency.""" - time = DimCoord( - [0, 1, 2], - standard_name="time", - units=Unit("hours since 1900-01-01"), - ) - cube = Cube( - [1, 6, 3], - var_name="random_var", - dim_coords_and_dims=[(time, 0)], - ) - assert get_frequency(cube) == "hourly" - cube.coord("time").convert_units("days since 1850-1-1 00:00:00.0") - assert get_frequency(cube) == "hourly" - - -def test_get_frequency_daily(): - """Test cubes with daily frequency.""" - time = DimCoord( - [0, 1, 2], - standard_name="time", - units=Unit("days since 1900-01-01"), - ) - cube = Cube( - [1, 6, 3], - var_name="random_var", - dim_coords_and_dims=[(time, 0)], - ) - assert get_frequency(cube) == "daily" - cube.coord("time").convert_units("hours since 1850-1-1 00:00:00.0") - assert get_frequency(cube) == "daily" - - -def test_get_frequency_monthly(): - """Test cubes with monthly frequency.""" - time = DimCoord( - [0, 31, 59], - standard_name="time", - units=Unit("hours since 1900-01-01"), - ) - cube = Cube( - [1, 6, 3], - var_name="random_var", - dim_coords_and_dims=[(time, 0)], - ) - assert get_frequency(cube) == "monthly" - cube.coord("time").convert_units("days since 1850-1-1 00:00:00.0") - assert get_frequency(cube) == "monthly" - - -def test_get_frequency_fx(): - """Test cubes with time invariant frequency.""" - cube = Cube(1.0, long_name="Cube without time coordinate") - assert get_frequency(cube) == "fx" - - time = DimCoord( - 0, - standard_name="time", - units=Unit("hours since 1900-01-01"), - ) - cube = Cube( - [1], - var_name="cube_with_length_1_time_coord", - long_name="Geopotential", - dim_coords_and_dims=[(time, 0)], - ) - assert get_frequency(cube) == "fx" - - cube.long_name = ( - "Percentage of the Grid Cell Occupied by Land (Including Lakes)" - ) - assert get_frequency(cube) == "fx" - - cube.long_name = "Not geopotential" - with pytest.raises(ValueError): - get_frequency(cube) - - def test_fix_accumulated_units_fail(): """Test `fix_accumulated_units`.""" time = DimCoord( @@ -133,7 +54,7 @@ def test_fix_accumulated_units_fail(): dim_coords_and_dims=[(time, 0)], ) with pytest.raises(NotImplementedError): - fix_accumulated_units(cube) + fix_accumulated_units(cube, "day") def _era5_latitude(): @@ -162,9 +83,9 @@ def _era5_time(frequency): timestamps = [788928] # hours since 1900 at 1 january 1990 elif frequency == "daily": timestamps = [788940, 788964, 788988] - elif frequency == "hourly": + elif frequency == "1hr": timestamps = [788928, 788929, 788930] - elif frequency == "monthly": + elif frequency in ("monthly", "mon"): timestamps = [788928, 789672, 790344] else: msg = f"Invalid frequency {frequency}" @@ -292,7 +213,7 @@ def _cmor_data(mip): def era5_2d(frequency): - if frequency == "monthly": + if frequency in ("monthly", "mon"): time = DimCoord( [-31, 0, 31], standard_name="time", @@ -422,9 +343,9 @@ def cl_cmor_amon(): def clt_era5_hourly(): - time = _era5_time("hourly") + time = _era5_time("1hr") cube = Cube( - _era5_data("hourly"), + _era5_data("1hr"), long_name="cloud cover fraction", var_name="cloud_cover", units="unknown", @@ -459,9 +380,9 @@ def clt_cmor_e1hr(): def evspsbl_era5_hourly(): - time = _era5_time("hourly") + time = _era5_time("1hr") cube = Cube( - _era5_data("hourly") * -1.0, + _era5_data("1hr") * -1.0, long_name="total evapotranspiration", var_name="e", units="unknown", @@ -496,9 +417,9 @@ def evspsbl_cmor_e1hr(): def evspsblpot_era5_hourly(): - time = _era5_time("hourly") + time = _era5_time("1hr") cube = Cube( - _era5_data("hourly") * -1.0, + _era5_data("1hr") * -1.0, long_name="potential evapotranspiration", var_name="epot", units="unknown", @@ -533,9 +454,9 @@ def evspsblpot_cmor_e1hr(): def mrro_era5_hourly(): - time = _era5_time("hourly") + time = _era5_time("1hr") cube = Cube( - _era5_data("hourly"), + _era5_data("1hr"), long_name="runoff", var_name="runoff", units="m", @@ -647,9 +568,9 @@ def pr_cmor_amon(): def pr_era5_hourly(): - time = _era5_time("hourly") + time = _era5_time("1hr") cube = Cube( - _era5_data("hourly"), + _era5_data("1hr"), long_name="total_precipitation", var_name="tp", units="m", @@ -684,9 +605,9 @@ def pr_cmor_e1hr(): def prsn_era5_hourly(): - time = _era5_time("hourly") + time = _era5_time("1hr") cube = Cube( - _era5_data("hourly"), + _era5_data("1hr"), long_name="snow", var_name="snow", units="unknown", @@ -721,9 +642,9 @@ def prsn_cmor_e1hr(): def ptype_era5_hourly(): - time = _era5_time("hourly") + time = _era5_time("1hr") cube = Cube( - _era5_data("hourly"), + _era5_data("1hr"), long_name="snow", var_name="snow", units="unknown", @@ -759,9 +680,9 @@ def ptype_cmor_e1hr(): def rlds_era5_hourly(): - time = _era5_time("hourly") + time = _era5_time("1hr") cube = Cube( - _era5_data("hourly"), + _era5_data("1hr"), long_name="surface thermal radiation downwards", var_name="ssrd", units="J m**-2", @@ -796,7 +717,7 @@ def rlds_cmor_e1hr(): def rlns_era5_hourly(): - freq = "hourly" + freq = "1hr" cube = Cube( _era5_data(freq), long_name=None, @@ -837,7 +758,7 @@ def rlns_cmor_e1hr(): def rlus_era5_hourly(): - freq = "hourly" + freq = "1hr" cube = Cube( _era5_data(freq), long_name=None, @@ -954,9 +875,9 @@ def rlutcs_cmor_amon(): def rls_era5_hourly(): - time = _era5_time("hourly") + time = _era5_time("1hr") cube = Cube( - _era5_data("hourly"), + _era5_data("1hr"), long_name="runoff", var_name="runoff", units="W m-2", @@ -991,9 +912,9 @@ def rls_cmor_e1hr(): def rsds_era5_hourly(): - time = _era5_time("hourly") + time = _era5_time("1hr") cube = Cube( - _era5_data("hourly"), + _era5_data("1hr"), long_name="solar_radiation_downwards", var_name="rlwd", units="J m**-2", @@ -1028,7 +949,7 @@ def rsds_cmor_e1hr(): def rsns_era5_hourly(): - freq = "hourly" + freq = "1hr" cube = Cube( _era5_data(freq), long_name=None, @@ -1069,7 +990,7 @@ def rsns_cmor_e1hr(): def rsus_era5_hourly(): - freq = "hourly" + freq = "1hr" cube = Cube( _era5_data(freq), long_name=None, @@ -1108,9 +1029,9 @@ def rsus_cmor_e1hr(): def rsdt_era5_hourly(): - time = _era5_time("hourly") + time = _era5_time("1hr") cube = Cube( - _era5_data("hourly"), + _era5_data("1hr"), long_name="thermal_radiation_downwards", var_name="strd", units="J m**-2", @@ -1145,9 +1066,9 @@ def rsdt_cmor_e1hr(): def rss_era5_hourly(): - time = _era5_time("hourly") + time = _era5_time("1hr") cube = Cube( - _era5_data("hourly"), + _era5_data("1hr"), long_name="net_solar_radiation", var_name="ssr", units="J m**-2", @@ -1211,9 +1132,9 @@ def sftlf_cmor_fx(): def tas_era5_hourly(): - time = _era5_time("hourly") + time = _era5_time("1hr") cube = Cube( - _era5_data("hourly"), + _era5_data("1hr"), long_name="2m_temperature", var_name="t2m", units="K", @@ -1335,9 +1256,9 @@ def zg_cmor_amon(): def tasmax_era5_hourly(): - time = _era5_time("hourly") + time = _era5_time("1hr") cube = Cube( - _era5_data("hourly"), + _era5_data("1hr"), long_name="maximum 2m temperature", var_name="mx2t", units="K", @@ -1373,9 +1294,9 @@ def tasmax_cmor_e1hr(): def tasmin_era5_hourly(): - time = _era5_time("hourly") + time = _era5_time("1hr") cube = Cube( - _era5_data("hourly"), + _era5_data("1hr"), long_name="minimum 2m temperature", var_name="mn2t", units="K", @@ -1411,9 +1332,9 @@ def tasmin_cmor_e1hr(): def uas_era5_hourly(): - time = _era5_time("hourly") + time = _era5_time("1hr") cube = Cube( - _era5_data("hourly"), + _era5_data("1hr"), long_name="10m_u_component_of_wind", var_name="u10", units="m s-1", @@ -1449,9 +1370,9 @@ def uas_cmor_e1hr(): def vas_era5_hourly(): - time = _era5_time("hourly") + time = _era5_time("1hr") cube = Cube( - _era5_data("hourly"), + _era5_data("1hr"), long_name="10m_v_component_of_wind", var_name="v10", units="m s-1", @@ -1510,6 +1431,7 @@ def vas_cmor_e1hr(): (prsn_era5_hourly(), prsn_cmor_e1hr(), "prsn", "E1hr"), (era5_2d("monthly"), cmor_2d("Amon", "prw"), "prw", "Amon"), (era5_2d("monthly"), cmor_2d("Amon", "ps"), "ps", "Amon"), + (era5_2d("mon"), cmor_2d("Amon", "ps"), "ps", "Amon"), (ptype_era5_hourly(), ptype_cmor_e1hr(), "ptype", "E1hr"), ( era5_3d("monthly"), @@ -1550,14 +1472,30 @@ def vas_cmor_e1hr(): @pytest.mark.parametrize(("era5_cubes", "cmor_cubes", "var", "mip"), VARIABLES) def test_cmorization(era5_cubes, cmor_cubes, var, mip): """Verify that cmorization results in the expected target cube.""" - fixed_cubes = fix_metadata(era5_cubes, var, "native6", "era5", mip) + dataset = Dataset( + short_name=var, + mip=mip, + project="native6", + dataset="ERA5", + ) + dataset.augment_facets() + + # Call `fix_metadata` and `cmor_check_metadata` with the same arguments as + # in `esmvalcore.dataset.Dataset.load`. + fixed_cubes = fix_metadata(era5_cubes, **dataset.facets) assert len(fixed_cubes) == 1 fixed_cube = fixed_cubes[0] cmor_cube = cmor_cubes[0] # Test that CMOR checks are passing - fixed_cubes = cmor_check_metadata(fixed_cube, "native6", mip, var) + fixed_cubes = cmor_check_metadata( + fixed_cube, + cmor_table="native6", + mip=mip, + short_name=var, + frequency=dataset["frequency"], + ) if fixed_cube.coords("time"): for cube in [fixed_cube, cmor_cube]: