diff --git a/doc/sphinx/source/input.rst b/doc/sphinx/source/input.rst index 4cb27bb4f4..8dbdea7e32 100644 --- a/doc/sphinx/source/input.rst +++ b/doc/sphinx/source/input.rst @@ -336,6 +336,8 @@ A list of the datasets for which a CMORizers is available is provided in the fol +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | ESACCI-SEAICE | siconc (SIday, SImon) | 2 | Python | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ +| ESACCI-PERMAFROST | alt, gtd, pfr (Lyr, frequency: yr) | 2 | Python | ++------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | ESACCI-SEA-SURFACE-SALINITY | sos (Omon) | 2 | Python | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | ESACCI-SOILMOISTURE | sm (Eday, Lmon), smStderr (Eday) | 2 | Python | diff --git a/esmvaltool/cmorizers/data/cmor_config/ESACCI-PERMAFROST.yml b/esmvaltool/cmorizers/data/cmor_config/ESACCI-PERMAFROST.yml new file mode 100644 index 0000000000..337dc93cce --- /dev/null +++ b/esmvaltool/cmorizers/data/cmor_config/ESACCI-PERMAFROST.yml @@ -0,0 +1,29 @@ +--- +# Common global attributes for CMORizer output +attributes: + dataset_id: ESACCI-PERMAFROST + version: 'v3.0' + tier: 2 + modeling_realm: sat + project_id: OBS6 + source: 'ftp://anon-ftp.ceda.ac.uk/neodc/esacci/permafrost' + reference: "esacci-permafrost" + comment: "" + +# Variables to CMORize +variables: + alt: + mip: Lyr + raw: ALT + file: 'ESACCI-PERMAFROST-L4-ALT-*-AREA4_PP-{year}-fv03.0.nc' + weights_dir: '/work/bd0854/b380103/esacci-permafrost-weights' + gtd: + mip: Lyr + raw: [GST, T1m, T2m, T5m, T10m] + file: 'ESACCI-PERMAFROST-L4-GTD-*-AREA4_PP-{year}-fv03.0.nc' + weights_dir: '/work/bd0854/b380103/esacci-permafrost-weights' + pfr: + mip: Lyr + raw: PFR + file: 'ESACCI-PERMAFROST-L4-PFR-*-AREA4_PP-{year}-fv03.0.nc' + weights_dir: '/work/bd0854/b380103/esacci-permafrost-weights' diff --git a/esmvaltool/cmorizers/data/datasets.yml b/esmvaltool/cmorizers/data/datasets.yml index 96462c9d27..5f818ea671 100644 --- a/esmvaltool/cmorizers/data/datasets.yml +++ b/esmvaltool/cmorizers/data/datasets.yml @@ -551,6 +551,17 @@ datasets: Version = "v0008" Put all files under a single directory (no subdirectories with years). + ESACCI-PERMAFROST: + tier: 2 + source: ftp://anon-ftp.ceda.ac.uk/neodc/esacci/permafrost/data + last_access: 2024-02-27 + info: | + Download the data from: + active_layer_thickness/L4/area4/pp/v03.0 + ground_temperature/L4/area4/pp/v03.0 + permafrost_extent/L4/area4/pp/v03.0 + Put all files in a single directory. + ESACCI-SEAICE: tier: 2 source: ftp://anon-ftp.ceda.ac.uk/neodc/esacci/sea_ice/data/ diff --git a/esmvaltool/cmorizers/data/downloaders/datasets/esacci_permafrost.py b/esmvaltool/cmorizers/data/downloaders/datasets/esacci_permafrost.py new file mode 100644 index 0000000000..02cc4f553f --- /dev/null +++ b/esmvaltool/cmorizers/data/downloaders/datasets/esacci_permafrost.py @@ -0,0 +1,75 @@ +"""Script to download ESACCI-PERMAFROST.""" + +import logging +from datetime import datetime + +from dateutil import relativedelta + +from esmvaltool.cmorizers.data.downloaders.ftp import CCIDownloader + +logger = logging.getLogger(__name__) + + +def download_dataset( + config: dict, + dataset: str, + dataset_info: dict, + start_date: datetime, + end_date: datetime, + overwrite: bool, +) -> None: + """Download dataset. + + Parameters + ---------- + config : dict + ESMValTool's user configuration + dataset : str + Name of the dataset + dataset_info : dict + Dataset information from the datasets.yml file + start_date : datetime + Start of the interval to download + end_date : datetime + End of the interval to download + overwrite : bool + Overwrite already downloaded files + """ + if start_date is None: + start_date = datetime(1997, 1, 1, tzinfo=datetime.timezone.utc) + if end_date is None: + end_date = datetime(2019, 12, 31, tzinfo=datetime.timezone.utc) + + downloader = CCIDownloader( + config=config, + dataset=dataset, + dataset_info=dataset_info, + overwrite=overwrite, + ) + + downloader.connect() + + version = "v03.0" + + ccivars = [ + "active_layer_thickness", + "ground_temperature", + "permafrost_extent", + ] + + # download active layer thickness + loop_date = start_date + while loop_date <= end_date: + for var in ccivars: + pathname = f"{var}/L4/area4/pp/{version}/" + fname = f"ESACCI-PERMAFROST-L4-*-{loop_date.year}-f{version}.nc" + if downloader.file_exists(fname, pathname): + downloader.download_files(fname, pathname) + else: + logger.info( + "%d: no data for %s %s", + loop_date.year, + var, + version, + ) + loop_date += relativedelta.relativedelta(years=1) diff --git a/esmvaltool/cmorizers/data/downloaders/ftp.py b/esmvaltool/cmorizers/data/downloaders/ftp.py index 1e3607f5e5..58ad692d6f 100644 --- a/esmvaltool/cmorizers/data/downloaders/ftp.py +++ b/esmvaltool/cmorizers/data/downloaders/ftp.py @@ -1,5 +1,6 @@ """Downloader for FTP repositories.""" +import fnmatch import ftplib import logging import os @@ -228,6 +229,26 @@ def dataset_name(self): """ return self.dataset.lower().replace("-", "_") + def download_files(self, filename, path=None): + """Download file(s). + + Parameters: + ----------- + filename : str + Name of file (w/o path) to download (wildcards are allowed) + path : str + Path of file(s) to download (optional) + """ + if path is not None: + self.set_cwd(path) + files = self._client.nlst() + matching_files = fnmatch.filter(files, filename) + if len(matching_files) != 0: + for file in matching_files: + super().download_file(file) + else: + logger.info('No files %s found.', filename) + def download_year(self, year): """Download a specific year. @@ -237,3 +258,21 @@ def download_year(self, year): Year to download """ self.download_folder(str(year)) + + def file_exists(self, filename, path=None): + """Check if a file exists. + + Parameters: + ----------- + filename : str + Name of file (w/o path) to check (wildcards are allowed) + path : str + Path of file to check (optional) + """ + if path is not None: + self.set_cwd(path) + files = self._client.nlst() + matching_files = fnmatch.filter(files, filename) + if len(matching_files) != 0: + return True + return False diff --git a/esmvaltool/cmorizers/data/formatters/datasets/esacci_permafrost.py b/esmvaltool/cmorizers/data/formatters/datasets/esacci_permafrost.py new file mode 100644 index 0000000000..96d88f43cc --- /dev/null +++ b/esmvaltool/cmorizers/data/formatters/datasets/esacci_permafrost.py @@ -0,0 +1,353 @@ +"""ESMValTool CMORizer for ESACCI-PERMAFROST data. + +Tier + Tier 2: other freely-available dataset. + +Source + ftp://anon-ftp.ceda.ac.uk/neodc/esacci/permafrost/data + +Last access + 20240227 + +Download and processing instructions + Download the data from: + active_layer_thickness/L4/area4/pp/v03.0 + ground_temperature/L4/area4/pp/v03.0 + permafrost_extent/L4/area4/pp/v03.0 + Put all files in a single directory. +""" + +import glob +import logging +import os +import os.path +from copy import deepcopy +from datetime import datetime + +import iris +import numpy as np +from cdo import Cdo +from dateutil import relativedelta +from esmvalcore.cmor.table import CMOR_TABLES +from netCDF4 import Dataset + +from esmvaltool.cmorizers.data.utilities import save_variable, set_global_atts + +logger = logging.getLogger(__name__) + + +def _fix_coordinates(cube: iris.cube.Cube, definition): + """Fix coordinates.""" + axis2def = {"T": "time", "X": "longitude", "Y": "latitude", "Z": "sdepth"} + axes = ["T", "X", "Y", "Z"] + + for axis in axes: + coord_def = definition.coordinates.get(axis2def[axis]) + if coord_def: + coord = cube.coord(axis=axis) + if axis == "T": + coord.convert_units("days since 1850-1-1 00:00:00.0") + coord.standard_name = coord_def.standard_name + coord.var_name = coord_def.out_name + coord.long_name = coord_def.long_name + coord.points = coord.core_points().astype("float64") + if len(coord.points) > 1: + if coord.bounds is not None: + coord.bounds = None + coord.guess_bounds() + + return cube + + +def _regrid_infile(infile, outfile, weightsfile): + """Regrid infile to 0.5 deg x 0.5 deg grid using cdo.""" + cdo = Cdo() + # ESACCI-PERMAFROST v3.0 dimensions of raw input data + xsize = 14762 + ysize = 10353 + totalsize = xsize * ysize + + # description of ESACCI-PERMAFROST v3.0 polar stereographic + # grid for cdo + esagrid = ( + f"gridtype = projection\n" + f"gridsize = {totalsize}\n" + f"xsize = {xsize}\n" + f"ysize = {ysize}\n" + f"xname = x\n" + f'xlongname = "x coordinate of projection"\n' + f'xunits = "m"\n' + f"yname = y\n" + f'ylongname = "y coordinate of projection"\n' + f'yunits = "m"\n' + f"xfirst = -6111475.22239475\n" + f"xinc = 926.625433138333\n" + f"yfirst = 4114895.09469662\n" + f"yinc = -926.625433138333\n" + f"grid_mapping = polar_stereographic\n" + f"grid_mapping_name = polar_stereographic\n" + f"straight_vertical_longitude_from_pole = 0.\n" + f"false_easting = 0.\n" + f"false_northing = 0.\n" + f"latitude_of_projection_origin = 90.\n" + f"standard_parallel = 71.\n" + f"longitude_of_prime_meridian = 0.\n" + f"semi_major_axis = 6378137.\n" + f"inverse_flattening = 298.257223563\n" + f'proj_params = "+proj=stere +lat_0=90 +lat_ts=71 +lon_0=0' + f' +k=1" +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs"\n' + ) + + esagrid_file = "./esacci_grid.txt" + + # write grid description to ASCII file + with open(esagrid_file, "w", encoding="ascii") as file: + file.write(esagrid) + file.close() + + # define dimensions of target grid (regular lat-lon grid) + target_dimx = 720 # delta_lon = 0.5 deg + target_dimy = 360 # delta_lat = 0.5 deg + target_grid = f"r{target_dimx}x{target_dimy}" + + # check if suitable weights file already exists + # (e.g. from previous call to _regrid_file) + + weightsfile_ok = False + + if os.path.isfile(weightsfile): + weights = Dataset(weightsfile, "r") + # make sure dimensions of source and target grids match + # expected values + src = weights.variables["src_grid_dims"] + dst = weights.variables["dst_grid_dims"] + if ( + xsize == src[0] + and ysize == src[1] + and target_dimx == dst[0] + and target_dimy == dst[1] + ): + logger.info( + "Using matching weights file %s for regridding.", + weightsfile, + ) + weightsfile_ok = True + weights.close() + + # if no suitable weights file, generate new weights for regridding + + if not weightsfile_ok: + logger.info( + "Generating regridding weights. This will take" + " about 5-10 minutes (or more)...", + ) + # check if path for weight files exists, if not create folder + path = os.path.split(weightsfile)[0] + if not os.path.exists(path): + os.makedirs(path) + # generate weights + cdo.genbil( + f"{target_grid} -setgrid,{esagrid_file}", + input=infile, + output=weightsfile, + options="-f nc", + ) + + # now regrid data to 0.5 deg x 0.5 deg + cdo.remap( + f"{target_grid},{weightsfile} -setgrid,{esagrid_file}", + input=infile, + output=outfile, + options="-f nc", + ) + + # delete temporary file + os.remove(esagrid_file) + + +def _extract_variable(in_file, var, cfg, out_dir, year): + logger.info( + "CMORizing variable '%s' from input file '%s'", + var["short_name"], + in_file, + ) + attributes = deepcopy(cfg["attributes"]) + attributes["mip"] = var["mip"] + attributes["raw"] = var["raw"] + cmor_table = CMOR_TABLES[attributes["project_id"]] + definition = cmor_table.get_variable(var["mip"], var["short_name"]) + + if "weights_dir" in var.keys(): + weights_dir = var["weights_dir"] + else: + weights_dir = "." + + # regrid input file using cdo + # (using the preprocessor (ESMF) is too slow) + + regridded_file = f"./{year}_{var['short_name']}.nc" + weights_file = f"{weights_dir}/{year}_{var['short_name']}_weights.nc" + _regrid_infile(in_file, regridded_file, weights_file) + + # load input file + cubes = iris.load(regridded_file) + + if len(cubes) > 1: + # variable gtd contains the vertical levels as separate variables + # (depth level can only be recognized by the variable names) + # --> combine all depth levels into 1 cube + for cube in cubes: + if cube.var_name == "GST": + sdepth = 0.0 + elif cube.var_name == "T1m": + sdepth = 1.0 + elif cube.var_name == "T2m": + sdepth = 2.0 + elif cube.var_name == "T5m": + sdepth = 5.0 + elif cube.var_name == "T10m": + sdepth = 10.0 + else: + sdepth = 999.0 + logger.info("Could not determin depth. Check results.") + cube.add_aux_coord( + iris.coords.AuxCoord( + sdepth, + standard_name="depth", + long_name="depth", + units="m", + ), + ) + cube.var_name = "gst" + cube.standard_name = "soil_temperature" # "valid" standard name + cube.attributes.pop("actual_min") + cube.attributes.pop("actual_max") + tmp_cube = cubes.merge_cube() + # setting the attribute 'positive' is needed for Iris to recognize + # this coordinate as 'Z' axis + tmp_cube.coord("depth").attributes["positive"] = "down" + # swap coordinates 'depth' and 'time': + # (depth, time, lat, lon) --> (time, depth, lat, lon) + flipped_data = np.swapaxes(tmp_cube.core_data(), 1, 0) + coord_spec = [ + (tmp_cube.coord("time"), 0), + (tmp_cube.coord("depth"), 1), + (tmp_cube.coord("latitude"), 2), + (tmp_cube.coord("longitude"), 3), + ] + cube = iris.cube.Cube(flipped_data, dim_coords_and_dims=coord_spec) + cube.metadata = tmp_cube.metadata + # change units string so unit conversion from deg C --> K will work + cube.units = "celsius" + # convert units from degC to K + cube.convert_units("K") + else: + cube = cubes[0] + + # --> drop attributes that differ among input files for different years + # global attributes to remove + drop_attrs = [ + "source", + "date_created", + "history", + "tracking_id", + "id", + "time_coverage_start", + "time_coverage_end", + "platform", + "sensor", + "keywords", + ] + # variable attributes to remove + drop_var_attrs = [ + "flag_meanings", + "flag_values", + "grid_mapping", + "actual_range", + "ancillary_variables", + ] + for attr in drop_attrs: + if attr in cube.attributes.keys(): + cube.attributes.pop(attr) + for attr in drop_var_attrs: + if attr in cube.attributes.keys(): + cube.attributes.pop(attr) + + set_global_atts(cube, attributes) + + cube.coord("time").points = ( + cube.coord("time").core_points().astype("float64") + ) + + # Set correct names + cube.var_name = definition.short_name + cube.standard_name = definition.standard_name + cube.long_name = definition.long_name + + # Fix units + # input variable for pfr reports 'percent' --> rename to '%' + # input variable for alt reports 'metres' --> rename to 'm' + # input variable for gtd has been converted to 'K' --> nothing to do + cube.units = definition.units + + # Fix data type + cube.data = cube.core_data().astype("float32") + + # Fix coordinates + cube = _fix_coordinates(cube, definition) + + # Save results + logger.debug("Saving cube\n%s", cube) + logger.debug("Setting time dimension to UNLIMITED while saving!") + save_variable( + cube, + cube.var_name, + out_dir, + attributes, + unlimited_dimensions=["time"], + ) + os.remove(regridded_file) # delete temporary file + logger.info("Finished CMORizing %s", in_file) + + +def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): + """CMORize ESACCI-PERMAFROST dataset.""" + glob_attrs = cfg["attributes"] + + logger.info( + "Starting CMORization for tier%s OBS files: %s", + glob_attrs["tier"], + glob_attrs["dataset_id"], + ) + logger.info("Input data from: %s", in_dir) + logger.info("Output will be written to: %s", out_dir) + logger.info( + "CMORizing ESACCI-PERMAFROST version %s", + glob_attrs["version"], + ) + + if start_date is None: + start_date = datetime(1997, 1, 1) + if end_date is None: + end_date = datetime(2019, 12, 31) + + loop_date = start_date + while loop_date <= end_date: + for short_name, var in cfg["variables"].items(): + if "short_name" not in var: + var["short_name"] = short_name + filepattern = os.path.join( + in_dir, + var["file"].format(year=loop_date.year), + ) + in_file = glob.glob(filepattern)[0] + if not in_file: + logger.info( + "%d: no data not found for variable %s", + loop_date.year, + short_name, + ) + else: + _extract_variable(in_file, var, cfg, out_dir, loop_date.year) + + loop_date += relativedelta.relativedelta(years=1) diff --git a/esmvaltool/recipes/examples/recipe_check_obs.yml b/esmvaltool/recipes/examples/recipe_check_obs.yml index a31cc1cf50..832ac3d18d 100644 --- a/esmvaltool/recipes/examples/recipe_check_obs.yml +++ b/esmvaltool/recipes/examples/recipe_check_obs.yml @@ -292,6 +292,16 @@ diagnostics: start_year: 2003, end_year: 2018} scripts: null + ESACCI-PERMAFROST: + description: ESACCI-PERMAFROST check + variables: + alt: + gtd: + pfr: + additional_datasets: + - {dataset: ESACCI-PERMAFROST, project: OBS6, mip: Lyr, tier: 2, type: sat, version: v3.0, + frequency: yr, start_year: 1997, end_year: 2019} + scripts: null ESACCI-OC: description: ESACCI-OC check @@ -302,7 +312,6 @@ diagnostics: type: sat, version: fv5.0, start_year: 1998, end_year: 2020} scripts: null - ESACCI-OZONE: description: ESACCI-OZONE check variables: diff --git a/esmvaltool/references/esacci-permafrost.bibtex b/esmvaltool/references/esacci-permafrost.bibtex new file mode 100644 index 0000000000..229df7ba84 --- /dev/null +++ b/esmvaltool/references/esacci-permafrost.bibtex @@ -0,0 +1,29 @@ +@article{esacci-permafrost-alt, + doi = {10.5285/67a3f8c8dc914ef99f7f08eb0d997e23}, + url = {https://dx.doi.org/10.5285/67a3f8c8dc914ef99f7f08eb0d997e23}, + year = 2021, + month = {jun}, + publisher = {NERC EDS Centre for Environmental Data Analysis}, + author = {Obu, J.; Westermann, S.; Barboux, C.; Bartsch, A.; Delaloye, R.; Grosse, G.; Heim, B.; Hugelius, G.; Irrgang, A.; Kääb, A.M.; Kroisleitner, C.; Matthes, H.; Nitze, I.; Pellet, C.; Seifert, F.M.; Strozzi, T.; Wegmüller, U.; Wieczorek, M.; Wiesmann, A.} + title = {ESA Permafrost Climate Change Initiative (Permafrost\_cci): Permafrost active layer thickness for the Northern Hemisphere, v3.0}, +} + +@article{esacci-permafrost-gtd, + doi = {10.5285/b25d4a6174de4ac78000d034f500a268}, + url = {https://dx.doi.org/10.5285/b25d4a6174de4ac78000d034f500a268}, + year = 2021, + month = {jun}, + publisher = {NERC EDS Centre for Environmental Data Analysis}, + author = {Obu, J.; Westermann, S.; Barboux, C.; Bartsch, A.; Delaloye, R.; Grosse, G.; Heim, B.; Hugelius, G.; Irrgang, A.; Kääb, A.M.; Kroisleitner, C.; Matthes, H.; Nitze, I.; Pellet, C.; Seifert, F.M.; Strozzi, T.; Wegmüller, U.; Wieczorek, M.; Wiesmann, A.}, + title = {ESA Permafrost Climate Change Initiative (Permafrost\_cci): Permafrost Ground Temperature for the Northern Hemisphere, v3.0}, +} + +@article{esacci-permafrost-pfr, + doi = {10.5285/6e2091cb0c8b4106921b63cd5357c97c}, + url = {https://dx.doi.org/10.5285/6e2091cb0c8b4106921b63cd5357c97c}, + year = 2021, + month = {jun}, + publisher = {NERC EDS Centre for Environmental Data Analysis}, + author = {Obu, J.; Westermann, S.; Barboux, C.; Bartsch, A.; Delaloye, R.; Grosse, G.; Heim, B.; Hugelius, G.; Irrgang, A.; Kääb, A.M.; Kroisleitner, C.; Matthes, H.; Nitze, I.; Pellet, C.; Seifert, F.M.; Strozzi, T.; Wegmüller, U.; Wieczorek, M.; Wiesmann, A.}, + title = {ESA Permafrost Climate Change Initiative (Permafrost\_cci): Permafrost extent for the Northern Hemisphere, v3.0}, +}