diff --git a/doc/sphinx/source/input.rst b/doc/sphinx/source/input.rst index 6bca0c387d..64821eef0a 100644 --- a/doc/sphinx/source/input.rst +++ b/doc/sphinx/source/input.rst @@ -451,7 +451,7 @@ A list of the datasets for which a CMORizers is available is provided in the fol +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | NSIDC-0116-[nh|sh] [#note4]_ | usi, vsi (day) | 3 | Python | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ -| NSIDC-g02202-[sh] | siconc (SImon) | 3 | Python | +| NSIDC-g02202-[nh|sh] | siconc (SImon) | 3 | Python | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | OceanSODA-ETHZ | areacello (Ofx), co3os, dissicos, fgco2, phos, spco2, talkos (Omon) | 2 | Python | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ diff --git a/esmvaltool/cmorizers/data/cmor_config/NSIDC-G02202-nh.yml b/esmvaltool/cmorizers/data/cmor_config/NSIDC-G02202-nh.yml new file mode 100644 index 0000000000..3259bb159b --- /dev/null +++ b/esmvaltool/cmorizers/data/cmor_config/NSIDC-G02202-nh.yml @@ -0,0 +1,23 @@ +--- +filename: sic_psn25_{year}.*.nc +# Common global attributes for Cmorizer output +attributes: + dataset_id: NSIDC-G02202-nh + version: '5' + tier: 3 + modeling_realm: reanaly + project_id: OBS6 + source: 'https://nsidc.org/data/g02202/versions/5' + reference: 'nsidc-g02202' + comment: '' + +variables: + siconc: + mip: SImon + raw: cdr_seaice_conc_monthly + compress: true + + +custom: + create_areacello: true + area_file: psn25area_v3.dat diff --git a/esmvaltool/cmorizers/data/cmor_config/NSIDC-G02202-sh.yml 
b/esmvaltool/cmorizers/data/cmor_config/NSIDC-G02202-sh.yml index ad09ff7e3b..ad2263f774 100644 --- a/esmvaltool/cmorizers/data/cmor_config/NSIDC-G02202-sh.yml +++ b/esmvaltool/cmorizers/data/cmor_config/NSIDC-G02202-sh.yml @@ -1,13 +1,13 @@ --- -filename: seaice_conc_monthly_sh_{year}.*.nc +filename: sic_pss25_{year}.*.nc # Common global attributes for Cmorizer output attributes: dataset_id: NSIDC-G02202-sh - version: '4' + version: '5' tier: 3 modeling_realm: reanaly project_id: OBS6 - source: 'https://nsidc.org/data/g02202/versions/4' + source: 'https://nsidc.org/data/g02202/versions/5' reference: 'nsidc-g02202' comment: '' diff --git a/esmvaltool/cmorizers/data/datasets.yml b/esmvaltool/cmorizers/data/datasets.yml index 2d2fc71908..a9110a846c 100644 --- a/esmvaltool/cmorizers/data/datasets.yml +++ b/esmvaltool/cmorizers/data/datasets.yml @@ -1205,11 +1205,19 @@ datasets: NSIDC-G02202-sh: tier: 3 - source: https://polarwatch.noaa.gov/erddap/griddap/nsidcG02202v4shmday - last_access: 2023-05-13 + source: https://polarwatch.noaa.gov/erddap/griddap/nsidcG02202v5shmday + last_access: 2025-01-24 info: | Download monthly data. Login required for download, and also requires citation only to use + + NSIDC-G02202-nh: + tier: 3 + source: https://polarwatch.noaa.gov/erddap/griddap/nsidcG02202v5nhmday + last_access: 2025-01-24 + info: | + Download monthly data. 
+ Login required for download, and also requires citation only to use OceanSODA-ETHZ: tier: 2 diff --git a/esmvaltool/cmorizers/data/downloaders/datasets/nsidc_g02202_nh.py b/esmvaltool/cmorizers/data/downloaders/datasets/nsidc_g02202_nh.py new file mode 100644 index 0000000000..dcf6269f92 --- /dev/null +++ b/esmvaltool/cmorizers/data/downloaders/datasets/nsidc_g02202_nh.py @@ -0,0 +1,80 @@ +# pylint: disable=too-many-arguments +# pylint: disable=too-many-function-args +# pylint: disable=R0917 +# pylint: disable=too-many-locals +"""Script to download NSIDC-G02202-nh.""" +import logging +from datetime import datetime +from dateutil import relativedelta + +from esmvaltool.cmorizers.data.downloaders.wget import WGetDownloader + +logger = logging.getLogger(__name__) + + +def download_dataset(config, dataset, dataset_info, start_date, end_date, + overwrite): + """Download dataset. + + Parameters + ---------- + config : dict + ESMValTool's user configuration + dataset : str + Name of the dataset + dataset_info : dict + Dataset information from the datasets.yml file + start_date : datetime + Start of the interval to download + end_date : datetime + End of the interval to download + overwrite : bool + Overwrite already downloaded files + """ + if start_date is None: + start_date = datetime(1979, 1, 1) + if end_date is None: + end_date = datetime(2024, 6, 1) + + loop_date = start_date + + downloader = WGetDownloader( + config=config, + dataset=dataset, + dataset_info=dataset_info, + overwrite=overwrite, + ) + + # need area file + area_dat = ('ftp://sidads.colorado.edu/DATASETS/seaice' + '/polar-stereo/tools/psn25area_v3.dat') + downloader.download_folder(area_dat, []) + + anc_path = ('https://noaadata.apps.nsidc.org/NOAA/G02202_V5/' + 'ancillary/G02202-ancillary-psn25-v05r00.nc') + downloader.download_folder(anc_path, []) + + base_path = ('https://noaadata.apps.nsidc.org/NOAA/G02202_V5/north/monthly' + '/sic_psn25_{year}{month:02d}_{other}_v05r00.nc') + + datels = 
[datetime(1978, 11, 1), datetime(1987, 7, 30), + datetime(1991, 12, 30), datetime(1995, 9, 30), + datetime(2007, 12, 30), end_date] + suffls = ['n07', 'F08', 'F11', 'F13', 'F17'] + isuf = 0 + suffix = suffls[isuf] + # pick the suffix valid at start_date; guard avoids IndexError past suffls + while isuf < len(suffls) and loop_date >= datels[isuf]: + suffix = suffls[isuf] + isuf += 1 + + while loop_date <= end_date: + + if loop_date > datels[isuf]: + suffix = suffls[isuf] + isuf += 1 + + downloader.download_folder( + base_path.format(year=loop_date.year, month=loop_date.month, + other=suffix), []) + loop_date += relativedelta.relativedelta(months=1) diff --git a/esmvaltool/cmorizers/data/downloaders/datasets/nsidc_g02202_sh.py b/esmvaltool/cmorizers/data/downloaders/datasets/nsidc_g02202_sh.py index 0a6ae7900d..c69f75762f 100644 --- a/esmvaltool/cmorizers/data/downloaders/datasets/nsidc_g02202_sh.py +++ b/esmvaltool/cmorizers/data/downloaders/datasets/nsidc_g02202_sh.py @@ -33,7 +33,7 @@ def download_dataset( if start_date is None: start_date = datetime(1979, 1, 1) if end_date is None: - end_date = datetime(2023, 1, 1) + end_date = datetime(2024, 6, 1) loop_date = start_date @@ -51,29 +51,18 @@ def download_dataset( ) downloader.download_folder(area_dat, []) - anc_path = ( - "https://noaadata.apps.nsidc.org/NOAA/G02202_V4/" - "ancillary/G02202-cdr-ancillary-sh.nc" - ) + anc_path = ('https://noaadata.apps.nsidc.org/NOAA/G02202_V5/' + 'ancillary/G02202-ancillary-pss25-v05r00.nc') downloader.download_folder(anc_path, []) - base_path = ( - "https://noaadata.apps.nsidc.org/NOAA/G02202_V4/south/monthly" - "/seaice_conc_monthly_sh_{year}{month:02d}_{other}_v04r00.nc" - ) + base_path = ('https://noaadata.apps.nsidc.org/NOAA/G02202_V5/south/monthly' + '/sic_pss25_{year}{month:02d}_{other}_v05r00.nc') + + datels = [datetime(1978, 11, 1), datetime(1987, 7, 30), + datetime(1991, 12, 30), datetime(1995, 9, 30), + datetime(2007, 12, 30), end_date] + suffls = ['n07', 'F08', 'F11', 'F13', 'F17'] - # regex for n07 changes to 
f08.. file names - # bins #{'197811':'n07','198708':'f08', - # '199201':'f11','199510':'f13', '200801':'f17'} - datels = [ - datetime(1978, 11, 1), - datetime(1987, 7, 30), - datetime(1991, 12, 30), - datetime(1995, 9, 30), - datetime(2007, 12, 30), - end_date, - ] - suffls = ["n07", "f08", "f11", "f13", "f17"] isuf = 0 suffix = suffls[isuf] # initialize suffix if dates start higher than initial @@ -93,4 +82,3 @@ def download_dataset( [], ) loop_date += relativedelta.relativedelta(months=1) - # check loop_date is => next bin diff --git a/esmvaltool/cmorizers/data/formatters/datasets/nsidc_g02202_nh.py b/esmvaltool/cmorizers/data/formatters/datasets/nsidc_g02202_nh.py new file mode 100644 index 0000000000..b515745ea5 --- /dev/null +++ b/esmvaltool/cmorizers/data/formatters/datasets/nsidc_g02202_nh.py @@ -0,0 +1,192 @@ +# pylint: disable=unused-argument +# pylint: disable=too-many-arguments +# pylint: disable=too-many-function-args +# pylint: disable=R0917 +# pylint: disable=E1121 +# pylint: disable=too-many-locals +# flake8: noqa +"""ESMValTool CMORizer for Sea Ice Concentration CDR (Northern Hemisphere). + +Tier + Tier 3: restricted dataset. 
+ +Source + https://nsidc.org/data/g02202/versions/5 + +Last access + 20250124 + +Download and processing instructions + Download data from: + https://noaadata.apps.nsidc.org/NOAA/G02202_V5/north/monthly + lat and lon from: + https://noaadata.apps.nsidc.org/NOAA/G02202_V5/ancillary/ + area file: + ftp://sidads.colorado.edu/DATASETS/seaice/polar-stereo/tools/ + psn25area_v3.dat + + https://nsidc.org/sites/default/files/documents/user-guide/g02202-v005-userguide.pdf + +""" + +import logging +import os +import re + +import numpy as np + +import iris +from cf_units import Unit +from iris.coords import AuxCoord + +from esmvaltool.cmorizers.data import utilities as utils + +logger = logging.getLogger(__name__) + + +def _get_filepaths(in_dir, basename, yyyy): + """Find correct name of file (extend basename with timestamp).""" + f_name = basename.format(year=yyyy) + regex = re.compile(f_name) + return_files = [] + for files in os.listdir(in_dir): + if regex.match(files): + return_files.append(os.path.join(in_dir, files)) + + return return_files + + +def _fix_time_coord(cube, _field, _filename): + """Set time points to central day of month.""" + time_coord = cube.coord("time") + new_unit = Unit("days since 1850-01-01 00:00:00", calendar="standard") + time_coord.convert_units(new_unit) + old_time = new_unit.num2date(time_coord.points) + new_time = [d.replace(day=15) for d in old_time] + time_coord.points = new_unit.date2num(new_time) + + +def _prom_dim_coord(cube, _field, _filename): + iris.util.promote_aux_coord_to_dim_coord(cube, "time") + + +def _create_coord(cubes, var_name, standard_name): + cube = cubes.extract_cube(standard_name) + coord = AuxCoord( + cube.data, + standard_name=standard_name, + long_name=cube.long_name, + var_name=var_name, + units="degrees", # cube.units, + ) + return coord + + +def _extract_variable(raw_var, cmor_info, attrs, filepath, out_dir, latlon): + """Extract variable from all files.""" + var = cmor_info.short_name + cubes = 
iris.load(filepath, raw_var, _prom_dim_coord) + iris.util.equalise_attributes(cubes) + + cube = cubes.concatenate_cube() + iris.util.promote_aux_coord_to_dim_coord(cube, "projection_y_coordinate") + iris.util.promote_aux_coord_to_dim_coord(cube, "projection_x_coordinate") + cube.coord("projection_y_coordinate").rename("y") + cube.coord("projection_x_coordinate").rename("x") + + cube.add_aux_coord(latlon[0], (1, 2)) + cube.add_aux_coord(latlon[1], (1, 2)) + # add coord typesi + area_type = AuxCoord( + [1.0], standard_name="area_type", + var_name="type", long_name="Sea Ice area type" + ) + cube.add_aux_coord(area_type) + + # cube.convert_units(cmor_info.units) + cube.units = "%" + cube.data[cube.data > 100] = np.nan + cube = cube * 100 + + # utils.fix_coords(cube) #latlon multidimensional + utils.fix_var_metadata(cube, cmor_info) + utils.set_global_atts(cube, attrs) + + utils.save_variable(cube, var, out_dir, + attrs, unlimited_dimensions=["time"]) + + return cube + + +def _create_areacello(cfg, in_dir, sample_cube, glob_attrs, out_dir): + if not cfg["custom"].get("create_areacello", False): + return + var_info = cfg["cmor_table"].get_variable("Ofx", "areacello") + glob_attrs["mip"] = "Ofx" + lat_coord = sample_cube.coord("latitude") + + area_file = os.path.join(in_dir, cfg["custom"]["area_file"]) + with open(area_file, "rb") as datfile: + areasdmnd = np.fromfile( + datfile, + dtype=np.int32).reshape(lat_coord.shape) + + # Divide by 1000 to get km2 then multiply by 1e6 to m2 ...*1000 + ardata = areasdmnd * 1000 + + cube = iris.cube.Cube( + ardata, + standard_name=var_info.standard_name, + long_name=var_info.long_name, + var_name=var_info.short_name, + units="m2", + dim_coords_and_dims=[(sample_cube.coord("y"), 0), + (sample_cube.coord("x"), 1)], + ) + cube.add_aux_coord(lat_coord, (0, 1)) + cube.add_aux_coord(sample_cube.coord("longitude"), (0, 1)) + utils.fix_var_metadata(cube, var_info) + utils.set_global_atts(cube, glob_attrs) + utils.save_variable( + cube, 
var_info.short_name, out_dir, + glob_attrs, zlib=True) + + +def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): + """Cmorization func call.""" + glob_attrs = cfg["attributes"] + cmor_table = cfg["cmor_table"] + + # get aux nc file + cubesaux = iris.load( + os.path.join( + in_dir, "G02202-ancillary-psn25-v05r00.nc") + ) + coords = [_create_coord(cubesaux, "lat", "latitude"), + _create_coord(cubesaux, "lon", "longitude")] + + sample_cube = None + for year in range(1979, 2025): + filepaths = _get_filepaths(in_dir, cfg["filename"], year) + + if filepaths: + logger.info("Found %d files in '%s'", len(filepaths), in_dir) + + for var, var_info in cfg["variables"].items(): + logger.info("CMORizing variable '%s'", var) + glob_attrs["mip"] = var_info["mip"] + cmor_info = cmor_table.get_variable(var_info["mip"], var) + sample_cube = _extract_variable( + var_info.get("raw", var), + cmor_info, + glob_attrs, + filepaths, + out_dir, + coords, + ) + else: + logger.info("No files found ") + logger.info("year: %d basename: %s", year, cfg["filename"]) + + if sample_cube is not None: + _create_areacello(cfg, in_dir, sample_cube, glob_attrs, out_dir) diff --git a/esmvaltool/cmorizers/data/formatters/datasets/nsidc_g02202_sh.py b/esmvaltool/cmorizers/data/formatters/datasets/nsidc_g02202_sh.py index 3ff94121e3..e88830710b 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/nsidc_g02202_sh.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/nsidc_g02202_sh.py @@ -1,24 +1,30 @@ -"""ESMValTool CMORizer for Sea Ice Concentration CDR. +# pylint: disable=unused-argument +# pylint: disable=too-many-arguments +# pylint: disable=too-many-function-args +# pylint: disable=R0917 +# pylint: disable=E1121 +# flake8: noqa +"""ESMValTool CMORizer for Sea Ice Concentration CDR (Southern Hemisphere). Tier Tier 3: restricted dataset. 
Source - https://nsidc.org/data/g02202/versions/4 + https://nsidc.org/data/g02202/versions/5 Last access - 20231213 + 20250124 Download and processing instructions Download data from: - https://noaadata.apps.nsidc.org/NOAA/G02202_V4/south/monthly + https://noaadata.apps.nsidc.org/NOAA/G02202_V5/south/monthly lat and lon from: - https://noaadata.apps.nsidc.org/NOAA/G02202_V4/ancillary/ + https://noaadata.apps.nsidc.org/NOAA/G02202_V5/ancillary/ area file: ftp://sidads.colorado.edu/DATASETS/seaice/polar-stereo/tools/ pss25area_v3.dat - https://nsidc.org/sites/default/files/g02202-v004-userguide_1_1.pdf + https://nsidc.org/sites/default/files/documents/user-guide/g02202-v005-userguide.pdf """ @@ -155,21 +161,22 @@ def _create_areacello(cfg, in_dir, sample_cube, glob_attrs, out_dir): cube, var_info.short_name, out_dir, glob_attrs, zlib=True ) - def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): """Cmorization func call.""" glob_attrs = cfg["attributes"] cmor_table = cfg["cmor_table"] # get aux nc file - cubesaux = iris.load(os.path.join(in_dir, "G02202-cdr-ancillary-sh.nc")) + cubesaux = iris.load( + os.path.join( + in_dir, "G02202-ancillary-pss25-v05r00.nc") + ) lat_coord = _create_coord(cubesaux, "lat", "latitude") lon_coord = _create_coord(cubesaux, "lon", "longitude") - year = 1978 - # split by year.. 
sample_cube = None - for year in range(1979, 2022, 1): + + for year in range(1979, 2025, 1): filepaths = _get_filepaths(in_dir, cfg["filename"], year) if len(filepaths) > 0: @@ -190,11 +197,10 @@ def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): out_dir, [lat_coord, lon_coord], ) - else: logger.info( "No files found year: %d basename: %s", year, cfg["filename"] ) if sample_cube is not None: - _create_areacello(cfg, in_dir, sample_cube, glob_attrs, out_dir) + _create_areacello(cfg, in_dir, sample_cube, glob_attrs, out_dir) \ No newline at end of file diff --git a/esmvaltool/recipes/examples/recipe_check_obs.yml b/esmvaltool/recipes/examples/recipe_check_obs.yml index bfccb7f00f..e2ea176f39 100644 --- a/esmvaltool/recipes/examples/recipe_check_obs.yml +++ b/esmvaltool/recipes/examples/recipe_check_obs.yml @@ -2093,8 +2093,10 @@ diagnostics: siconc: mip: SImon additional_datasets: + - {dataset: NSIDC-G02202-nh, project: OBS6, tier: 3, + type: reanaly, version: 5, start_year: 1979, end_year: 2024} - {dataset: NSIDC-G02202-sh, project: OBS6, tier: 3, - type: reanaly, version: 4, start_year: 1979, end_year: 2022} + type: reanaly, version: 5, start_year: 1979, end_year: 2024} scripts: null TROPFLUX: