diff --git a/environment.yml b/environment.yml index 59948524b8..7804473f80 100644 --- a/environment.yml +++ b/environment.yml @@ -62,6 +62,7 @@ dependencies: - seaborn - seawater - shapely >=2.0.2 + - webdavclient - xarray >=0.12.0 - xesmf >=0.7.1 - xgboost >1.6.1 # github.com/ESMValGroup/ESMValTool/issues/2779 diff --git a/environment_osx.yml b/environment_osx.yml index 10a70bcf89..45e351465b 100644 --- a/environment_osx.yml +++ b/environment_osx.yml @@ -62,6 +62,7 @@ dependencies: - seaborn - seawater - shapely >=2.0.2 + - webdavclient - xarray >=0.12.0 - xesmf >=0.7.1 - xgboost >1.6.1 # github.com/ESMValGroup/ESMValTool/issues/2779 diff --git a/esmvaltool/cmorizers/data/cmor_config/ESACCI-OZONE.yml b/esmvaltool/cmorizers/data/cmor_config/ESACCI-OZONE.yml index 622b5ad197..744818929b 100644 --- a/esmvaltool/cmorizers/data/cmor_config/ESACCI-OZONE.yml +++ b/esmvaltool/cmorizers/data/cmor_config/ESACCI-OZONE.yml @@ -5,16 +5,40 @@ attributes: tier: 2 modeling_realm: sat project_id: OBS6 - source: "https://cds.climate.copernicus.eu/datasets/satellite-ozone-v1" + source: "https://cds.climate.copernicus.eu/datasets/satellite-ozone-v1 and https://webdav.aeronomie.be/guest/o3_cci/webdata/Nadir_Profiles/L3/IASI_MG_FORLI/" reference: "esacci-ozone" comment: "" variables: - toz: + # Note: Do not change variable names, these are used to handle the different + # datasets in the formatting script esacci_ozone.py. 
+ toz_gto_ecv: + version: L3-GTO-ECV mip: AERmon + output: toz raw: total_ozone_column - filename: C3S-L3_OZONE-O3_PRODUCTS-MERGED_UV-MERGED-ALG-MONTHLY-v2000.nc #The filename needs also "YYYYMM-" as prefix added during the cmorization - o3: + filename: "{year}{month}-C3S-L3_OZONE-O3_PRODUCTS-MERGED_UV-MERGED-ALG-MONTHLY-v2000.nc" + o3_sage_omps: + version: L3-SAGE-OMPS mip: AERmon + output: o3 raw: merged_ozone_concentration - filename: C3S-L3_OZONE-O3_PRODUCTS-CONC_MZM-MERGED-ALG-MONTHLY-v0008.nc #The filename needs also "YYYYMM-" as prefix added during the cmorization + filename: "{year}{month}-C3S-L3_OZONE-O3_PRODUCTS-CONC_MZM-MERGED-ALG-MONTHLY-v0008.nc" + o3_megridop: + version: L3-MEGRIDOP + mip: AERmon + output: o3 + raw: merged_ozone_concentration + filename: "{year}{month}-C3S-L3_OZONE-O3_PRODUCTS-CONC_LLG-MERGED-ALG-MONTHLY-v0005.nc" + o3_iasi: + version: L3-IASI + mip: AERmon + output: o3 + raw: O3_partial_column_profile + filename: "IASI_FORLI_O3_MERGED_{year}{month}*_V1.0.nc" + toz_iasi: + version: L3-IASI + mip: AERmon + output: toz + raw: O3_total_column + filename: "IASI_FORLI_O3_MERGED_{year}{month}*_V1.0.nc" diff --git a/esmvaltool/cmorizers/data/datasets.yml b/esmvaltool/cmorizers/data/datasets.yml index 9d9e163604..e1b9a8271f 100644 --- a/esmvaltool/cmorizers/data/datasets.yml +++ b/esmvaltool/cmorizers/data/datasets.yml @@ -522,12 +522,13 @@ datasets: ESACCI-OZONE: tier: 2 source: https://cds.climate.copernicus.eu/datasets/satellite-ozone-v1 - last_access: 2025-02-11 + last_access: 2025-05-15 info: | Download the data from: GTO-ECV total column (variable toz) Select the following from the CDS: https://cds.climate.copernicus.eu/datasets/satellite-ozone-v1?tab=download + Put all files under a single directory (no subdirectories with years). Processing Level = "Level 3" Variable = "Atm. 
mole content of ozone" @@ -540,16 +541,36 @@ datasets: SAGE-CCI-OMPS (variable o3) Select the following options from the same link: https://cds.climate.copernicus.eu/datasets/satellite-ozone-v1?tab=download + Put all files under a single directory (no subdirectories with years). Processing Level = "Level 3" Variable = "Mole concentration of ozone in air" - Vertical aggregation = " Vertical profiles from limb sensors" + Vertical aggregation = "Vertical profiles from limb sensors" Sensor = " CMZM (Monthly zonal mean merged concentration product from limb sensors ACE, GOMOS, MIPAS, OMPS, OSIRIS, SAGE-2 and SCIAMACHY)" Year = select all (1984-2022) Month = select all (1-12) Version = "v0008" - Put all files under a single directory (no subdirectories with years). + + MEGRIDOP (variable o3) + Select the following options from the same link: + https://cds.climate.copernicus.eu/datasets/satellite-ozone-v1?tab=download + Put all files under a single directory (no subdirectories with years). + + Processing Level = "Level 3" + Variable = "Mole concentration of ozone in air" + Vertical aggregation = "Vertical profiles from limb sensors" + Sensor = "CLLG (Latitude-longitude gridded merged concentration product from limb sensors GOMOS, MIPAS, OSIRIS and SCIAMACHY)" + Year = select all (2001-2024) + Month = select all (1-12) + Version = "v0005" + + IASI (variables o3, toz) + Download from BIRA WebDAV server: https://webdav.aeronomie.be + Path: /guest/o3_cci/webdata/Nadir_Profiles/L3/IASI_MG_FORLI/ + Username: o3_cci_public + No password (leave empty) + Download each year (yyyy) into separate folders named "IASI_yyyy" ESACCI-SEAICE: tier: 2 diff --git a/esmvaltool/cmorizers/data/downloaders/datasets/esacci_ozone.py b/esmvaltool/cmorizers/data/downloaders/datasets/esacci_ozone.py index fb9b4337f1..ef0a641107 100644 --- a/esmvaltool/cmorizers/data/downloaders/datasets/esacci_ozone.py +++ b/esmvaltool/cmorizers/data/downloaders/datasets/esacci_ozone.py @@ -1,18 +1,27 @@ -"""Script 
to download ESACCI-OZONE from the CDS.""" +"""Script to download ESACCI-OZONE from the CDS and BIRA WebDAV.""" import gzip import logging +import os import shutil import zipfile +from datetime import datetime from pathlib import Path import cdsapi +import webdav.client as wc +from dateutil import relativedelta logger = logging.getLogger(__name__) def download_dataset( - config, dataset, dataset_info, start_date, end_date, overwrite + config, + dataset, + dataset_info, + start_date, + end_date, + overwrite, ): """Download ESACCI-OZONE dataset using CDS API. @@ -22,34 +31,63 @@ def download_dataset( the ECMWF account needs to be saved in user's ${HOME} directory. - All the files will be saved in ${RAWOBS}/Tier2/ESACCI-OZONE. """ - cds_url = "https://cds.climate.copernicus.eu/api" - if dataset == "ESACCI-OZONE": + raw_obs_dir = Path(config["rootpath"]["RAWOBS"][0]) + output_folder = raw_obs_dir / f"Tier{dataset_info['tier']}" / dataset + output_folder.mkdir(parents=True, exist_ok=True) + + cds_url = "https://cds.climate.copernicus.eu/api" + + if start_date is None: + gto_year1 = 1995 + omps_year1 = 1984 + megridop_year1 = 2001 + else: + gto_year1 = start_date.year + omps_year1 = start_date.year + megridop_year1 = start_date.year + if end_date is None: + gto_year2 = 2024 + omps_year2 = 2023 + megridop_year2 = 2025 + else: + gto_year2 = end_date.year + omps_year2 = end_date.year + megridop_year2 = end_date.year + requests = { - "toz": { + "toz_gto_ecv": { "processing_level": "level_3", "variable": "atmosphere_mole_content_of_ozone", "vertical_aggregation": "total_column", "sensor": ["merged_uv"], - "year": [str(y) for y in range(1995, 2024)], + "year": [str(y) for y in range(gto_year1, gto_year2)], "month": [f"{m:02d}" for m in range(1, 13)], "version": ["v2000"], }, - "o3": { + "o3_sage_omps": { "processing_level": "level_3", "variable": "mole_concentration_of_ozone_in_air", "vertical_aggregation": "vertical_profiles_from_limb_sensors", "sensor": ["cmzm"], - "year": 
[str(y) for y in range(1984, 2023)], + "year": [str(y) for y in range(omps_year1, omps_year2)], "month": [f"{m:02d}" for m in range(1, 13)], "version": ["v0008"], }, + "o3_sage_megridop": { + "processing_level": "level_3", + "variable": "mole_concentration_of_ozone_in_air", + "vertical_aggregation": "vertical_profiles_from_limb_sensors", + "sensor": ["cllg"], + "year": [ + str(y) for y in range(megridop_year1, megridop_year2) + ], + "month": [f"{m:02d}" for m in range(1, 13)], + "version": ["v0005"], + }, } - client = cdsapi.Client(cds_url) - raw_obs_dir = Path(config["rootpath"]["RAWOBS"][0]) - output_folder = raw_obs_dir / f"Tier{dataset_info['tier']}" / dataset - output_folder.mkdir(parents=True, exist_ok=True) + cds_client = cdsapi.Client(cds_url) for var_name, request in requests.items(): logger.info("Downloading %s data to %s", var_name, output_folder) @@ -58,12 +96,15 @@ def download_dataset( if file_path.exists() and not overwrite: logger.info( - "File %s already exists. Skipping download.", file_path + "File %s already exists. 
Skipping download.", + file_path, ) continue - client.retrieve( - "satellite-ozone-v1", request, file_path.as_posix() + cds_client.retrieve( + "satellite-ozone-v1", + request, + file_path.as_posix(), ) # Handle both .gz and .zip files @@ -80,5 +121,53 @@ def download_dataset( with open(output_folder / file_path.stem, "wb") as f_out: shutil.copyfileobj(f_in, f_out) + # download IASI data from BIRA WebDAV (IASI data not available on CDS) + # all the files will be saved by year (yyyy) in + # ${RAWOBS}/Tier2/ESACCI-OZONE/IASI_yyyy + + if start_date is None: + start_date = datetime(2008, 1, 1) + if end_date is None: + end_date = datetime(2023, 12, 31) + + options = { + "webdav_hostname": "https://webdav.aeronomie.be", + "webdav_login": "o3_cci_public", + "webdav_password": "", + } + + wd_client = wc.Client(options) + + basepath = "/guest/o3_cci/webdata/Nadir_Profiles/L3/IASI_MG_FORLI/" + + loop_date = start_date + while loop_date <= end_date: + year = loop_date.year + + # if needed, create local output directory + outdir = output_folder / f"IASI_{year}" + os.makedirs(outdir, exist_ok=True) + + # directory on WebDAV server to download + remotepath = f"{basepath}/{year}" + files = wd_client.list(remotepath) + info = wd_client.info(remotepath + "/" + files[0]) + numfiles = len(files) + # calculate approx. download volume in Gbytes + size = int(info["size"]) * numfiles // 1073741824 + del files + + loginfo = ( + f"downloading {numfiles} files for year {year}" + f" (approx. 
{size} Gbytes)" + ) + logger.info(loginfo) + + # synchronize local (output) directory and WebDAV server directory + wd_client.pull(remote_directory=remotepath, local_directory=outdir) + + loop_date += relativedelta.relativedelta(years=1) + else: - raise ValueError(f"Unknown dataset: {dataset}") + errmsg = f"Unknown dataset: {dataset}" + raise ValueError(errmsg) diff --git a/esmvaltool/cmorizers/data/formatters/datasets/esacci_ozone.py b/esmvaltool/cmorizers/data/formatters/datasets/esacci_ozone.py index a7b941071b..cf0c27c690 100644 --- a/esmvaltool/cmorizers/data/formatters/datasets/esacci_ozone.py +++ b/esmvaltool/cmorizers/data/formatters/datasets/esacci_ozone.py @@ -35,20 +35,42 @@ Month = select all (1-12) Version = "v0008" + MEGRIDOP (variable o3) + Processing Level = "Level 3" + Variable = "Mole concentration of ozone in air" + Vertical aggregation = "Vertical profiles from limb sensors" + Sensor = "CLLG" + Year = select all (2001-2024) + Month = select all (1-12) + Version = "v0005" + Put all files under a single directory (no subdirectories with years). 
in ${RAWOBS}/Tier2/ESACCI-OZONE + --------------------------------------------------------------------------- + + IASI (variables o3, toz) + Download from BIRA WebDAV server: https://webdav.aeronomie.be + Path: /guest/o3_cci/webdata/Nadir_Profiles/L3/IASI_MG_FORLI/ + Username: o3_cci_public + No password (leave empty) + Download each year (yyyy) into separate folders named "IASI_yyyy" """ +import glob import logging +import os from datetime import datetime -from pathlib import Path +import dask.array as da import iris +import iris.experimental.stratify import iris.util +import numpy as np from cf_units import Unit +from dateutil import relativedelta from esmvalcore.cmor._fixes.native_datasets import NativeDatasetFix -from esmvalcore.preprocessor import concatenate +from esmvalcore.preprocessor import concatenate, monthly_statistics from ...utilities import ( fix_coords, @@ -60,19 +82,40 @@ logger = logging.getLogger(__name__) -def _convert_units(cubes, short_name, var): - """Perform variable-specific calculations.""" +def _convert_units( + cubes: iris.cube.CubeList, short_name: str, var: dict +) -> iris.cube.Cube: + """Perform variable-specific conversion of units. + + Parameters + ---------- + cubes: cube list containing all data to convert units + short_name: short name of variable to be converted + var: dict containing variable info + + Returns + ------- + cube: iris.cube.Cube + data cube with converted units. 
+ """ cube = cubes.extract_cube(var["raw"]) if short_name == "o3": # Ozone mole fraction - gas_constant = 8.31446261815324 # Ideal gas constant (J mol-1 K-1) - t_cube = cubes.extract_cube("air_temperature") - p_cube = cubes.extract_cube("air_pressure") - p_cube.convert_units("Pa") - air_mol_concentration = p_cube / (gas_constant * t_cube) # mol m-3 - cube = cube / air_mol_concentration - cube.units = "mol mol-1" - - elif short_name == "toz": # Total ozone column (m) + if var["var_name"] == "o3_iasi": # IASI merged profiles + air_mol_concentration = cubes.extract_cube( + "air_partial_column_profile", + ) + cube = cube / air_mol_concentration + cube.units = "mol mol-1" + else: # SAGE-CCI-OMPS or MEGRIDOP profiles + gas_constant = 8.31446261815324 # Ideal gas constant (J mol-1 K-1) + t_cube = cubes.extract_cube("air_temperature") + p_cube = cubes.extract_cube("air_pressure") + p_cube.convert_units("Pa") + air_mol_concentration = p_cube / (gas_constant * t_cube) # mol m-3 + cube = cube / air_mol_concentration + cube.units = "mol mol-1" + + elif short_name == "toz": # Total ozone column (m) (IASI or GTO-ECV) # Convert from mol m-2 to m # ------------------------- # 1e-5 m (gas @ T = 273 K and p = 101325 Pa) ~ 2.69e20 molecules m-2 @@ -87,43 +130,68 @@ def _convert_units(cubes, short_name, var): return cube -def _extract_variable(short_name, var, cfg, filename, year, month): +def _extract_variable(in_files, var, cfg, out_dir, year, month): """Extract variable, add time coordinate, and scalar longitude.""" + short_name = var["output"] mip = var["mip"] cmor_info = cfg["cmor_table"].get_variable(mip, short_name) - cubes = iris.load(filename) - - cube = _convert_units(cubes, short_name, var) + # add time coordinate(s) + # try to extract day from filename(s) + new_list = iris.cube.CubeList() + for fname in in_files: + print(fname) + cubes = iris.load(fname) + i = fname.find(f"{year}{month:02}") + strday = fname[i + 6 : i + 8] + try: + day = int(strday) + except ValueError: + 
day = 15 + + time_units = Unit("days since 1950-01-01") + time_points = time_units.date2num(datetime(year, month, day)) + + # Add time coordinate to cube. + time_coord = iris.coords.DimCoord( + time_points, + var_name="time", + standard_name="time", + long_name="time", + units=time_units, + ) + for cube in cubes: + cube.add_aux_coord(time_coord, ()) + cube = iris.util.new_axis(cube, time_coord) + new_list.append(cube) logger.info("Checking CMOR info for %s: %s", short_name, cmor_info) if cmor_info is None: raise ValueError(f"CMOR info for {short_name} in MIP {mip} not found!") - day = 15 # Mid-month - time_units = Unit("days since 1950-01-01") - time_points = time_units.date2num(datetime(year, month, day)) - logger.info( - "Filename: '%s', Extracted Year:'%d', Month: '%d'", - filename, - year, - month, - ) - - # Add time coordinate to cube. - time_coord = iris.coords.DimCoord( - time_points, - var_name="time", - standard_name="time", - long_name="time", - units=time_units, - ) - time_coord.guess_bounds(monthly=True) - cube.add_aux_coord(time_coord, ()) - cube = iris.util.new_axis(cube, time_coord) - - # Add longitude coordinate to cube only for o3. - if short_name == "o3": + cubes = new_list.concatenate() + cube = _convert_units(cubes, short_name, var) + # fix nan's + cube.data = da.ma.fix_invalid(cube.core_data()) + # calculate monthly means (IASI) + cube = monthly_statistics(cube, operator="mean") + + # Add/set time bounds to + # (year-month-01 00:00, year-month+1-01 00:00) + timecoord = cube.coord("time") + for time in timecoord.units.num2date(timecoord.points): + start_date = datetime(time.year, time.month, 1) + end_date = start_date + end_date += relativedelta.relativedelta(months=1) + timecoord.bounds = np.array( + [ + timecoord.units.date2num(start_date), + timecoord.units.date2num(end_date), + ] + ) + + # Add longitude coordinate to cube only for o3_sage_omps. 
+ if var["var_name"] == "o3_sage_omps": lon_coord = iris.coords.DimCoord( [180.0], bounds=[[0.0, 360.0]], @@ -136,8 +204,165 @@ def _extract_variable(short_name, var, cfg, filename, year, month): cube = iris.util.new_axis(cube, lon_coord) cube.transpose([1, 3, 2, 0]) NativeDatasetFix.fix_alt16_metadata(cube) + lat_coord = iris.coords.DimCoord( + cube.coord("latitude").points, + var_name="lat", + standard_name="latitude", + long_name="latitude", + units="degrees_north", + ) + cube.remove_coord(cube.coord("latitude")) + cube.add_dim_coord(lat_coord, 2) + cube = fix_coords(cube) + elif var["var_name"] == "o3_megridop": + # roll longitude: -180...180 --> 0...360 + cube.coord("longitude").points = cube.coord("longitude").points + 180.0 + nlon = len(cube.coord("longitude").points) + cube.data = da.roll(cube.core_data(), int(nlon / 2), axis=1) + NativeDatasetFix.fix_alt16_metadata(cube) + # reorder dimensions to [time, lev, lat, lon] + cube.transpose([0, 3, 2, 1]) + # rename var_name of lat and lon coordinates + lon_coord = iris.coords.DimCoord( + cube.coord("longitude").points, + var_name="lon", + standard_name="longitude", + long_name="longitude", + units="degrees_east", + ) + cube.remove_coord(cube.coord("longitude")) + cube.add_dim_coord(lon_coord, 3) + lat_coord = iris.coords.DimCoord( + cube.coord("latitude").points, + var_name="lat", + standard_name="latitude", + long_name="latitude", + units="degrees_north", + ) + cube.remove_coord(cube.coord("latitude")) + cube.add_dim_coord(lat_coord, 2) + cube = fix_coords(cube) + # add latitude, longitude and nlev coordiantes to cube for o3_iasi + elif var["var_name"] == "o3_iasi": + # add named coordinates + # longitude + lon_cube = cubes.extract_cube("longitude") + lon_coord = iris.coords.DimCoord( + lon_cube.core_data()[0, :], + var_name="lon", + standard_name="longitude", + long_name="longitude", + units="degrees_east", + ) + cube.add_aux_coord(lon_coord, 2) + iris.util.promote_aux_coord_to_dim_coord(cube, "longitude") + # 
latitude + lat_cube = cubes.extract_cube("latitude") + lat_coord = iris.coords.DimCoord( + lat_cube.core_data()[0, :], + var_name="lat", + standard_name="latitude", + long_name="latitude", + units="degrees_north", + ) + cube.add_aux_coord(lat_coord, 3) + iris.util.promote_aux_coord_to_dim_coord(cube, "latitude") + # level + lev_coord = iris.coords.DimCoord( + np.arange(1, 0, -1 / cube.shape[1]), + var_name="lev", + standard_name="atmosphere_hybrid_sigma_pressure_coordinate", + long_name="hybrid sigma pressure coordinate", + units="1", + attributes={"positive": "up"}, + ) + cube.add_dim_coord(lev_coord, 1) + cube.transpose([0, 1, 3, 2]) + # air pressure (aux coordinate) + # calculate full levels from half levels + ap_half = cubes.extract_cube("atmosphere_pressure_grid") + ap_half.data = np.ma.masked_invalid(ap_half.data) + ap_half = monthly_statistics(ap_half, operator="mean") + # reoder air pressure (aux coordinate) before adding to cube + # (otherwise, data of aux coordiante remains in original order) + ap_half.transpose([0, 1, 3, 2]) + ap_full = cube.copy() + for k in range(ap_full.shape[1]): + ap_full.core_data()[:, k, :, :] = 0.5 * ( + ap_half.core_data()[:, k, :, :] + + ap_half.core_data()[:, k + 1, :, :] + ) + ap_means = ap_full.core_data().mean(axis=[2, 3], keepdims=True) + ap_full.data = da.where( + da.ma.getmaskarray(ap_full.core_data()), + ap_means, + ap_full.core_data(), + ) + ap_coord = iris.coords.AuxCoord( + ap_full.core_data(), + var_name="plev", + standard_name="air_pressure", + long_name="pressure", + units="Pa", + ) + cube.add_aux_coord(ap_coord, [0, 1, 2, 3]) + # roll longitude: -180...180 --> 0...360 + cube.coord("longitude").points = cube.coord("longitude").points + 180.0 + nlon = len(cube.coord("longitude").points) + cube.data = da.roll(cube.core_data(), int(nlon / 2), axis=-1) + elif var["var_name"] == "toz_iasi": + # add named coordinates + # longitude + lon_cube = cubes.extract_cube("longitude") + lon_coord = iris.coords.DimCoord( + 
lon_cube.core_data()[0, :], + var_name="lon", + standard_name="longitude", + long_name="longitude", + units="degrees_east", + ) + cube.add_aux_coord(lon_coord, 1) + iris.util.promote_aux_coord_to_dim_coord(cube, "longitude") + # latitude + lat_cube = cubes.extract_cube("latitude") + lat_coord = iris.coords.DimCoord( + lat_cube.core_data()[0, :], + var_name="lat", + standard_name="latitude", + long_name="latitude", + units="degrees_north", + ) + cube.add_aux_coord(lat_coord, 2) + iris.util.promote_aux_coord_to_dim_coord(cube, "latitude") + # reorder dimensions to [time, lat, lon] + cube.transpose([0, 2, 1]) + # roll longitude: -180...180 --> 0...360 + cube.coord("longitude").points = cube.coord("longitude").points + 180.0 + nlon = len(cube.coord("longitude").points) + cube.data = da.roll(cube.core_data(), int(nlon / 2), axis=-1) + cube = fix_coords(cube) + elif var["var_name"] == "toz_gto_ecv": + lon_coord = iris.coords.DimCoord( + cube.coord("longitude").points, + var_name="lon", + standard_name="longitude", + long_name="longitude", + units="degrees_east", + ) + cube.remove_coord(cube.coord("longitude")) + cube.add_dim_coord(lon_coord, 2) + lat_coord = iris.coords.DimCoord( + cube.coord("latitude").points, + var_name="lat", + standard_name="latitude", + long_name="latitude", + units="degrees_north", + ) + cube.remove_coord(cube.coord("latitude")) + cube.add_dim_coord(lat_coord, 1) + cube = fix_coords(cube) + fix_var_metadata(cube, cmor_info) - cube = fix_coords(cube) set_global_atts(cube, cfg["attributes"]) return cube @@ -145,17 +370,30 @@ def _extract_variable(short_name, var, cfg, filename, year, month): def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): """Cmorization process with dataset-specific time ranges.""" glob_attrs = cfg["attributes"] + if "version" in glob_attrs: + glob_version = glob_attrs["version"] + else: + glob_version = "" for var_name, var in cfg["variables"].items(): - # Define dataset-specific time ranges - if var_name == 
"toz": # GTO-ECV + # Define dataset-specific default time ranges + if var_name == "toz_gto_ecv": # GTO-ECV dataset_start = datetime(1995, 7, 1) dataset_end = datetime(2023, 4, 30) - elif var_name == "o3": # SAGE-CCI-OMPS + elif var_name == "o3_sage_omps": # SAGE-CCI-OMPS dataset_start = datetime(1984, 10, 1) dataset_end = datetime(2022, 12, 31) + elif var_name == "o3_megridop": # MEGRIDOP + dataset_start = datetime(2001, 11, 1) + dataset_end = datetime(2023, 12, 31) + elif var_name == "o3_iasi" or var_name == "toz_iasi": # IASI + dataset_start = datetime(2008, 1, 1) + dataset_end = datetime(2023, 12, 31) else: - raise ValueError(f"Unknown dataset for variable {var_name}") + errmsg = f"Unknown dataset for variable {var_name}" + raise ValueError(errmsg) + + var["var_name"] = var_name # Adjust start and end dates if not provided start_date_x = start_date or dataset_start @@ -167,44 +405,59 @@ def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): all_data_cubes = [] glob_attrs["mip"] = var["mip"] + if "version" in var: + glob_attrs["version"] = var["version"] + else: + glob_attrs["version"] = glob_version + output_var = var["output"] for year in range(start_date_x.year, end_date_x.year + 1): + if var_name == "o3_iasi" or var_name == "toz_iasi": + subfolder = f"IASI_{year}" + else: + subfolder = "" for month in range(1, 13): # Skip months outside the dataset range current_date = datetime(year, month, 1) if current_date < dataset_start or current_date > dataset_end: continue - date_str = f"{year}{month:02}" # YYYYMM format - filename = Path(in_dir) / f"{date_str}-{var['filename']}" - if not filename.is_file(): - logger.warning( - "No file found for %s in %s-%02d", - var_name, - year, - month, - ) - continue + monstr = f"{month:02}" - logger.info( - "CMORizing variable '%s' from file '%s'", - var_name, - filename, + filepattern = os.path.join( + in_dir, + subfolder, + var["filename"].format(year=year, month=monstr), ) + in_files = glob.glob(filepattern) + 
if not in_files: + infomsg = ( + f"{var_name}: no data not found for {year}-{monstr}" + ) + logger.info(infomsg) + continue cube = _extract_variable( - var_name, var, cfg, filename, year, month + in_files, + var, + cfg, + out_dir, + year, + month, ) + + logger.info("CMORizing variable '%s'", output_var) all_data_cubes.append(cube) if not all_data_cubes: - raise ValueError( - f"No valid data found for {var_name} within the selected time" - " range" + errmsg = ( + f"No valid data found for {var_name} within the selected" + f" time range" ) + raise ValueError(errmsg) final_cube = concatenate(all_data_cubes) save_variable( final_cube, - var_name, + output_var, out_dir, glob_attrs, unlimited_dimensions=["time"], diff --git a/esmvaltool/recipes/examples/recipe_check_obs.yml b/esmvaltool/recipes/examples/recipe_check_obs.yml index b531301894..331e3ac9e7 100644 --- a/esmvaltool/recipes/examples/recipe_check_obs.yml +++ b/esmvaltool/recipes/examples/recipe_check_obs.yml @@ -311,6 +311,10 @@ diagnostics: additional_datasets: - {dataset: ESACCI-OZONE, project: OBS, mip: Amon, tier: 2, type: sat, version: L3, start_year: 1997, end_year: 2010} + - {dataset: ESACCI-OZONE, project: OBS6, mip: AERmon, tier: 2, + type: sat, version: L3-GTO-ECV, start_year: 1995, end_year: 2023} + - {dataset: ESACCI-OZONE, project: OBS6, mip: AERmon, tier: 2, + type: sat, version: L3-IASI, start_year: 2008, end_year: 2010} tozStderr: additional_datasets: - {dataset: ESACCI-OZONE, project: OBS, mip: Amon, tier: 2, @@ -323,14 +327,14 @@ diagnostics: additional_datasets: - {dataset: ESACCI-OZONE, project: OBS, mip: Amon, tier: 2, type: sat, version: L3, start_year: 2007, end_year: 2008} - toz: - additional_datasets: - - {dataset: ESACCI-OZONE, project: OBS6, mip: AERmon, tier: 2, - type: sat, version: L3, start_year: 1995, end_year: 2023} o3: additional_datasets: - {dataset: ESACCI-OZONE, project: OBS6, mip: AERmon, tier: 2, - type: sat, version: L3, start_year: 1984, end_year: 2022} + type: sat, 
version: L3-SAGE-OMPS, start_year: 1984, end_year: 2022} + - {dataset: ESACCI-OZONE, project: OBS6, mip: AERmon, tier: 2, + type: sat, version: L3-MEGRIDOP, start_year: 2001, end_year: 2023} + - {dataset: ESACCI-OZONE, project: OBS6, mip: AERmon, tier: 2, + type: sat, version: L3-IASI, start_year: 2008, end_year: 2010} scripts: null ESACCI-SEAICE: