2 changes: 1 addition & 1 deletion doc/sphinx/source/input.rst
@@ -441,7 +441,7 @@ A list of the datasets for which a CMORizer is available is provided in the fol
+------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+
| NSIDC-0116-[nh|sh] [#note4]_ | usi, vsi (day) | 3 | Python |
+------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+
| NSIDC-g02202-[sh] | siconc (SImon) | 3 | Python |
| NSIDC-g02202-[nh|sh] | siconc (SImon) | 3 | Python |
Suggested change
| NSIDC-g02202-[nh|sh] | siconc (SImon) | 3 | Python |
| NSIDC-G02202-[nh|sh] | siconc (SImon) | 3 | Python |

Update to the capital 'G' used in the data commands.

+------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+
| OceanSODA-ETHZ | areacello (Ofx), co3os, dissicos, fgco2, phos, spco2, talkos (Omon) | 2 | Python |
+------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+
23 changes: 23 additions & 0 deletions esmvaltool/cmorizers/data/cmor_config/NSIDC-G02202-nh.yml
@@ -0,0 +1,23 @@
---
filename: sic_psn25_{year}.*.nc
# Common global attributes for Cmorizer output
attributes:
dataset_id: NSIDC-G02202-nh
version: '5'
tier: 3
modeling_realm: reanaly
project_id: OBS6
source: 'https://nsidc.org/data/g02202/versions/5'
reference: 'nsidc-g02202'
comment: ''

variables:
siconc:
mip: SImon
raw: cdr_seaice_conc_monthly
compress: true


custom:
create_areacello: true
area_file: psn25area_v3.dat
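
The filename entry is a pattern with a {year} placeholder that the formatter expands and matches as a regular expression against the files in the input directory (see _get_filepaths in the new formatter further down). A minimal sketch of that resolution, using a hypothetical input directory path:

import os
import re


def resolve_files(in_dir, pattern, year):
    """List files in in_dir matching the filename pattern for a given year."""
    regex = re.compile(pattern.format(year=year))
    return [os.path.join(in_dir, name)
            for name in os.listdir(in_dir) if regex.match(name)]


# Hypothetical usage:
# resolve_files("/path/to/RAWOBS/Tier3/NSIDC-G02202-nh",
#               "sic_psn25_{year}.*.nc", 1979)
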
6 changes: 3 additions & 3 deletions esmvaltool/cmorizers/data/cmor_config/NSIDC-G02202-sh.yml
@@ -1,13 +1,13 @@
---
filename: seaice_conc_monthly_sh_{year}.*.nc
filename: sic_pss25_{year}.*.nc
# Common global attributes for Cmorizer output
attributes:
dataset_id: NSIDC-G02202-sh
version: '4'
version: '5'
tier: 3
modeling_realm: reanaly
project_id: OBS6
source: 'https://nsidc.org/data/g02202/versions/4'
source: 'https://nsidc.org/data/g02202/versions/5'
reference: 'nsidc-g02202'
comment: ''

12 changes: 10 additions & 2 deletions esmvaltool/cmorizers/data/datasets.yml
@@ -1131,11 +1131,19 @@ datasets:

NSIDC-G02202-sh:
tier: 3
source: https://polarwatch.noaa.gov/erddap/griddap/nsidcG02202v4shmday
last_access: 2023-05-13
source: https://polarwatch.noaa.gov/erddap/griddap/nsidcG02202v5shmday
last_access: 2025-01-24
info: |
Download monthly data.
Login required for download; use of the data only requires citation.

NSIDC-G02202-nh:
tier: 3
source: https://polarwatch.noaa.gov/erddap/griddap/nsidcG02202v5nhmday
last_access: 2025-01-24
info: |
Download monthly data.
Login required for download; use of the data only requires citation.

OceanSODA-ETHZ:
tier: 2
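
Given the new NSIDC-G02202-nh entry in datasets.yml above, the raw files can be fetched and CMORized with ESMValTool's data commands, which is where the capitalized 'G' noted in the review comment matters. A minimal sketch of driving both steps from Python, assuming ESMValTool is installed and the dataset name matches the entry exactly:

import subprocess

# Download the raw monthly files, then run the CMORizer; both subcommands
# take the dataset name exactly as spelled in datasets.yml.
for step in ("download", "format"):
    subprocess.run(["esmvaltool", "data", step, "NSIDC-G02202-nh"], check=True)
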
@@ -0,0 +1,80 @@
# pylint: disable=too-many-arguments
# pylint: disable=too-many-function-args
# pylint: disable=R0917
# pylint: disable=too-many-locals
"""Script to download NSIDC-G02202-nh."""
import logging
from datetime import datetime
from dateutil import relativedelta

from esmvaltool.cmorizers.data.downloaders.wget import WGetDownloader

logger = logging.getLogger(__name__)


def download_dataset(config, dataset, dataset_info, start_date, end_date,
overwrite):
"""Download dataset.

Parameters
----------
config : dict
ESMValTool's user configuration
dataset : str
Name of the dataset
dataset_info : dict
Dataset information from the datasets.yml file
start_date : datetime
Start of the interval to download
end_date : datetime
End of the interval to download
overwrite : bool
Overwrite already downloaded files
"""
if start_date is None:
start_date = datetime(1979, 1, 1)
if end_date is None:
end_date = datetime(2024, 6, 1)

loop_date = start_date

downloader = WGetDownloader(
config=config,
dataset=dataset,
dataset_info=dataset_info,
overwrite=overwrite,
)

# need area file
area_dat = ('ftp://sidads.colorado.edu/DATASETS/seaice'
'/polar-stereo/tools/psn25area_v3.dat')
downloader.download_folder(area_dat, [])

anc_path = ('https://noaadata.apps.nsidc.org/NOAA/G02202_V5/'
'ancillary/G02202-ancillary-psn25-v05r00.nc')
downloader.download_folder(anc_path, [])

base_path = ('https://noaadata.apps.nsidc.org/NOAA/G02202_V5/north/monthly'
'/sic_psn25_{year}{month:02d}_{other}_v05r00.nc')

datels = [datetime(1978, 11, 1), datetime(1987, 7, 30),
datetime(1991, 12, 30), datetime(1995, 9, 30),
datetime(2007, 12, 30), end_date]
suffls = ['n07', 'F08', 'F11', 'F13', 'F17']
isuf = 0
suffix = suffls[isuf]
# initialize the suffix when the start date falls after the first cut-over date
while loop_date >= datels[isuf]:
suffix = suffls[isuf]
isuf += 1

while loop_date <= end_date:

if loop_date > datels[isuf]:
suffix = suffls[isuf]
isuf += 1

downloader.download_folder(
base_path.format(year=loop_date.year, month=loop_date.month,
other=suffix), [])
loop_date += relativedelta.relativedelta(months=1)
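
The platform suffix (n07, F08, F11, F13, F17) is selected from the date bins in datels by the two while loops above. The same mapping can be expressed more compactly with bisect; the sketch below is only an illustration of the bin logic under the cut-over dates used above, not a drop-in replacement (boundary handling at the exact cut-over dates may differ slightly):

from bisect import bisect_right
from datetime import datetime

CUTOVERS = [datetime(1987, 7, 30), datetime(1991, 12, 30),
            datetime(1995, 9, 30), datetime(2007, 12, 30)]
SUFFIXES = ['n07', 'F08', 'F11', 'F13', 'F17']


def platform_suffix(date):
    """Return the satellite platform suffix for a given month."""
    return SUFFIXES[bisect_right(CUTOVERS, date)]


# platform_suffix(datetime(1979, 1, 1))  -> 'n07'
# platform_suffix(datetime(1995, 10, 1)) -> 'F13'
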
@@ -31,7 +31,7 @@ def download_dataset(config, dataset, dataset_info, start_date, end_date,
if start_date is None:
start_date = datetime(1979, 1, 1)
if end_date is None:
end_date = datetime(2023, 1, 1)
end_date = datetime(2024, 6, 1)

loop_date = start_date

@@ -47,20 +47,17 @@ def download_dataset(config, dataset, dataset_info, start_date, end_date,
'/polar-stereo/tools/pss25area_v3.dat')
downloader.download_folder(area_dat, [])

anc_path = ('https://noaadata.apps.nsidc.org/NOAA/G02202_V4/'
'ancillary/G02202-cdr-ancillary-sh.nc')
anc_path = ('https://noaadata.apps.nsidc.org/NOAA/G02202_V5/'
'ancillary/G02202-ancillary-pss25-v05r00.nc')
downloader.download_folder(anc_path, [])

base_path = ('https://noaadata.apps.nsidc.org/NOAA/G02202_V4/south/monthly'
'/seaice_conc_monthly_sh_{year}{month:02d}_{other}_v04r00.nc')
base_path = ('https://noaadata.apps.nsidc.org/NOAA/G02202_V5/south/monthly'
'/sic_pss25_{year}{month:02d}_{other}_v05r00.nc')

# regex for n07 changes to f08.. file names
# bins #{'197811':'n07','198708':'f08',
# '199201':'f11','199510':'f13', '200801':'f17'}
datels = [datetime(1978, 11, 1), datetime(1987, 7, 30),
datetime(1991, 12, 30), datetime(1995, 9, 30),
datetime(2007, 12, 30), end_date]
suffls = ['n07', 'f08', 'f11', 'f13', 'f17']
suffls = ['n07', 'F08', 'F11', 'F13', 'F17']
isuf = 0
suffix = suffls[isuf]
# initialize the suffix when the start date falls after the first cut-over date
Expand All @@ -78,4 +75,3 @@ def download_dataset(config, dataset, dataset_info, start_date, end_date,
base_path.format(year=loop_date.year, month=loop_date.month,
other=suffix), [])
loop_date += relativedelta.relativedelta(months=1)
# check loop_date is => next bin
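
For reference, the updated base_path with the v5 naming and the capitalized platform suffixes expands to URLs like the one below. This is only a worked example of the format call, using the date bins above; it is not additional downloader code:

base_path = ('https://noaadata.apps.nsidc.org/NOAA/G02202_V5/south/monthly'
             '/sic_pss25_{year}{month:02d}_{other}_v05r00.nc')

# October 1995 falls in the F13 bin:
url = base_path.format(year=1995, month=10, other='F13')
# https://noaadata.apps.nsidc.org/NOAA/G02202_V5/south/monthly/sic_pss25_199510_F13_v05r00.nc
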
192 changes: 192 additions & 0 deletions esmvaltool/cmorizers/data/formatters/datasets/nsidc_g02202_nh.py
@@ -0,0 +1,192 @@
# pylint: disable=unused-argument
# pylint: disable=too-many-arguments
# pylint: disable=too-many-function-args
# pylint: disable=R0917
# pylint: disable=E1121
# pylint: disable=too-many-locals
# flake8: noqa
"""ESMValTool CMORizer for Sea Ice Concentration CDR (Northern Hemisphere).

Tier
Tier 3: restricted dataset.

Source
https://nsidc.org/data/g02202/versions/5

Last access
20250124

Download and processing instructions
Download data from:
https://noaadata.apps.nsidc.org/NOAA/G02202_V5/north/monthly
lat and lon from:
https://noaadata.apps.nsidc.org/NOAA/G02202_V5/ancillary/
area file:
ftp://sidads.colorado.edu/DATASETS/seaice/polar-stereo/tools/
psn25area_v3.dat

https://nsidc.org/sites/default/files/documents/user-guide/g02202-v005-userguide.pdf

"""

import logging
import os
import re

import numpy as np

import iris
from cf_units import Unit
from iris.coords import AuxCoord

from esmvaltool.cmorizers.data import utilities as utils

logger = logging.getLogger(__name__)


def _get_filepaths(in_dir, basename, yyyy):
"""Find correct name of file (extend basename with timestamp)."""
f_name = basename.format(year=yyyy)
regex = re.compile(f_name)
return_files = []
for files in os.listdir(in_dir):
if regex.match(files):
return_files.append(os.path.join(in_dir, files))

return return_files


def _fix_time_coord(cube, _field, _filename):
"""Set time points to central day of month."""
time_coord = cube.coord("time")
new_unit = Unit("days since 1850-01-01 00:00:00", calendar="standard")
time_coord.convert_units(new_unit)
old_time = new_unit.num2date(time_coord.points)
new_time = [d.replace(day=15) for d in old_time]
time_coord.points = new_unit.date2num(new_time)


def _prom_dim_coord(cube, _field, _filename):
iris.util.promote_aux_coord_to_dim_coord(cube, "time")


def _create_coord(cubes, var_name, standard_name):
cube = cubes.extract_cube(standard_name)
coord = AuxCoord(
cube.data,
standard_name=standard_name,
long_name=cube.long_name,
var_name=var_name,
units="degrees", # cube.units,
)
return coord


def _extract_variable(raw_var, cmor_info, attrs, filepath, out_dir, latlon):
"""Extract variable from all files."""
var = cmor_info.short_name
cubes = iris.load(filepath, raw_var, _prom_dim_coord)
iris.util.equalise_attributes(cubes)

cube = cubes.concatenate_cube()
iris.util.promote_aux_coord_to_dim_coord(cube, "projection_y_coordinate")
iris.util.promote_aux_coord_to_dim_coord(cube, "projection_x_coordinate")
cube.coord("projection_y_coordinate").rename("y")
cube.coord("projection_x_coordinate").rename("x")

cube.add_aux_coord(latlon[0], (1, 2))
cube.add_aux_coord(latlon[1], (1, 2))
# add the CMOR 'typesi' area-type coordinate
area_type = AuxCoord(
[1.0], standard_name="area_type",
var_name="type", long_name="Sea Ice area type"
)
cube.add_aux_coord(area_type)

# cube.convert_units(cmor_info.units)
cube.units = "%"
cube.data[cube.data > 100] = np.nan
cube = cube * 100

# utils.fix_coords(cube) is not applied: lat/lon are multidimensional aux coords
utils.fix_var_metadata(cube, cmor_info)
utils.set_global_atts(cube, attrs)

utils.save_variable(cube, var, out_dir,
attrs, unlimited_dimensions=["time"])

return cube


def _create_areacello(cfg, in_dir, sample_cube, glob_attrs, out_dir):
if not cfg["custom"].get("create_areacello", False):
return
var_info = cfg["cmor_table"].get_variable("Ofx", "areacello")
glob_attrs["mip"] = "Ofx"
lat_coord = sample_cube.coord("latitude")

area_file = os.path.join(in_dir, cfg["custom"]["area_file"])
with open(area_file, "rb") as datfile:
areasdmnd = np.fromfile(
datfile,
dtype=np.int32).reshape(lat_coord.shape)

# Values are stored as 1000 * km2: dividing by 1000 gives km2 and
# multiplying by 1e6 gives m2, i.e. a net factor of 1000
ardata = areasdmnd * 1000

cube = iris.cube.Cube(
ardata,
standard_name=var_info.standard_name,
long_name=var_info.long_name,
var_name=var_info.short_name,
units="m2",
dim_coords_and_dims=[(sample_cube.coord("y"), 0),
(sample_cube.coord("x"), 1)],
)
cube.add_aux_coord(lat_coord, (0, 1))
cube.add_aux_coord(sample_cube.coord("longitude"), (0, 1))
utils.fix_var_metadata(cube, var_info)
utils.set_global_atts(cube, glob_attrs)
utils.save_variable(
cube, var_info.short_name, out_dir,
glob_attrs, zlib=True)


def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date):
"""Cmorization func call."""
glob_attrs = cfg["attributes"]
cmor_table = cfg["cmor_table"]

# get aux nc file
cubesaux = iris.load(
os.path.join(
in_dir, "G02202-ancillary-psn25-v05r00.nc")
)
coords = [_create_coord(cubesaux, "lat", "latitude"),
_create_coord(cubesaux, "lon", "longitude")]

sample_cube = None
for year in range(1979, 2025):
filepaths = _get_filepaths(in_dir, cfg["filename"], year)

if filepaths:
logger.info("Found %d files in '%s'", len(filepaths), in_dir)

for var, var_info in cfg["variables"].items():
logger.info("CMORizing variable '%s'", var)
glob_attrs["mip"] = var_info["mip"]
cmor_info = cmor_table.get_variable(var_info["mip"], var)
sample_cube = _extract_variable(
var_info.get("raw", var),
cmor_info,
glob_attrs,
filepaths,
out_dir,
coords,
)
else:
logger.info("No files found for year %d (basename: %s)",
year, cfg["filename"])

if sample_cube is not None:
_create_areacello(cfg, in_dir, sample_cube, glob_attrs, out_dir)
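
As a quick sanity check on the area handling in _create_areacello: the .dat file stores each cell area as an integer equal to 1000 times the area in km2, so multiplying by 1000 yields m2. The value below is a hypothetical nominal 25 km x 25 km cell, used only for illustration:

import numpy as np

stored = np.int32(625000)       # 625.000 km2 stored as 1000 * km2
area_m2 = stored * 1000         # factor applied in _create_areacello -> 6.25e8 m2
assert np.isclose(area_m2, (stored / 1000) * 1e6)
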