Skip to content

Commit b0b5b6d

Browse files
Add functions to import the datasets (#3)
* Update .gitignore * Add basic setup.py * Add basic functionality to load datasets * Set cube helper logging level to ERROR to hide warnings i.e. `tracking_id, history and creation_date attributes inconsistent` * Comment out problematic dataset * Speed up data loading and lose the cube-helper dependency * Rename data -> cubes * Add package data * Update esmvaltool_sample_data/loader.py Co-authored-by: Bouwe Andela <[email protected]> * Add developer imports * Address review comments * Update doc strings and add annotations * Select subset of data * Add whitelists for specific subsets of data * Use ignore list to filter problematic datasets * Ignore dataset that fails to regrid * Remove unused functions * Remove code used for testing Co-authored-by: Bouwe Andela <[email protected]>
1 parent 1ed1d66 commit b0b5b6d

File tree

5 files changed

+251
-1
lines changed

5 files changed

+251
-1
lines changed

.gitignore

Lines changed: 86 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,89 @@
1-
# ignore these files
1+
# Distribution / packaging
2+
.Python
3+
build/
4+
c
5+
develop-eggs/
6+
dist/
7+
downloads/
8+
eggs/
9+
.eggs/
10+
env/
11+
lib/
12+
lib64/
13+
parts/
14+
sdist/
15+
var/
16+
wheels/
17+
*.egg-info/
18+
.installed.cfg
19+
*.egg
20+
*.orig
21+
*.tmp
22+
MANIFEST
23+
24+
# Byte-compiled / optimized / DLL files
25+
__pycache__/
26+
*.py[cod]
27+
*$py.class
28+
29+
# C extensions
30+
*.so
31+
32+
# Created by editors
33+
*~
34+
\#*
35+
\.\#*
36+
*.swp
37+
38+
# Created by PyCharm
39+
.idea/
40+
41+
# eclipse/pydev
42+
.project
43+
.pydevproject
44+
.settings
45+
46+
# Created by VSCode
47+
.vscode
48+
49+
# pytest
50+
.cache
51+
.pytest_cache
52+
53+
# PyInstaller
54+
# Usually these files are written by a python script from a template
55+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
56+
*.manifest
57+
*.spec
58+
59+
# Installer logs
60+
pip-log.txt
61+
pip-delete-this-directory.txt
62+
63+
# Unit test / coverage reports
64+
htmlcov/
65+
.tox/
66+
.coverage
67+
.coverage.*
68+
.cache
69+
nosetests.xml
70+
coverage.xml
71+
*.cover
72+
.hypothesis/
73+
74+
# Jupyter Notebook
75+
.ipynb_checkpoints
76+
77+
*.tmp
78+
*.orig
79+
/c
80+
/tests/data/**
81+
test-reports/
82+
/test_bash.sh
83+
/python_test_out.txt
84+
85+
# Build folder
86+
doc/sphinx/build
287

388
# esgf-pyclient cache
489
*.sqlite

MANIFEST.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
recursive-include esmvaltool_sample_data/data/ *.nc

esmvaltool_sample_data/__init__.py

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
from pathlib import Path
from typing import Iterator

import cf_units
import iris
import yaml
6+
7+
# Directory containing this package; the bundled data and configuration
# files are resolved relative to it.
base_dir = Path(__file__).parent

# When True, the loader functions print progress information.
VERBOSE = False

# Read the configuration with an explicit encoding so that parsing does not
# depend on the platform's default locale.
with open(base_dir / 'datasets.yml', 'r', encoding='utf-8') as f:
    config = yaml.safe_load(f)

# Entries under `ignore` are dot-separated dataset identifiers; replacing the
# dots with slashes gives the form that is compared against directory paths.
ignore_list = [fn.replace('.', '/') for fn in config['ignore']]
15+
16+
17+
def strip_attributes(cube: 'iris.cube.Cube') -> None:
    """Remove attributes in-place that cause issues with merging and
    concatenation.

    The attributes ``creation_date``, ``tracking_id`` and ``history`` are
    dropped from ``cube.attributes``; attributes that are not present are
    silently skipped.

    Parameters
    ----------
    cube
        Cube whose ``attributes`` mapping is modified in-place.
    """
    for attr in ('creation_date', 'tracking_id', 'history'):
        # Supplying a default makes the removal a no-op for absent keys.
        cube.attributes.pop(attr, None)
23+
24+
25+
def simplify_time(cube: 'iris.Cube') -> None:
    """Convert the cube's time coordinate in-place to a common reference.

    The units of the ``time`` coordinate become
    ``days since 1850-1-1 00:00:00``; the calendar already set on the
    coordinate is kept.
    """
    time_coord = cube.coord('time')
    target_units = cf_units.Unit(
        'days since 1850-1-1 00:00:00',
        calendar=time_coord.units.calendar,
    )
    time_coord.convert_units(target_units)
31+
32+
33+
def load_cubes_from_input_dirs(input_dirs: list) -> 'Iterator[iris.cube.Cube]':
    """Generator that loads all *.nc files from each input dir into a cube.

    The directories are visited in sorted order. For every directory, all
    ``*.nc`` files are loaded, the loaded cubes are cleaned up with
    `strip_attributes` and `simplify_time`, and the result is concatenated
    into a single cube.

    Parameters
    ----------
    input_dirs
        Directories (path objects providing ``glob``) that contain the
        ``*.nc`` files of one dataset each.

    Yields
    ------
    iris.cube.Cube
        One concatenated cube per input directory.
    """
    for i, input_dir in enumerate(sorted(input_dirs)):
        if VERBOSE:
            print(f'Loading #{i:02d}:', input_dir)

        # glob() returns files in arbitrary order; sort them so that loading
        # is reproducible, and hand iris a concrete list of file names.
        files = sorted(input_dir.glob('*.nc'))
        cubes = iris.load([str(file) for file in files])
        for loaded_cube in cubes:
            strip_attributes(loaded_cube)
            simplify_time(loaded_cube)

        cube = cubes.concatenate_cube()

        if VERBOSE:
            print(' ', cube.shape, cube.coord('time').units.calendar)

        yield cube
51+
52+
53+
def filter_ignored_datasets(dirs, root):
    """Yield the directories that are not on the ignore list.

    Parameters
    ----------
    dirs
        Iterable of directory paths located below `root`.
    root
        Directory that the entries of `dirs` are made relative to before
        they are compared with `ignore_list`.

    Yields
    ------
    The entries of `dirs` whose path relative to `root` is not in
    `ignore_list`. Ignored entries are reported when `VERBOSE` is set.
    """
    for drc in dirs:
        # `ignore_list` entries are written with forward slashes, so compare
        # against the POSIX form of the path; str() would use backslashes on
        # Windows and never match.
        test_drc = drc.relative_to(root).as_posix()
        if test_drc not in ignore_list:
            yield drc
        elif VERBOSE:
            print('Ignored:', test_drc)
60+
61+
62+
def load_timeseries_cubes(mip_table: str = 'Amon') -> list:
    """Load the bundled timeseries sample data as iris cubes.

    The data are: ta / Amon / historical / r1i1p1f1, any grid, 1950 - onwards.
    All dimensions were reduced to a few steps except for the time dimension.

    Parameters
    ----------
    mip_table: str
        select monthly (`Amon`) or daily (`day`) data.

    Returns
    -------
    list of iris.cube
    """
    timeseries_dir = base_dir / 'data' / 'timeseries'

    # Every directory holding at least one matching file is one dataset.
    nc_files = timeseries_dir.glob(f'**/{mip_table}/**/*.nc')
    dataset_dirs = list({nc_file.parent for nc_file in nc_files})

    # Drop the datasets that are listed as problematic before loading.
    selected_dirs = list(
        filter_ignored_datasets(dataset_dirs, timeseries_dir))

    return list(load_cubes_from_input_dirs(selected_dirs))
88+
89+
90+
if __name__ == '__main__':
    # Manual smoke test: load every supported MIP table and check that each
    # cube can be regridded onto the grid of the first one.
    VERBOSE = True

    for mip_table in ('Amon', 'day'):
        print()
        print(f'Loading `{mip_table}`')
        ts = load_timeseries_cubes(mip_table)

        reference_cube = ts[0]
        for index, cube in enumerate(ts):
            print(index)
            # The regridded result is discarded; only success matters here.
            cube.regrid(grid=reference_cube, scheme=iris.analysis.Linear())

esmvaltool_sample_data/datasets.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,3 +27,7 @@ ignore:
2727
- CMIP6.CMIP.EC-Earth-Consortium.EC-Earth3-Veg-LR.historical.r1i1p1f1.Amon.ta.gr.v20200217
2828
# something wrong with lon coord
2929
- CMIP6.CMIP.UA.MCM-UA-1-0.historical.r1i1p1f1.Amon.ta.gn.v20190731
30+
# iris.exceptions.ConcatenateError: failed to concatenate into a single cube.
31+
- CMIP6.CMIP.NCC.NorCPM1.historical.r1i1p1f1.Amon.ta.gn.v20190914
32+
# Regridding -> ValueError: Cube 'air_temperature' must contain a single 1D y coordinate.
33+
- CMIP6.CMIP.FIO-QLNM.FIO-ESM-2-0.historical.r1i1p1f1.Amon.ta.gn.v20191204

setup.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
from setuptools import setup

# The README is used as the long description of the distribution. Read it
# with an explicit encoding so the build does not depend on the locale.
with open('README.md', encoding='utf-8') as readme_file:
    readme = readme_file.read()

PACKAGES = [
    'esmvaltool_sample_data',
]

setup(
    name='ESMValTool sample data',
    version='0.0.1',
    description="ESMValTool sample data",
    long_description=readme + '\n\n',
    author="",
    author_email='',
    url='https://github.com/ESMValGroup/ESMValTool_sample_data',
    packages=PACKAGES,
    # The package reads `datasets.yml` when it is imported, so the file has
    # to be installed next to the code.
    package_data={
        'esmvaltool_sample_data': ['datasets.yml'],
    },
    include_package_data=True,
    license="",
    zip_safe=False,
    keywords='ESMValTool',
    classifiers=[
        'Development Status :: 2 - Pre-Alpha',
        'Intended Audience :: Developers',
        'License :: OSI Approved :: Apache Software License',
        'Natural Language :: English',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.6',
        'Programming Language :: Python :: 3.7',
    ],
    test_suite='tests',
    # Every package imported by `esmvaltool_sample_data` is declared here.
    install_requires=[
        'cf-units',
        'pyyaml',
        'scitools-iris>=2.2',
    ],
    # tests_require=[
    #     'pytest',
    #     'pytest-cov',
    #     'pycodestyle',
    # ],
    extras_require={
        'develop': [
            'codespell',
            'docformatter',
            'esgf-pyclient',
            'isort',
            'myproxyclient',
            'pre-commit',
            'prospector[with_pyroma]!=1.1.6.3,!=1.1.6.4',
            'yamllint',
            'yapf',
        ],
    },
)

0 commit comments

Comments
 (0)