-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathcmip6.py
More file actions
153 lines (123 loc) · 4.21 KB
/
cmip6.py
File metadata and controls
153 lines (123 loc) · 4.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
import os.path
import pathlib
from pathlib import Path
from typing import Any
import pandas as pd
import xarray as xr
from ref_sample_data.data_request.base import IntakeESGFDataRequest
from ref_sample_data.resample import decimate_curvilinear, decimate_rectilinear
def prefix_to_filename(ds, filename_prefix: str) -> str:
    """
    Build the NetCDF filename for a dataset from a filename prefix.

    If the dataset has a ``time`` dimension, the earliest and latest time
    values (each formatted as ``YYYYMM``) are appended to the prefix.

    Parameters
    ----------
    ds
        Dataset the filename is being generated for
    filename_prefix
        Leading part of the filename, used verbatim

        This includes the different facets of the dataset

    Returns
    -------
        ``<prefix>_<start>-<end>.nc`` if the dataset has a time dimension,
        otherwise ``<prefix>.nc``
    """
    # Datasets without a time axis carry no date range in their name
    if "time" not in ds.dims:
        return f"{filename_prefix}.nc"

    month_format = "%Y%m"
    first_month = ds.time.min().dt.strftime(month_format).item()
    last_month = ds.time.max().dt.strftime(month_format).item()
    return f"{filename_prefix}_{first_month}-{last_month}.nc"
class CMIP6Request(IntakeESGFDataRequest):
    """
    Represents a CMIP6 dataset request

    These data are fetched from ESGF and decimated according to their grid type
    """

    source_type = "CMIP6"

    # Facets that make up the output directory tree, in order
    cmip6_path_items = (
        "mip_era",
        "activity_drs",
        "institution_id",
        "source_id",
        "experiment_id",
        "member_id",
        "table_id",
        "variable_id",
        "grid_label",
    )

    # Facets joined with "_" to form the output filename prefix, in order
    cmip6_filename_paths = (
        "variable_id",
        "table_id",
        "source_id",
        "experiment_id",
        "member_id",
        "grid_label",
    )

    def __init__(self, facets: dict[str, Any], remove_ensembles: bool, time_span: tuple[str, str] | None):
        """
        Store the request parameters and sanity-check the facet configuration.

        Parameters
        ----------
        facets
            Facets describing the request (stored as ``self.facets``;
            not used directly by the methods of this class)
        remove_ensembles
            Stored as ``self.remove_ensembles``; not used directly by the
            methods of this class
        time_span
            ``(start, end)`` bounds used to slice the time axis after decimation,
            or ``None`` to keep all time steps
        """
        # NOTE(review): the base-class initialiser is not called here - confirm this is intentional
        self.avail_facets = [
            "mip_era",
            "activity_drs",
            "institution_id",
            "source_id",
            "experiment_id",
            "member_id",
            "table_id",
            "variable_id",
            "grid_label",
            "version",
            "data_node",
        ]

        self.facets = facets
        self.remove_ensembles = remove_ensembles
        self.time_span = time_span

        # Every facet used to build paths and filenames must be a known facet
        unknown_path_items = [key for key in self.cmip6_path_items if key not in self.avail_facets]
        assert not unknown_path_items, (
            f"cmip6_path_items contains facets missing from avail_facets: {unknown_path_items}"
        )
        unknown_filename_items = [key for key in self.cmip6_filename_paths if key not in self.avail_facets]
        assert not unknown_filename_items, (
            f"cmip6_filename_paths contains facets missing from avail_facets: {unknown_filename_items}"
        )

    def decimate_dataset(self, dataset: xr.Dataset) -> xr.Dataset | None:
        """
        Decimate the dataset to a smaller size.

        The decimation routine is chosen from the dataset's dimensions. If a
        time span was requested, the result is also restricted to that span.

        Parameters
        ----------
        dataset
            The dataset to decimate

        Returns
        -------
        xr.Dataset | None
            The decimated dataset, or ``None`` if no time steps remain after
            selecting the requested time span

        Raises
        ------
        ValueError
            If the dataset's dimensions match none of the supported layouts
        """
        has_latlon = "lat" in dataset.dims and "lon" in dataset.dims
        has_ij = "i" in dataset.dims and "j" in dataset.dims
        # The AMOC variable `msftmz` has these strange dims and we do not want to decimate
        skip_decimate = {"time", "basin", "lev", "lat"}.issubset(dataset.dims)

        if has_latlon:
            assert len(dataset.lat.dims) == 1 and len(dataset.lon.dims) == 1, (
                "Expected one-dimensional lat and lon coordinates for a rectilinear grid"
            )

            result = decimate_rectilinear(dataset)
        elif has_ij:
            # 2d curvilinear grid (generally ocean variables)
            result = decimate_curvilinear(dataset)
        elif skip_decimate:
            result = dataset
        else:
            raise ValueError("Cannot decimate this grid: too many dimensions")

        if "time" in dataset.dims and self.time_span is not None:
            result = result.sel(time=slice(*self.time_span))
            # Nothing left inside the requested span: signal this with None
            if result.time.size == 0:
                result = None

        return result

    def generate_filename(self, metadata: pd.Series, ds: xr.Dataset, ds_filename: pathlib.Path) -> Path:
        """
        Create the output filename for the dataset.

        The directory is built from the ``cmip6_path_items`` facets followed by
        ``v<version>``, and the filename prefix from the ``cmip6_filename_paths``
        facets joined with underscores.

        Parameters
        ----------
        metadata
            Facet values for the dataset; must provide every facet in
            ``cmip6_path_items`` and ``cmip6_filename_paths`` plus ``version``
        ds
            Loaded dataset
        ds_filename
            Filename of the dataset (Unused)

        Returns
        -------
            The output filename
        """
        output_path = Path(*(metadata[item] for item in self.cmip6_path_items)) / f"v{metadata['version']}"
        filename_prefix = "_".join([metadata[item] for item in self.cmip6_filename_paths])

        return output_path / prefix_to_filename(ds, filename_prefix)