Skip to content

Commit 4764368

Browse files
Merge pull request #51 from euro-cordex/check_driving_source
Check global attribute driving_source
2 parents 17672e1 + 4f0b544 commit 4764368

File tree

4 files changed

+290
-27
lines changed

4 files changed

+290
-27
lines changed

cc_plugin_cc6/_constants.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,12 @@
2020
# Yearly interval: nominal 360 days, accepted range 359.9 to 366.1 days.
deltdic["yrmax"] = timedelta(days=366.1).total_seconds()
deltdic["yrmin"] = timedelta(days=359.9).total_seconds()
deltdic["yr"] = timedelta(days=360).total_seconds()
# Sub-hourly interval: nominal 600 seconds, accepted range 599 to 601 seconds.
deltdic["subhr"] = timedelta(seconds=600).total_seconds()
deltdic["subhrmax"] = timedelta(seconds=601).total_seconds()
deltdic["subhrmin"] = timedelta(seconds=599).total_seconds()
# Decadal interval: nominal 3600 days (ten times the nominal "yr" value).
# The "max" key has to hold the larger bound and the "min" key the smaller
# one; otherwise a range test of the form
# deltdic[f + "min"] <= interval <= deltdic[f + "max"] can never be true.
deltdic["dec"] = timedelta(days=3600).total_seconds()
deltdic["decmax"] = timedelta(days=3660.01).total_seconds()
deltdic["decmin"] = timedelta(days=3599.99).total_seconds()
# Centennial interval: nominal 36000 days, bounds ordered as for "dec".
deltdic["cen"] = timedelta(days=36000).total_seconds()
deltdic["cenmax"] = timedelta(days=36600.01).total_seconds()
deltdic["cenmin"] = timedelta(days=35999.99).total_seconds()

cc_plugin_cc6/base.py

Lines changed: 205 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,16 @@ def printtimedelta(d):
3535
return f"{d} seconds"
3636

3737

38+
def flatten(lst):
    """Return a new flat list holding the leaf elements of nested lists.

    Only ``list`` instances are expanded (recursively, to any depth); every
    other element, including tuples and strings, is kept as a single item.
    The input is not modified.
    """
    flat = []
    for element in lst:
        # Expand nested lists recursively, wrap anything else as one item.
        flat += flatten(element) if isinstance(element, list) else [element]
    return flat
46+
47+
3848
class MIPCVCheckBase(BaseCheck):
3949
register_checker = False
4050
_cc_spec = "mip"
@@ -78,6 +88,91 @@ def setup(self, dataset):
7888
self._initialize_coords_info()
7989
if self.consistency_output:
8090
self._write_consistency_output()
91+
# if only the time checks should be run (so no verification against CV / MIP tables)
92+
elif self.options.get("time_checks_only", False):
93+
self.varname = [
94+
var
95+
for var in flatten(list(self.xrds.cf.standard_names.values()))
96+
if var
97+
not in flatten(
98+
list(self.xrds.cf.coordinates.values())
99+
+ list(self.xrds.cf.axes.values())
100+
+ list(self.xrds.cf.bounds.values())
101+
+ list(self.xrds.cf.formula_terms.values())
102+
)
103+
]
104+
self._initialize_time_info()
105+
self._initialize_coords_info()
106+
self.frequency = self._get_attr("frequency")
107+
if self.varname != []:
108+
self.cell_methods = self.xrds[self.varname[0]].attrs.get(
109+
"cell_methods", "unknown"
110+
)
111+
else:
112+
self.cell_methods = "unknown"
113+
self.drs_fn = {}
114+
if self.frequency == "unknown" and self.time is not None:
115+
if self.time.sizes[self.time.dims[0]] > 1 and 1 == 2:
116+
for ifreq in [
117+
fkey
118+
for fkey in deltdic.keys()
119+
if "max" not in fkey and "min" not in fkey
120+
]:
121+
try:
122+
intv = abs(
123+
get_tseconds(
124+
cftime.num2date(
125+
self.time.values[1],
126+
units=self.timeunits,
127+
calendar=self.calendar,
128+
)
129+
- cftime.num2date(
130+
self.time.values[0],
131+
units=self.timeunits,
132+
calendar=self.calendar,
133+
)
134+
)
135+
)
136+
if (
137+
intv <= deltdic[ifreq + "max"]
138+
and intv >= deltdic[ifreq + "min"]
139+
):
140+
self.frequency = ifreq
141+
break
142+
except (AttributeError, ValueError):
143+
continue
144+
elif self.timebnds and len(self.xrds[self.timebnds].dims) == 2:
145+
for ifreq in [
146+
fkey
147+
for fkey in deltdic.keys()
148+
if "max" not in fkey and "min" not in fkey
149+
]:
150+
try:
151+
intv = abs(
152+
get_tseconds(
153+
cftime.num2date(
154+
self.xrds[self.timebnds].values[0, 1],
155+
units=self.timeunits,
156+
calendar=self.calendar,
157+
)
158+
- cftime.num2date(
159+
self.xrds[self.timebnds].values[0, 0],
160+
units=self.timeunits,
161+
calendar=self.calendar,
162+
)
163+
)
164+
)
165+
if (
166+
intv <= deltdic[ifreq + "max"]
167+
and intv >= deltdic[ifreq + "min"]
168+
):
169+
self.frequency = ifreq
170+
break
171+
except (AttributeError, ValueError):
172+
continue
173+
if self.consistency_output:
174+
self._write_consistency_output()
175+
# in case of general "mip" checks, the path to the CMOR tables needs to be specified
81176
elif self._cc_spec == "mip":
82177
raise Exception(
83178
"ERROR: No 'tables' option specified. Cannot initialize CV and MIP tables."
@@ -146,7 +241,6 @@ def _initialize_CV_info(self, tables_path):
146241
)
147242
for key in ["table_id"]:
148243
if key not in self.CT[table]["Header"]:
149-
print(table, key)
150244
raise KeyError(
151245
f"CMOR table '{table}' misses the key '{key}' in the header information."
152246
)
@@ -221,9 +315,6 @@ def _initialize_time_info(self):
221315
# The entire checker crashes in case of invalid time units
222316
# todo: catch a possible exception in base._initialize_time_info
223317
# and report the problem in any check method
224-
self.timedec = xr.decode_cf(
225-
self.xrds.copy(deep=True), decode_times=True, use_cftime=True
226-
).cf["time"]
227318
self.time_invariant_vars = [
228319
var
229320
for var in list(self.xrds.data_vars.keys())
@@ -234,7 +325,6 @@ def _initialize_time_info(self):
234325
self.calendar = None
235326
self.timeunits = None
236327
self.timebnds = None
237-
self.timedec = None
238328
self.time_invariant_vars = [
239329
var
240330
for var in list(self.xrds.data_vars.keys())
@@ -304,13 +394,6 @@ def _get_var_attr(self, attr, default="unknown"):
304394
return default
305395
return default
306396

307-
def _infer_frequency(self):
    """Return the frequency inferred from ``self.timedec``.

    The value is whatever ``xr.infer_freq`` yields; if that call raises a
    ``ValueError`` the string "unknown" is returned instead.
    """
    try:
        inferred = xr.infer_freq(self.timedec)
    except ValueError:
        inferred = "unknown"
    return inferred
313-
314397
def _read_CV(self, path, table_prefix, table_name):
315398
"""Reads the specified CV table."""
316399
table_path = Path(path, f"{table_prefix}_{table_name}.json")
@@ -325,7 +408,10 @@ def _read_CV(self, path, table_prefix, table_name):
325408
def _write_consistency_output(self):
326409
"""Write output for consistency checks across files."""
327410
# Dictionaries of global attributes and their data types
328-
required_attributes = self.CV.get("required_global_attributes", {})
411+
if self.options.get("time_checks_only", False):
412+
required_attributes = {}
413+
else:
414+
required_attributes = self.CV.get("required_global_attributes", {})
329415
file_attrs_req = {
330416
k: str(v) for k, v in self.xrds.attrs.items() if k in required_attributes
331417
}
@@ -834,10 +920,13 @@ def check_grid_definition(self, ds):
834920
if len(self.varname) == 0:
835921
return self.make_result(level, out_of, out_of, desc, messages)
836922

837-
dimsCT = self._get_var_attr("dimensions", [])
838923
# Check only the first latitude and longitude found
924+
dimsCT = self._get_var_attr("dimensions", [])
839925
if "latitude" or "longitude" in dimsCT:
840-
if "latitude" in self.xrds.cf.standard_names:
926+
if (
927+
"latitude" in self.xrds.cf.standard_names
928+
and self.xrds[self.xrds.cf.standard_names["latitude"][0]].ndim > 1
929+
):
841930
lat = self.xrds.cf.standard_names["latitude"][0]
842931
if lat != self.CTgrids["variable_entry"]["latitude"]["out_name"]:
843932
messages.append(
@@ -864,7 +953,10 @@ def check_grid_definition(self, ds):
864953
attrs=["type"],
865954
)
866955
)
867-
if "longitude" in self.xrds.cf.standard_names:
956+
if (
957+
"longitude" in self.xrds.cf.standard_names
958+
and self.xrds[self.xrds.cf.standard_names["longitude"][0]].ndim > 1
959+
):
868960
lon = self.xrds.cf.standard_names["longitude"][0]
869961
if lon != self.CTgrids["variable_entry"]["longitude"]["out_name"]:
870962
messages.append(
@@ -935,6 +1027,65 @@ def check_grid_definition(self, ds):
9351027

9361028
return self.make_result(level, out_of, score, desc, messages)
9371029

1030+
def _resolve_generic_level(self, dimCT, var, messages):
    """
    Map a generic level name (e.g. 'alevel') onto a concrete axis_entry.

    The lookup narrows the axis entries of ``self.CTcoords`` in three steps:
    entries whose 'generic_level_name' equals ``dimCT``, then those whose
    'standard_name' occurs in the data set, and - only if several remain -
    those whose 'long_name' and 'formula' both equal the attributes of the
    first data set variable carrying that standard_name.

    Returns the matching axis_entry dictionary. If no unique match can be
    determined, an explanatory text is appended to ``messages`` and an
    empty dictionary is returned.
    """
    axis_entries = self.CTcoords["axis_entry"]

    # Step 1: axis entries declaring the requested generic level name
    candidates = []
    for key, entry in axis_entries.items():
        if entry.get("generic_level_name") == dimCT:
            candidates.append(key)
    if len(candidates) == 0:
        messages.append(
            f"The required dimension / coordinate '{dimCT}' of variable '{var}' is not defined explicitly and no generic level match (e.g., 'generic_level_name': '{dimCT}') could be found in the CMOR table."
        )
        return {}

    # Step 2: keep the candidates whose standard_name is present in the file
    ds_standard_names = self.xrds.cf.standard_names
    pmatches = [
        name
        for name in candidates
        if axis_entries[name].get("standard_name") in ds_standard_names
    ]
    if len(pmatches) == 0:
        messages.append(
            f"The required dimension / coordinate '{dimCT}' of variable '{var}' is not defined explicitly. No generic level matches ({', '.join(candidates)}) could be identified in the input file via standard_name."
        )
        return {}
    if len(pmatches) == 1:
        return axis_entries[pmatches[0]]

    # Step 3: several candidates left - compare long_name and formula with
    # the first data set variable that carries the candidate's standard_name
    def _agrees_with_file(name):
        entry = axis_entries[name]
        ds_var = ds_standard_names[entry.get("standard_name")][0]
        file_attrs = self.xrds[ds_var].attrs
        if entry.get("long_name") != file_attrs.get("long_name"):
            return False
        return entry.get("formula") == file_attrs.get("formula")

    plfmatches = [name for name in pmatches if _agrees_with_file(name)]
    if len(plfmatches) == 1:
        return axis_entries[plfmatches[0]]

    messages.append(
        f"The required dimension / coordinate '{dimCT}' of variable '{var}' is not defined explicitly. Multiple generic level matches "
        f"({', '.join(pmatches)}) can be identified due to insufficient and incompliant metadata specification."
    )
    return {}
1088+
9381089
def check_variable_definition(self, ds):
9391090
"""Checks mandatory variable attributes of the main variable and associated coordinates."""
9401091
desc = "Variable and coordinate definition (CV)"
@@ -969,14 +1120,26 @@ def check_variable_definition(self, ds):
9691120
# todo: check max min range for var / coord
9701121
#
9711122
dimsCT = self._get_var_attr("dimensions", [])
972-
if dimsCT:
973-
if isinstance(dimsCT, str):
974-
dimsCT = dimsCT.split()
1123+
dimsCT_is_valid = True
1124+
if isinstance(dimsCT, str):
1125+
dimsCT = dimsCT.split()
1126+
elif not isinstance(dimsCT, list):
1127+
messages.append(
1128+
f"Invalid 'dimensions' format for variable '{var}'. This is an issue in the CMOR tables definition and not necessarily in the data file."
1129+
)
1130+
dimsCT_is_valid = False
1131+
if dimsCT and dimsCT_is_valid:
9751132
for dimCT in dimsCT:
9761133
# The coordinate out_name must be in one of the following
9771134
# - in the variable dimensions
9781135
# - in the variable attribute "coordinates"
9791136
diminfo = self.CTcoords["axis_entry"].get(dimCT, {})
1137+
if not diminfo:
1138+
diminfo = self._resolve_generic_level(dimCT, var, messages)
1139+
# todo: checks below need to be updated to support generic levels
1140+
continue
1141+
# if not diminfo: # if checks below support generic levels, this can be uncommented
1142+
# continue
9801143
dim_on = diminfo.get("out_name", "")
9811144
dim_val_raw = diminfo.get("value", "")
9821145
dim_bnds_raw = diminfo.get("bounds_values", "")
@@ -987,7 +1150,8 @@ def check_variable_definition(self, ds):
9871150
cbnds = self.xrds[dim_on].attrs.get("bounds", None)
9881151
if dim_mhbnds not in ["yes", "no"]:
9891152
messages.append(
990-
f"The 'must_have_bounds' attribute of dimension / coordinate '{dimCT}' of the variable '{var}' has to be set to 'yes' or 'no'. This is an issue in the CMOR tables definition and not necessarily in the data file."
1153+
f"The 'must_have_bounds' attribute of dimension / coordinate '{dimCT}' of the variable '{var}' has to be set to 'yes' or 'no'. "
1154+
"This is an issue in the CMOR tables definition and not necessarily in the data file."
9911155
)
9921156
continue
9931157
if not dim_on:
@@ -1108,7 +1272,7 @@ def check_variable_definition(self, ds):
11081272
)
11091273
elif (
11101274
self.xrds[cbnds].ndim != 1
1111-
or self.xrds.dims[self.xrds[cbnds].dim[0]] != 2
1275+
or self.xrds.sizes[self.xrds[cbnds].dim[0]] != 2
11121276
):
11131277
messages.append(
11141278
f"The bounds variable '{cbnds}' needs to be one-dimensional and have exactly two values."
@@ -1356,8 +1520,11 @@ def check_time_continuity(self, ds):
13561520

13571521
# Check if frequency is known and supported
13581522
# (as defined in deltdic)
1359-
if self.frequency in ["unknown", "fx"]:
1523+
if self.frequency == "fx":
13601524
return self.make_result(level, out_of, out_of, desc, messages)
1525+
elif self.frequency == "unknown":
1526+
messages.append("Cannot test time continuity: Frequency not defined.")
1527+
return self.make_result(level, score, out_of, desc, messages)
13611528
if self.frequency not in deltdic.keys():
13621529
messages.append(f"Frequency '{self.frequency}' not supported.")
13631530
return self.make_result(level, score, out_of, desc, messages)
@@ -1429,7 +1596,9 @@ def check_time_bounds(self, ds):
14291596
messages.append(f"Frequency '{self.frequency}' not supported.")
14301597
return self.make_result(level, score, out_of, desc, messages)
14311598
if self.cell_methods == "unknown":
1432-
if len(self.varname) > 0:
1599+
if len(self.varname) > 0 and not self.options.get(
1600+
"time_checks_only", False
1601+
):
14331602
messages.append(
14341603
f"MIP table for '{self.varname[0]}' could not be identified"
14351604
" and thus no 'cell_methods' attribute could be read."
@@ -1570,7 +1739,19 @@ def check_time_range(self, ds):
15701739

15711740
# If time_range is not part of the file name structure, abort
15721741
if "time_range" not in self.drs_fn:
1573-
return self.make_result(level, out_of, out_of, desc, messages)
1742+
# Attempt to infer time range from filename if only timechecks are to be run:
1743+
if self.options.get("time_checks_only", False):
1744+
matches = list(
1745+
filter(
1746+
re.compile(r"^\d{1,}-?\d*$").match,
1747+
os.path.splitext(os.path.basename(self.filepath))[0].split("_"),
1748+
)
1749+
)
1750+
if len(matches) != 1:
1751+
return self.make_result(level, out_of, out_of, desc, messages)
1752+
self.drs_fn = {"time_range": matches[0]}
1753+
else:
1754+
return self.make_result(level, out_of, out_of, desc, messages)
15741755

15751756
# Check if frequency is identified and data is not time invariant
15761757
# (as defined in deltdic)

0 commit comments

Comments
 (0)