Skip to content

Commit 3a500e1

Browse files
authored
Updates to file chunking checks and consistency output (#45)
- Updated consistency output
  - Added dimensions
  - Added variable attributes data types
  - Added global attributes data types
  - Added non-required global attributes
- Updated file chunking check
  - Suppressing error for last year of a simulation if it is the officially expected end of the experiment
1 parent 9b9e5ab commit 3a500e1

File tree

2 files changed: 61 additions (+61) and 8 deletions (-8)

2 files changed: 61 additions (+61) and 8 deletions (-8)

cc_plugin_cc6/base.py

Lines changed: 38 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -319,22 +319,39 @@ def _read_CV(self, path, table_prefix, table_name):
319319

320320
def _write_consistency_output(self):
321321
"""Write output for consistency checks across files."""
322-
# Dictionary of global attributes
322+
# Dictionaries of global attributes and their data types
323323
required_attributes = self.CV.get("required_global_attributes", {})
324-
file_attrs = {
324+
file_attrs_req = {
325325
k: str(v) for k, v in self.xrds.attrs.items() if k in required_attributes
326326
}
327+
file_attrs_nreq = {
328+
k: str(v)
329+
for k, v in self.xrds.attrs.items()
330+
if k not in required_attributes
331+
if k not in ["history"]
332+
}
333+
file_attrs_dtypes = {
334+
k: type(v).__qualname__ for k, v in self.xrds.attrs.items()
335+
}
327336
for k in required_attributes:
328-
if k not in file_attrs:
329-
file_attrs[k] = "unset"
330-
# Dictionary of variable attributes
337+
if k not in file_attrs_req:
338+
file_attrs_req[k] = "unset"
339+
if k not in file_attrs_dtypes:
340+
file_attrs_dtypes[k] = "unset"
341+
# Dictionaries of variable attributes and their data types
331342
var_attrs = {}
343+
var_attrs_dtypes = {}
332344
for var in list(self.xrds.data_vars.keys()) + list(self.xrds.coords.keys()):
333345
var_attrs[var] = {
334346
key: str(value)
335347
for key, value in self.xrds[var].attrs.items()
336348
if key not in ["history"]
337349
}
350+
var_attrs_dtypes[var] = {
351+
key: type(value).__qualname__
352+
for key, value in self.xrds[var].attrs.items()
353+
if key not in ["history"]
354+
}
338355
# Dictionary of time information
339356
time_info = {}
340357
if self.time is not None:
@@ -363,12 +380,27 @@ def _write_consistency_output(self):
363380
coord_checksums[coord_var] = md5(
364381
str(self.xrds[coord_var].values.tobytes()).encode("utf-8")
365382
).hexdigest()
383+
# Dictionary of dimension sizes
384+
dims = dict(self.xrds.sizes)
385+
# Do not compare time dimension size, only name
386+
if self.time is not None:
387+
dimt = self.time.dims[0]
388+
dims[dimt] = "n"
389+
# Dictionary of variable data types
390+
var_dtypes = {}
391+
for var in list(self.xrds.data_vars.keys()) + list(self.xrds.coords.keys()):
392+
var_dtypes[var] = str(self.xrds[var].dtype)
366393
# Write combined dictionary
367394
with open(self.consistency_output, "w") as f:
368395
json.dump(
369396
{
370-
"global_attributes": file_attrs,
397+
"global_attributes": file_attrs_req,
398+
"global_attributes_non_required": file_attrs_nreq,
399+
"global_attributes_dtypes": file_attrs_dtypes,
371400
"variable_attributes": var_attrs,
401+
"variable_attributes_dtypes": var_attrs_dtypes,
402+
"variable_dtypes": var_dtypes,
403+
"dimensions": dims,
372404
"coordinates": coord_checksums,
373405
"time_info": time_info,
374406
},

cc_plugin_cc6/cc6.py

Lines changed: 23 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -216,6 +216,19 @@ def check_time_chunking(self, ds):
216216
messages.append(f"Frequency '{self.frequency}' not supported.")
217217
return self.make_result(level, score, out_of, desc, messages)
218218

219+
# Expected last simulation year
220+
# -> suppress error for last year of a simulation
221+
# if it is the "official" last year
222+
expected_last_sim_year = {
223+
"evaluation": 2024,
224+
"historical": 2014,
225+
"ssp119": 2100,
226+
"ssp126": 2100,
227+
"ssp245": 2100,
228+
"ssp370": 2100,
229+
"ssp585": 2100,
230+
}
231+
219232
# Get the time dimension, calendar and units
220233
if self.time is None:
221234
messages.append("Coordinate variable 'time' not found in file.")
@@ -255,7 +268,6 @@ def check_time_chunking(self, ds):
255268
year, 1, 1, 0, 0, 0, calendar=self.calendar
256269
)
257270
else:
258-
259271
year = first_time.year + 1
260272
while str(year)[-1] != "1":
261273
year += 1
@@ -298,6 +310,15 @@ def check_time_chunking(self, ds):
298310
else:
299311
messages.append(f"Cannot interpret cell_methods '{self.cell_methods}'.")
300312

313+
# Consider experiment end
314+
exp_last_year = expected_last_sim_year.get(
315+
self._get_attr("driving_experiment_id", None), None
316+
)
317+
if exp_last_year:
318+
expected_end_date_exp = expected_end_date.replace(year=exp_last_year)
319+
else:
320+
expected_end_date_exp = expected_end_date
321+
301322
if len(messages) == 0:
302323
errmsg = (
303324
f"{'Apart from the first and last files of a timeseries ' if nyears>1 else ''}'{nyears}' "
@@ -311,7 +332,7 @@ def check_time_chunking(self, ds):
311332
+ errmsg
312333
)
313334
# Check if the last time is equal to the expected end date
314-
if last_time != expected_end_date:
335+
if last_time != expected_end_date and last_time != expected_end_date_exp:
315336
messages.append(
316337
f"The last timestep differs from expectation ('{expected_end_date}'): '{last_time}'. "
317338
+ errmsg

0 commit comments

Comments
 (0)