@@ -35,6 +35,16 @@ def printtimedelta(d):
3535 return f"{ d } seconds"
3636
3737
def flatten(lst):
    """Return a new flat list holding the items of arbitrarily nested lists.

    Only ``list`` instances are expanded (recursively, depth first, keeping
    the original order). Every other item - tuples, strings, dicts,
    ``None``, ... - is copied over unchanged. The input is not modified.
    """
    flat = []
    for element in lst:
        if not isinstance(element, list):
            flat.append(element)
            continue
        # Nested list: splice its flattened content in at this position.
        flat += flatten(element)
    return flat
46+
47+
3848class MIPCVCheckBase (BaseCheck ):
3949 register_checker = False
4050 _cc_spec = "mip"
@@ -78,6 +88,91 @@ def setup(self, dataset):
7888 self ._initialize_coords_info ()
7989 if self .consistency_output :
8090 self ._write_consistency_output ()
91+ # if only the time checks should be run (so no verification against CV / MIP tables)
92+ elif self .options .get ("time_checks_only" , False ):
93+ self .varname = [
94+ var
95+ for var in flatten (list (self .xrds .cf .standard_names .values ()))
96+ if var
97+ not in flatten (
98+ list (self .xrds .cf .coordinates .values ())
99+ + list (self .xrds .cf .axes .values ())
100+ + list (self .xrds .cf .bounds .values ())
101+ + list (self .xrds .cf .formula_terms .values ())
102+ )
103+ ]
104+ self ._initialize_time_info ()
105+ self ._initialize_coords_info ()
106+ self .frequency = self ._get_attr ("frequency" )
107+ if self .varname != []:
108+ self .cell_methods = self .xrds [self .varname [0 ]].attrs .get (
109+ "cell_methods" , "unknown"
110+ )
111+ else :
112+ self .cell_methods = "unknown"
113+ self .drs_fn = {}
114+ if self .frequency == "unknown" and self .time is not None :
115+ if self .time .sizes [self .time .dims [0 ]] > 1 and 1 == 2 :
116+ for ifreq in [
117+ fkey
118+ for fkey in deltdic .keys ()
119+ if "max" not in fkey and "min" not in fkey
120+ ]:
121+ try :
122+ intv = abs (
123+ get_tseconds (
124+ cftime .num2date (
125+ self .time .values [1 ],
126+ units = self .timeunits ,
127+ calendar = self .calendar ,
128+ )
129+ - cftime .num2date (
130+ self .time .values [0 ],
131+ units = self .timeunits ,
132+ calendar = self .calendar ,
133+ )
134+ )
135+ )
136+ if (
137+ intv <= deltdic [ifreq + "max" ]
138+ and intv >= deltdic [ifreq + "min" ]
139+ ):
140+ self .frequency = ifreq
141+ break
142+ except (AttributeError , ValueError ):
143+ continue
144+ elif self .timebnds and len (self .xrds [self .timebnds ].dims ) == 2 :
145+ for ifreq in [
146+ fkey
147+ for fkey in deltdic .keys ()
148+ if "max" not in fkey and "min" not in fkey
149+ ]:
150+ try :
151+ intv = abs (
152+ get_tseconds (
153+ cftime .num2date (
154+ self .xrds [self .timebnds ].values [0 , 1 ],
155+ units = self .timeunits ,
156+ calendar = self .calendar ,
157+ )
158+ - cftime .num2date (
159+ self .xrds [self .timebnds ].values [0 , 0 ],
160+ units = self .timeunits ,
161+ calendar = self .calendar ,
162+ )
163+ )
164+ )
165+ if (
166+ intv <= deltdic [ifreq + "max" ]
167+ and intv >= deltdic [ifreq + "min" ]
168+ ):
169+ self .frequency = ifreq
170+ break
171+ except (AttributeError , ValueError ):
172+ continue
173+ if self .consistency_output :
174+ self ._write_consistency_output ()
175+ # in case of general "mip" checks, the path to the CMOR tables need to be specified
81176 elif self ._cc_spec == "mip" :
82177 raise Exception (
83178 "ERROR: No 'tables' option specified. Cannot initialize CV and MIP tables."
@@ -146,7 +241,6 @@ def _initialize_CV_info(self, tables_path):
146241 )
147242 for key in ["table_id" ]:
148243 if key not in self .CT [table ]["Header" ]:
149- print (table , key )
150244 raise KeyError (
151245 f"CMOR table '{ table } ' misses the key '{ key } ' in the header information."
152246 )
@@ -221,9 +315,6 @@ def _initialize_time_info(self):
221315 # The entire checker crashes in case of invalid time units
222316 # todo: catch a possible exception in base._initialize_time_info
223317 # and report the problem in any check method
224- self .timedec = xr .decode_cf (
225- self .xrds .copy (deep = True ), decode_times = True , use_cftime = True
226- ).cf ["time" ]
227318 self .time_invariant_vars = [
228319 var
229320 for var in list (self .xrds .data_vars .keys ())
@@ -234,7 +325,6 @@ def _initialize_time_info(self):
234325 self .calendar = None
235326 self .timeunits = None
236327 self .timebnds = None
237- self .timedec = None
238328 self .time_invariant_vars = [
239329 var
240330 for var in list (self .xrds .data_vars .keys ())
@@ -304,13 +394,6 @@ def _get_var_attr(self, attr, default="unknown"):
304394 return default
305395 return default
306396
307- def _infer_frequency (self ):
308- """Infer frequency from given time dimension"""
309- try :
310- return xr .infer_freq (self .timedec )
311- except ValueError :
312- return "unknown"
313-
314397 def _read_CV (self , path , table_prefix , table_name ):
315398 """Reads the specified CV table."""
316399 table_path = Path (path , f"{ table_prefix } _{ table_name } .json" )
@@ -325,7 +408,10 @@ def _read_CV(self, path, table_prefix, table_name):
325408 def _write_consistency_output (self ):
326409 """Write output for consistency checks across files."""
327410 # Dictionaries of global attributes and their data types
328- required_attributes = self .CV .get ("required_global_attributes" , {})
411+ if self .options .get ("time_checks_only" , False ):
412+ required_attributes = {}
413+ else :
414+ required_attributes = self .CV .get ("required_global_attributes" , {})
329415 file_attrs_req = {
330416 k : str (v ) for k , v in self .xrds .attrs .items () if k in required_attributes
331417 }
@@ -834,10 +920,13 @@ def check_grid_definition(self, ds):
834920 if len (self .varname ) == 0 :
835921 return self .make_result (level , out_of , out_of , desc , messages )
836922
837- dimsCT = self ._get_var_attr ("dimensions" , [])
838923 # Check only the first latitude and longitude found
924+ dimsCT = self ._get_var_attr ("dimensions" , [])
839925 if "latitude" or "longitude" in dimsCT :
840- if "latitude" in self .xrds .cf .standard_names :
926+ if (
927+ "latitude" in self .xrds .cf .standard_names
928+ and self .xrds [self .xrds .cf .standard_names ["latitude" ][0 ]].ndim > 1
929+ ):
841930 lat = self .xrds .cf .standard_names ["latitude" ][0 ]
842931 if lat != self .CTgrids ["variable_entry" ]["latitude" ]["out_name" ]:
843932 messages .append (
@@ -864,7 +953,10 @@ def check_grid_definition(self, ds):
864953 attrs = ["type" ],
865954 )
866955 )
867- if "longitude" in self .xrds .cf .standard_names :
956+ if (
957+ "longitude" in self .xrds .cf .standard_names
958+ and self .xrds [self .xrds .cf .standard_names ["longitude" ][0 ]].ndim > 1
959+ ):
868960 lon = self .xrds .cf .standard_names ["longitude" ][0 ]
869961 if lon != self .CTgrids ["variable_entry" ]["longitude" ]["out_name" ]:
870962 messages .append (
@@ -935,6 +1027,65 @@ def check_grid_definition(self, ds):
9351027
9361028 return self .make_result (level , out_of , score , desc , messages )
9371029
def _resolve_generic_level(self, dimCT, var, messages):
    """
    Try to map a generic level name (e.g. 'alevel') to a concrete axis_entry.

    The lookup proceeds in up to three steps:
      1. collect all axis entries whose 'generic_level_name' equals ``dimCT``,
      2. keep those whose 'standard_name' occurs in the standard names
         of the data set,
      3. if more than one remains, keep those whose 'long_name' and 'formula'
         both equal the respective attributes of the first data set variable
         carrying that standard name.

    Returns the matching axis entry if exactly one can be identified.
    Otherwise an explanatory text (mentioning ``dimCT`` and ``var``) is
    appended to ``messages`` and an empty dictionary is returned.
    """
    axis_entries = self.CTcoords["axis_entry"]

    # Step 1: axis entries declared as realisation of the generic level
    candidates = []
    for key, entry in axis_entries.items():
        if entry.get("generic_level_name") == dimCT:
            candidates.append(key)
    if not candidates:
        messages.append(
            f"The required dimension / coordinate '{ dimCT } ' of variable '{ var } ' is not defined explicitly and no generic level match (e.g., 'generic_level_name': '{ dimCT } ') could be found in the CMOR table."
        )
        return {}

    # Step 2: candidates whose standard_name is present in the data set
    ds_standard_names = self.xrds.cf.standard_names
    pmatches = [
        key
        for key in candidates
        if axis_entries[key].get("standard_name") in ds_standard_names
    ]
    if not pmatches:
        messages.append(
            f"The required dimension / coordinate '{ dimCT } ' of variable '{ var } ' is not defined explicitly. No generic level matches ({ ', ' .join (candidates )} ) could be identified in the input file via standard_name."
        )
        return {}
    if len(pmatches) == 1:
        return axis_entries[pmatches[0]]

    # Step 3: several possible matches - compare long_name and formula with
    # the first data set variable that carries the respective standard_name
    refined = []
    for key in pmatches:
        entry = axis_entries[key]
        ds_attrs = self.xrds[ds_standard_names[entry.get("standard_name")][0]].attrs
        same_long_name = entry.get("long_name") == ds_attrs.get("long_name")
        if same_long_name and entry.get("formula") == ds_attrs.get("formula"):
            refined.append(key)

    if len(refined) == 1:
        return axis_entries[refined[0]]

    messages.append(
        f"The required dimension / coordinate '{ dimCT } ' of variable '{ var } ' is not defined explicitly. Multiple generic level matches "
        f"({ ', ' .join (pmatches )} ) can be identified due to insufficient and incompliant metadata specification."
    )
    return {}
1088+
9381089 def check_variable_definition (self , ds ):
9391090 """Checks mandatory variable attributes of the main variable and associated coordinates."""
9401091 desc = "Variable and coordinate definition (CV)"
@@ -969,14 +1120,26 @@ def check_variable_definition(self, ds):
9691120 # todo: check max min range for var / coord
9701121 #
9711122 dimsCT = self ._get_var_attr ("dimensions" , [])
972- if dimsCT :
973- if isinstance (dimsCT , str ):
974- dimsCT = dimsCT .split ()
1123+ dimsCT_is_valid = True
1124+ if isinstance (dimsCT , str ):
1125+ dimsCT = dimsCT .split ()
1126+ elif not isinstance (dimsCT , list ):
1127+ messages .append (
1128+ f"Invalid 'dimensions' format for variable '{ var } '. This is an issue in the CMOR tables definition and not necessarily in the data file."
1129+ )
1130+ dimsCT_is_valid = False
1131+ if dimsCT and dimsCT_is_valid :
9751132 for dimCT in dimsCT :
9761133 # The coordinate out_name must be in one of the following
9771134 # - in the variable dimensions
9781135 # - in the variable attribute "coordinates"
9791136 diminfo = self .CTcoords ["axis_entry" ].get (dimCT , {})
1137+ if not diminfo :
1138+ diminfo = self ._resolve_generic_level (dimCT , var , messages )
1139+ # todo: checks below need to be updated to support generic levels
1140+ continue
1141+ # if not diminfo: # if checks below support generic levels, this can be uncommented
1142+ # continue
9801143 dim_on = diminfo .get ("out_name" , "" )
9811144 dim_val_raw = diminfo .get ("value" , "" )
9821145 dim_bnds_raw = diminfo .get ("bounds_values" , "" )
@@ -987,7 +1150,8 @@ def check_variable_definition(self, ds):
9871150 cbnds = self .xrds [dim_on ].attrs .get ("bounds" , None )
9881151 if dim_mhbnds not in ["yes" , "no" ]:
9891152 messages .append (
990- f"The 'must_have_bounds' attribute of dimension / coordinate '{ dimCT } ' of the variable '{ var } ' has to be set to 'yes' or 'no'. This is an issue in the CMOR tables definition and not necessarily in the data file."
1153+ f"The 'must_have_bounds' attribute of dimension / coordinate '{ dimCT } ' of the variable '{ var } ' has to be set to 'yes' or 'no'. "
1154+ "This is an issue in the CMOR tables definition and not necessarily in the data file."
9911155 )
9921156 continue
9931157 if not dim_on :
@@ -1108,7 +1272,7 @@ def check_variable_definition(self, ds):
11081272 )
11091273 elif (
11101274 self .xrds [cbnds ].ndim != 1
1111- or self .xrds .dims [self .xrds [cbnds ].dim [0 ]] != 2
1275+ or self .xrds .sizes [self .xrds [cbnds ].dim [0 ]] != 2
11121276 ):
11131277 messages .append (
11141278 f"The bounds variable '{ cbnds } ' needs to be one-dimensional and have exactly two values."
@@ -1356,8 +1520,11 @@ def check_time_continuity(self, ds):
13561520
13571521 # Check if frequency is known and supported
13581522 # (as defined in deltdic)
1359- if self .frequency in [ "unknown" , " fx"] :
1523+ if self .frequency == " fx" :
13601524 return self .make_result (level , out_of , out_of , desc , messages )
1525+ elif self .frequency == "unknown" :
1526+ messages .append ("Cannot test time continuity: Frequency not defined." )
1527+ return self .make_result (level , score , out_of , desc , messages )
13611528 if self .frequency not in deltdic .keys ():
13621529 messages .append (f"Frequency '{ self .frequency } ' not supported." )
13631530 return self .make_result (level , score , out_of , desc , messages )
@@ -1429,7 +1596,9 @@ def check_time_bounds(self, ds):
14291596 messages .append (f"Frequency '{ self .frequency } ' not supported." )
14301597 return self .make_result (level , score , out_of , desc , messages )
14311598 if self .cell_methods == "unknown" :
1432- if len (self .varname ) > 0 :
1599+ if len (self .varname ) > 0 and not self .options .get (
1600+ "time_checks_only" , False
1601+ ):
14331602 messages .append (
14341603 f"MIP table for '{ self .varname [0 ]} ' could not be identified"
14351604 " and thus no 'cell_methods' attribute could be read."
@@ -1570,7 +1739,19 @@ def check_time_range(self, ds):
15701739
15711740 # If time_range is not part of the file name structure, abort
15721741 if "time_range" not in self .drs_fn :
1573- return self .make_result (level , out_of , out_of , desc , messages )
1742+ # Attempt to infer time range from filename if only timechecks are to be run:
1743+ if self .options .get ("time_checks_only" , False ):
1744+ matches = list (
1745+ filter (
1746+ re .compile (r"^\d{1,}-?\d*$" ).match ,
1747+ os .path .splitext (os .path .basename (self .filepath ))[0 ].split ("_" ),
1748+ )
1749+ )
1750+ if len (matches ) != 1 :
1751+ return self .make_result (level , out_of , out_of , desc , messages )
1752+ self .drs_fn = {"time_range" : matches [0 ]}
1753+ else :
1754+ return self .make_result (level , out_of , out_of , desc , messages )
15741755
15751756 # Check if frequency is identified and data is not time invariant
15761757 # (as defined in deltdic)
0 commit comments