1+ import re
2+
3+
14from xrlint .node import DataArrayNode
25from xrlint .plugins .core .plugin import plugin
36from xrlint .rule import RuleContext , RuleOp
47
58
6- _EXPECTED_UNITY_FORMAT = "<unit> since <date> <time> <timezone>"
9+ _EXAMPLE_UNIT_1 = "seconds since 2010-10-8 15:15:42.5 -6:00"
10+ _EXAMPLE_UNIT_2 = "days since 2000-01-01 +0:00"
11+
12+ _AMBIGUOUS_UNITS_OF_TIME = (
13+ "years" ,
14+ "year" ,
15+ "y" ,
16+ "months" ,
17+ "month" ,
18+ "m" ,
19+ )
20+
21+ _UNAMBIGUOUS_UNITS_OF_TIME = (
22+ "days" ,
23+ "day" ,
24+ "d" ,
25+ "hours" ,
26+ "hour" ,
27+ "hr" ,
28+ "h" ,
29+ "minutes" ,
30+ "minute" ,
31+ "min" ,
32+ "seconds" ,
33+ "second" ,
34+ "sec" ,
35+ "s" ,
36+ )
37+
38+ _ALL_UNITS_OF_TIME = (* _AMBIGUOUS_UNITS_OF_TIME , * _UNAMBIGUOUS_UNITS_OF_TIME )
39+
40+ _RE_DATE = re .compile (r"^\d{4}-\d{1,2}-\d{1,2}$" )
41+ _RE_TIME = re .compile (r"^\d{1,2}:\d{1,2}:\d{1,2}(\.\d{1,6})?$" )
42+ _RE_TZ = re .compile (r"^[+-]\d{1,2}:\d{1,2}$" )
743
844
945@plugin .define_rule (
1046 "time-coordinate" ,
1147 version = "1.0.0" ,
1248 type = "problem" ,
1349 description = (
14- "Time coordinate (standard_name='time') should have"
15- " unambiguous time units encoding."
50+ "Time coordinates should have valid and unambiguous time units encoding."
1651 ),
1752 docs_url = (
1853 "https://cfconventions.org/cf-conventions/cf-conventions.html#time-coordinate"
@@ -24,50 +59,113 @@ def data_array(self, ctx: RuleContext, node: DataArrayNode):
2459 attrs = array .attrs
2560 encoding = array .encoding
2661
27- if node .name not in ctx .dataset .coords or attrs .get ("standard_name" ) != "time" :
62+ units : str | None = encoding .get ("units" , attrs .get ("units" ))
63+ if units is None :
64+ if _is_time_by_name (attrs ):
65+ ctx .report ("Missing 'units' attribute for time coordinate." )
66+ # No more to check w.o. time units
67+ return
68+ elif not isinstance (units , str ):
69+ if _is_time_by_name (attrs ):
70+ ctx .report (
71+ f"Invalid 'units' attribute for time coordinate,"
72+ f" expected type str, got { type (units ).__name__ } ."
73+ )
74+ # No more to check w.o. time units
2875 return
2976
30- if attrs .get ("long_name" ) != "time" :
31- ctx .report ("Attribute 'long_name' should be 'time'." )
77+ units_ok = True
78+ units_parts = units .split (" " )
79+ num_unit_parts = len (units_parts )
80+ is_time_by_units = num_unit_parts >= 3 and units_parts [1 ] == "since"
81+ if not is_time_by_units :
82+ if not _is_time_by_name (attrs ):
83+ # Not a time coordinate
84+ return
85+ units_ok = False
86+ else :
87+ # We have time units
3288
33- use_units_format_msg = (
34- f"Specify 'units' attribute using format { _EXPECTED_UNITY_FORMAT !r} ."
35- )
89+ if not encoding .get ("calendar" , attrs .get ("calendar" )):
90+ ctx .report (
91+ "Attribute/encoding 'calendar' should be specified." ,
92+ )
3693
37- calendar : str | None = encoding .get ("calendar" , attrs .get ("calendar" ))
38- units : str | None = encoding .get ("units" , attrs .get ("units" ))
39- if not units or not calendar :
40- if not calendar :
94+ uot_part = units_parts [0 ]
95+ date_part = units_parts [2 ]
96+ time_part = None
97+ tz_part = None
98+
99+ if uot_part not in _ALL_UNITS_OF_TIME :
41100 ctx .report (
42- "Attribute 'calendar' should be specified." ,
101+ f"Unrecognized units of measure for time"
102+ f" in 'units' attribute: { units !r} ." ,
103+ suggestions = [
104+ _units_format_suggestion (),
105+ _units_of_time_suggestion (),
106+ ],
43107 )
44- if not units :
108+ elif uot_part in _AMBIGUOUS_UNITS_OF_TIME :
45109 ctx .report (
46- "Attribute 'units' should be specified." ,
47- suggestions = [use_units_format_msg ],
110+ f"Ambiguous units of measure for time in"
111+ f" 'units' attribute: { units !r} ." ,
112+ suggestions = [
113+ _units_format_suggestion (),
114+ _units_of_time_suggestion (),
115+ ],
48116 )
49- # next checks concern units only
50- return
51117
52- units_parts = units .split (" " )
53- # note, may use regex here
54- if len (units_parts ) >= 4 and units_parts [1 ] == "since" :
55- # format seems ok, check timezone part
56- last_part = units_parts [- 1 ]
57- has_tz = last_part .lower () == "utc" or last_part [0 ] in ("+" , "-" )
58- if not has_tz :
118+ if num_unit_parts == 3 :
119+ pass
120+ elif num_unit_parts == 4 :
121+ time_or_tz_part = units_parts [3 ]
122+ if _RE_TIME .match (time_or_tz_part ):
123+ time_part = time_or_tz_part
124+ else :
125+ tz_part = time_or_tz_part
126+ elif num_unit_parts == 5 :
127+ time_part = units_parts [3 ]
128+ tz_part = units_parts [4 ]
129+ else :
130+ time_part = units_parts [- 2 ]
131+ tz_part = units_parts [- 1 ]
132+ units_ok = False
133+
134+ if units_ok and not _RE_DATE .match (date_part ):
135+ units_ok = False
136+ if units_ok and time_part and not _RE_TIME .match (time_part ):
137+ units_ok = False
138+ if units_ok and tz_part and not _RE_TZ .match (tz_part ):
139+ units_ok = False
140+
141+ if not tz_part :
59142 ctx .report (
60- f"Missing timezone in 'units' attribute: { units !r} ." ,
143+ f"Missing timezone '+H:MM' or '-H:MM' in 'units' attribute: { units !r} ." ,
61144 suggestions = [
62- use_units_format_msg ,
145+ _units_format_suggestion () ,
63146 f"Append timezone specification, e.g., use"
64- f" { ' ' .join (units_parts [:- 1 ] + ['utc ' ])!r} ." ,
147+ f" { ' ' .join (units_parts [:- 1 ] + ['+0:00 ' ])!r} ." ,
65148 ],
66149 )
67- # units ok
68- return
69150
70- ctx .report (
71- f"Invalid 'units' attribute: { units !r} ." ,
72- suggestions = [use_units_format_msg ],
73- )
151+ if not units_ok :
152+ ctx .report (
153+ f"Invalid 'units' attribute: { units !r} ." ,
154+ suggestions = [_units_format_suggestion ()],
155+ )
156+
157+
158+ def _units_format_suggestion ():
159+ use_units_format_msg = (
160+ f"Specify 'units' attribute using the UDUNITS format,"
161+ f" e.g., { _EXAMPLE_UNIT_1 !r} or { _EXAMPLE_UNIT_2 !r} ."
162+ )
163+ return use_units_format_msg
164+
165+
166+ def _units_of_time_suggestion ():
167+ return f"Use one of { ', ' .join (map (repr , _UNAMBIGUOUS_UNITS_OF_TIME ))} "
168+
169+
170+ def _is_time_by_name (attrs ):
171+ return attrs .get ("standard_name" ) == "time" or attrs .get ("axis" ) == "T"
0 commit comments