Skip to content

Commit 7390352

Browse files
authored
Merge pull request #34 from bcdev/forman-33-fix_and_enhance_time_coord_rule
Fix and enhance core `time-coordinate` rule
2 parents c247a7e + 13f7332 commit 7390352

File tree

4 files changed

+188
-48
lines changed

4 files changed

+188
-48
lines changed

CHANGES.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
## Version 0.4.0 (in development)
44

5+
- Fixed and enhanced core rule `time-coordinate`. (#33)
56
- New xcube rule `no-chunked-coords`. (#29)
67
- New xcube multi-level dataset rules:
78
- `ml-dataset-meta`: verifies that a meta info file exists and is consistent;

docs/rule-ref.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ Contained in: `all`-:material-lightning-bolt: `recommended`-:material-alert:
5959

6060
### :material-bug: `time-coordinate`
6161

62-
Time coordinate (standard_name='time') should have unambiguous time units encoding.
62+
Time coordinates should have valid and unambiguous time units encoding.
6363
[:material-information-variant:](https://cfconventions.org/cf-conventions/cf-conventions.html#time-coordinate)
6464

6565
Contained in: `all`-:material-lightning-bolt: `recommended`-:material-lightning-bolt:

tests/plugins/core/rules/test_time_coordinate.py

Lines changed: 53 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
dims="time",
1313
attrs={
1414
"standard_name": "time",
15-
"long_name": "time",
1615
},
1716
),
1817
},
@@ -27,36 +26,70 @@
2726
valid_dataset_2 = valid_dataset_1.copy()
2827
del valid_dataset_2.time.encoding["units"]
2928
del valid_dataset_2.time.encoding["calendar"]
30-
valid_dataset_2.time.attrs["units"] = "seconds since 2000-01-01 UTC"
29+
valid_dataset_2.time.attrs["units"] = "seconds since 2000-1-1 +2:00"
3130
valid_dataset_2.time.attrs["calendar"] = "gregorian"
3231

3332
# OK, because not identified as time
3433
valid_dataset_3 = valid_dataset_1.copy()
34+
del valid_dataset_3.time.encoding["units"]
3535
del valid_dataset_3.time.attrs["standard_name"]
3636

37-
# OK, because we only look for standard_name
37+
# OK, because we only look for time units
3838
valid_dataset_4 = valid_dataset_1.rename_vars({"time": "tm"})
39+
del valid_dataset_4.tm.attrs["standard_name"]
3940

40-
# Invalid, because long_name is missing
41+
# OK, because not recognized as time coord
42+
valid_dataset_5 = valid_dataset_1.copy()
43+
valid_dataset_5.time.encoding["units"] = 1
44+
del valid_dataset_5.time.attrs["standard_name"]
45+
46+
# Invalid, because units is invalid but standard_name given
4147
invalid_dataset_0 = valid_dataset_1.copy()
42-
del invalid_dataset_0.time.attrs["long_name"]
48+
invalid_dataset_0.time.encoding["units"] = 1
4349

44-
# Invalid, because we require units
50+
# Invalid, because we require units but standard_name given
4551
invalid_dataset_1 = valid_dataset_1.copy(deep=True)
4652
del invalid_dataset_1.time.encoding["units"]
4753

48-
# Invalid, because we require calendar
54+
# Invalid, because there are no time units although standard_name is given
4955
invalid_dataset_2 = valid_dataset_1.copy(deep=True)
50-
del invalid_dataset_2.time.encoding["calendar"]
56+
invalid_dataset_2.time.encoding["units"] = "years from 2000-1-1 +0:0"
5157

52-
# Invalid, because we require TZ units part
58+
# Invalid, because we require calendar
5359
invalid_dataset_3 = valid_dataset_1.copy(deep=True)
54-
invalid_dataset_3.time.encoding["units"] = "seconds since 2000-01-01 00:00:00"
60+
del invalid_dataset_3.time.encoding["calendar"]
5561

56-
# Invalid, because we require units format wrong
62+
# Invalid, because we use invalid UOT
5763
invalid_dataset_4 = valid_dataset_1.copy(deep=True)
58-
invalid_dataset_4.time.encoding["units"] = "2000-01-01 00:00:00 UTC"
64+
invalid_dataset_4.time.encoding["units"] = "millis since 2000-1-1 +0:0"
65+
66+
# Invalid, because we use ambiguous UOT
67+
invalid_dataset_5 = valid_dataset_1.copy(deep=True)
68+
invalid_dataset_5.time.encoding["units"] = "years since 2000-1-1 +0:0"
69+
70+
# Invalid, because we require timezone
71+
invalid_dataset_6 = valid_dataset_1.copy(deep=True)
72+
invalid_dataset_6.time.encoding["units"] = "seconds since 2000-01-01 00:00:00"
73+
74+
# Invalid, because we require timezone
75+
invalid_dataset_7 = valid_dataset_1.copy(deep=True)
76+
invalid_dataset_7.time.encoding["units"] = "seconds since 2000-01-01"
77+
78+
# Invalid, because we have 6 units parts
79+
invalid_dataset_8 = valid_dataset_1.copy(deep=True)
80+
invalid_dataset_8.time.encoding["units"] = "days since 2000-01-01 12:00:00 +0:00 utc"
81+
82+
# Invalid, because the date part is invalid
83+
invalid_dataset_9 = valid_dataset_1.copy(deep=True)
84+
invalid_dataset_9.time.encoding["units"] = "days since 00-01-01 12:00:00 +0:00"
85+
86+
# Invalid, because the time part is invalid
87+
invalid_dataset_10 = valid_dataset_1.copy(deep=True)
88+
invalid_dataset_10.time.encoding["units"] = "days since 2000-01-01 12:00 +0:00"
5989

90+
# Invalid, because the tz part is invalid
91+
invalid_dataset_11 = valid_dataset_1.copy(deep=True)
92+
invalid_dataset_11.time.encoding["units"] = "days since 2000-01-01 12:00:00 utc"
6093

6194
TimeCoordinateTest = RuleTester.define_test(
6295
"time-coordinate",
@@ -67,12 +100,20 @@
67100
RuleTest(dataset=valid_dataset_2),
68101
RuleTest(dataset=valid_dataset_3),
69102
RuleTest(dataset=valid_dataset_4),
103+
RuleTest(dataset=valid_dataset_5),
70104
],
71105
invalid=[
72106
RuleTest(dataset=invalid_dataset_0),
73107
RuleTest(dataset=invalid_dataset_1),
74108
RuleTest(dataset=invalid_dataset_2),
75109
RuleTest(dataset=invalid_dataset_3),
76110
RuleTest(dataset=invalid_dataset_4),
111+
RuleTest(dataset=invalid_dataset_5),
112+
RuleTest(dataset=invalid_dataset_6),
113+
RuleTest(dataset=invalid_dataset_7),
114+
RuleTest(dataset=invalid_dataset_8),
115+
RuleTest(dataset=invalid_dataset_9),
116+
RuleTest(dataset=invalid_dataset_10),
117+
RuleTest(dataset=invalid_dataset_11),
77118
],
78119
)
Lines changed: 133 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,53 @@
1+
import re
2+
3+
14
from xrlint.node import DataArrayNode
25
from xrlint.plugins.core.plugin import plugin
36
from xrlint.rule import RuleContext, RuleOp
47

58

6-
_EXPECTED_UNITY_FORMAT = "<unit> since <date> <time> <timezone>"
9+
_EXAMPLE_UNIT_1 = "seconds since 2010-10-8 15:15:42.5 -6:00"
10+
_EXAMPLE_UNIT_2 = "days since 2000-01-01 +0:00"
11+
12+
_AMBIGUOUS_UNITS_OF_TIME = (
13+
"years",
14+
"year",
15+
"y",
16+
"months",
17+
"month",
18+
"m",
19+
)
20+
21+
_UNAMBIGUOUS_UNITS_OF_TIME = (
22+
"days",
23+
"day",
24+
"d",
25+
"hours",
26+
"hour",
27+
"hr",
28+
"h",
29+
"minutes",
30+
"minute",
31+
"min",
32+
"seconds",
33+
"second",
34+
"sec",
35+
"s",
36+
)
37+
38+
_ALL_UNITS_OF_TIME = (*_AMBIGUOUS_UNITS_OF_TIME, *_UNAMBIGUOUS_UNITS_OF_TIME)
39+
40+
_RE_DATE = re.compile(r"^\d{4}-\d{1,2}-\d{1,2}$")
41+
_RE_TIME = re.compile(r"^\d{1,2}:\d{1,2}:\d{1,2}(\.\d{1,6})?$")
42+
_RE_TZ = re.compile(r"^[+-]\d{1,2}:\d{1,2}$")
743

844

945
@plugin.define_rule(
1046
"time-coordinate",
1147
version="1.0.0",
1248
type="problem",
1349
description=(
14-
"Time coordinate (standard_name='time') should have"
15-
" unambiguous time units encoding."
50+
"Time coordinates should have valid and unambiguous time units encoding."
1651
),
1752
docs_url=(
1853
"https://cfconventions.org/cf-conventions/cf-conventions.html#time-coordinate"
@@ -24,50 +59,113 @@ def data_array(self, ctx: RuleContext, node: DataArrayNode):
2459
attrs = array.attrs
2560
encoding = array.encoding
2661

27-
if node.name not in ctx.dataset.coords or attrs.get("standard_name") != "time":
62+
units: str | None = encoding.get("units", attrs.get("units"))
63+
if units is None:
64+
if _is_time_by_name(attrs):
65+
ctx.report("Missing 'units' attribute for time coordinate.")
66+
# No more to check w.o. time units
67+
return
68+
elif not isinstance(units, str):
69+
if _is_time_by_name(attrs):
70+
ctx.report(
71+
f"Invalid 'units' attribute for time coordinate,"
72+
f" expected type str, got {type(units).__name__}."
73+
)
74+
# No more to check w.o. time units
2875
return
2976

30-
if attrs.get("long_name") != "time":
31-
ctx.report("Attribute 'long_name' should be 'time'.")
77+
units_ok = True
78+
units_parts = units.split(" ")
79+
num_unit_parts = len(units_parts)
80+
is_time_by_units = num_unit_parts >= 3 and units_parts[1] == "since"
81+
if not is_time_by_units:
82+
if not _is_time_by_name(attrs):
83+
# Not a time coordinate
84+
return
85+
units_ok = False
86+
else:
87+
# We have time units
3288

33-
use_units_format_msg = (
34-
f"Specify 'units' attribute using format {_EXPECTED_UNITY_FORMAT!r}."
35-
)
89+
if not encoding.get("calendar", attrs.get("calendar")):
90+
ctx.report(
91+
"Attribute/encoding 'calendar' should be specified.",
92+
)
3693

37-
calendar: str | None = encoding.get("calendar", attrs.get("calendar"))
38-
units: str | None = encoding.get("units", attrs.get("units"))
39-
if not units or not calendar:
40-
if not calendar:
94+
uot_part = units_parts[0]
95+
date_part = units_parts[2]
96+
time_part = None
97+
tz_part = None
98+
99+
if uot_part not in _ALL_UNITS_OF_TIME:
41100
ctx.report(
42-
"Attribute 'calendar' should be specified.",
101+
f"Unrecognized units of measure for time"
102+
f" in 'units' attribute: {units!r}.",
103+
suggestions=[
104+
_units_format_suggestion(),
105+
_units_of_time_suggestion(),
106+
],
43107
)
44-
if not units:
108+
elif uot_part in _AMBIGUOUS_UNITS_OF_TIME:
45109
ctx.report(
46-
"Attribute 'units' should be specified.",
47-
suggestions=[use_units_format_msg],
110+
f"Ambiguous units of measure for time in"
111+
f" 'units' attribute: {units!r}.",
112+
suggestions=[
113+
_units_format_suggestion(),
114+
_units_of_time_suggestion(),
115+
],
48116
)
49-
# next checks concern units only
50-
return
51117

52-
units_parts = units.split(" ")
53-
# note, may use regex here
54-
if len(units_parts) >= 4 and units_parts[1] == "since":
55-
# format seems ok, check timezone part
56-
last_part = units_parts[-1]
57-
has_tz = last_part.lower() == "utc" or last_part[0] in ("+", "-")
58-
if not has_tz:
118+
if num_unit_parts == 3:
119+
pass
120+
elif num_unit_parts == 4:
121+
time_or_tz_part = units_parts[3]
122+
if _RE_TIME.match(time_or_tz_part):
123+
time_part = time_or_tz_part
124+
else:
125+
tz_part = time_or_tz_part
126+
elif num_unit_parts == 5:
127+
time_part = units_parts[3]
128+
tz_part = units_parts[4]
129+
else:
130+
time_part = units_parts[-2]
131+
tz_part = units_parts[-1]
132+
units_ok = False
133+
134+
if units_ok and not _RE_DATE.match(date_part):
135+
units_ok = False
136+
if units_ok and time_part and not _RE_TIME.match(time_part):
137+
units_ok = False
138+
if units_ok and tz_part and not _RE_TZ.match(tz_part):
139+
units_ok = False
140+
141+
if not tz_part:
59142
ctx.report(
60-
f"Missing timezone in 'units' attribute: {units!r}.",
143+
f"Missing timezone '+H:MM' or '-H:MM' in 'units' attribute: {units!r}.",
61144
suggestions=[
62-
use_units_format_msg,
145+
_units_format_suggestion(),
63146
f"Append timezone specification, e.g., use"
64-
f" {' '.join(units_parts[:-1] + ['utc'])!r}.",
147+
f" {' '.join(units_parts[:-1] + ['+0:00'])!r}.",
65148
],
66149
)
67-
# units ok
68-
return
69150

70-
ctx.report(
71-
f"Invalid 'units' attribute: {units!r}.",
72-
suggestions=[use_units_format_msg],
73-
)
151+
if not units_ok:
152+
ctx.report(
153+
f"Invalid 'units' attribute: {units!r}.",
154+
suggestions=[_units_format_suggestion()],
155+
)
156+
157+
158+
def _units_format_suggestion():
    """Return a suggestion message pointing users at the UDUNITS time format."""
    return (
        f"Specify 'units' attribute using the UDUNITS format,"
        f" e.g., {_EXAMPLE_UNIT_1!r} or {_EXAMPLE_UNIT_2!r}."
    )
164+
165+
166+
def _units_of_time_suggestion():
    """Return a suggestion message listing the accepted units of time."""
    unit_list = ", ".join(map(repr, _UNAMBIGUOUS_UNITS_OF_TIME))
    return f"Use one of {unit_list}"
168+
169+
170+
def _is_time_by_name(attrs):
171+
return attrs.get("standard_name") == "time" or attrs.get("axis") == "T"

0 commit comments

Comments
 (0)