Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,13 @@

## Version 0.3.0 (in development)

- Added core rule "flags"
- Added more rules
- core rule "flags"
- core rule "time-coordinate" (#15)
- xcube rule "time-naming" (#15)

- Fixed problem where referring to values in modules via
the form `"<module>:<attr>"` raised. #21
the form `"<module>:<attr>"` raised. (#21)

- Introduced factory method `new_plugin` which simplifies
creating plugin objects.
Expand Down
14 changes: 14 additions & 0 deletions docs/rule-ref.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,13 @@ Every dataset element should have metadata that describes it.

Contained in: `all`-:material-lightning-bolt: `recommended`-:material-alert:

### :material-bug: `time-coordinate`

Time coordinate (standard_name='time') should have unambiguous time units encoding.
[More information.](https://cfconventions.org/cf-conventions/cf-conventions.html#time-coordinate)

Contained in: `all`-:material-lightning-bolt: `recommended`-:material-lightning-bolt:

### :material-lightbulb: `var-units-attr`

Every variable should have a valid 'units' attribute.
Expand Down Expand Up @@ -93,3 +100,10 @@ A single grid mapping shall be used for all spatial data variables of a datacube

Contained in: `all`-:material-lightning-bolt: `recommended`-:material-lightning-bolt:

### :material-bug: `time-naming`

Time coordinate and dimension should be called 'time'.
[More information.](https://xcube.readthedocs.io/en/latest/cubespec.html#temporal-reference)

Contained in: `all`-:material-lightning-bolt: `recommended`-:material-lightning-bolt:

10 changes: 9 additions & 1 deletion notebooks/mkdataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,14 @@ def make_dataset() -> xr.Dataset:
np.linspace(-90, 90, ny), dims="y", attrs={"units": "degrees"}
),
"time": xr.DataArray(
[2010 + y for y in range(nt)], dims="time", attrs={"units": "years"}
[365 * i for i in range(nt)],
dims="time",
attrs={
"standard_name": "time",
"long_name": "time",
"units": "days since 2020-01-01 utc",
"calendar": "gregorian",
},
),
"spatial_ref": xr.DataArray(
0,
Expand Down Expand Up @@ -48,6 +55,7 @@ def make_dataset() -> xr.Dataset:
def make_dataset_with_issues() -> xr.Dataset:
"""Create a dataset that produces issues with xrlint core rules."""
invalid_ds = make_dataset()
invalid_ds.time.attrs["units"] = "days since 2020-01-01 ß0:000:00"
invalid_ds.attrs = {}
invalid_ds.sst.attrs["units"] = 1
invalid_ds["sst_avg"] = xr.DataArray(
Expand Down
196 changes: 100 additions & 96 deletions notebooks/xrlint-linter.ipynb

Large diffs are not rendered by default.

78 changes: 78 additions & 0 deletions tests/plugins/core/rules/test_time_coordinate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import numpy as np
import xarray as xr

from xrlint.plugins.core.rules.time_coordinate import TimeCoordinate
from xrlint.testing import RuleTest, RuleTester

# Test fixtures for the "time-coordinate" rule.
#
# Every derived dataset is created with ``copy(deep=True)`` because the
# fixtures mutate the nested ``attrs``/``encoding`` dictionaries of the copy;
# a deep copy makes it explicit that ``valid_dataset_1`` is never affected.

# OK, because an empty dataset has no time coordinate to check
valid_dataset_0 = xr.Dataset()

# OK, because units (with timezone) and calendar are given in the encoding
valid_dataset_1 = xr.Dataset(
    coords={
        "time": xr.DataArray(
            np.array([3, 4, 5], dtype=np.dtype("datetime64[s]")),
            dims="time",
            attrs={
                "standard_name": "time",
                "long_name": "time",
            },
        ),
    },
    data_vars={
        "pos": xr.DataArray([10, 20, 30], dims="time", attrs={"units": "seconds"})
    },
)
valid_dataset_1.time.encoding["units"] = "seconds since 2000-01-01 00:00:00 +2:00"
valid_dataset_1.time.encoding["calendar"] = "gregorian"

# OK, because with decode_cf=False meta-info is in attrs still
valid_dataset_2 = valid_dataset_1.copy(deep=True)
del valid_dataset_2.time.encoding["units"]
del valid_dataset_2.time.encoding["calendar"]
valid_dataset_2.time.attrs["units"] = "seconds since 2000-01-01 UTC"
valid_dataset_2.time.attrs["calendar"] = "gregorian"

# OK, because not identified as time
valid_dataset_3 = valid_dataset_1.copy(deep=True)
del valid_dataset_3.time.attrs["standard_name"]

# OK, because the coordinate is identified by its standard_name only,
# not by its variable name, and it is still fully specified
valid_dataset_4 = valid_dataset_1.rename_vars({"time": "tm"})

# Invalid, because long_name is missing
invalid_dataset_0 = valid_dataset_1.copy(deep=True)
del invalid_dataset_0.time.attrs["long_name"]

# Invalid, because we require units
invalid_dataset_1 = valid_dataset_1.copy(deep=True)
del invalid_dataset_1.time.encoding["units"]

# Invalid, because we require calendar
invalid_dataset_2 = valid_dataset_1.copy(deep=True)
del invalid_dataset_2.time.encoding["calendar"]

# Invalid, because we require TZ units part
invalid_dataset_3 = valid_dataset_1.copy(deep=True)
invalid_dataset_3.time.encoding["units"] = "seconds since 2000-01-01 00:00:00"

# Invalid, because the units format is wrong ("<unit> since" is missing)
invalid_dataset_4 = valid_dataset_1.copy(deep=True)
invalid_dataset_4.time.encoding["units"] = "2000-01-01 00:00:00 UTC"


TimeCoordinateTest = RuleTester.define_test(
    "time-coordinate",
    TimeCoordinate,
    valid=[
        RuleTest(dataset=valid_dataset_0),
        RuleTest(dataset=valid_dataset_1),
        RuleTest(dataset=valid_dataset_2),
        RuleTest(dataset=valid_dataset_3),
        RuleTest(dataset=valid_dataset_4),
    ],
    invalid=[
        RuleTest(dataset=invalid_dataset_0),
        RuleTest(dataset=invalid_dataset_1),
        RuleTest(dataset=invalid_dataset_2),
        RuleTest(dataset=invalid_dataset_3),
        RuleTest(dataset=invalid_dataset_4),
    ],
)
1 change: 1 addition & 0 deletions tests/plugins/core/test_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ def test_rules_complete(self):
"grid-mappings",
"flags",
"no-empty-attrs",
"time-coordinate",
"var-units-attr",
},
set(_plugin.rules.keys()),
Expand Down
75 changes: 75 additions & 0 deletions tests/plugins/xcube/rules/test_time_naming.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import numpy as np

from xrlint.plugins.xcube.rules.time_naming import TimeNaming

import xarray as xr

from xrlint.testing import RuleTester, RuleTest


def make_dataset(time_var: str, time_dim: str | None = None):
    """Build a small 3x3x3 dataset with a configurable time coordinate.

    Args:
        time_var: Name of the time coordinate variable.
        time_dim: Name of the time dimension. Falls back to ``time_var``
            if not given (or empty).

    Returns:
        An ``xr.Dataset`` with coordinates ``x``, ``y`` and ``time_var``
        and the data variables ``chl`` and ``tsm`` whose dimensions
        are ``[time_dim, "y", "x"]``.
    """
    if not time_dim:
        time_dim = time_var
    size = 3
    cube_dims = [time_dim, "y", "x"]

    def spatial_axis(dim_name):
        # Equidistant spatial coordinate in the range [0, 1].
        return xr.DataArray(
            np.linspace(0, 1, size), dims=dim_name, attrs={"units": "m"}
        )

    def random_cube():
        # Data variable filled with random values; the values are arbitrary.
        return xr.DataArray(
            np.random.random((size, size, size)),
            dims=cube_dims,
            attrs={"units": "mg/m^-3"},
        )

    time_coord = xr.DataArray(
        list(range(size)),
        dims=time_dim,
        attrs={"units": "days since 2010-05-01 UTC", "calendar": "gregorian"},
    )

    coords = {
        "x": spatial_axis("x"),
        "y": spatial_axis("y"),
        time_var: time_coord,
    }
    data_vars = {
        "chl": random_cube(),
        "tsm": random_cube(),
    }
    return xr.Dataset(
        attrs=dict(title="v-data"),
        coords=coords,
        data_vars=data_vars,
    )


# --- Datasets expected to pass --------------------------------------------

valid_dataset_0 = xr.Dataset()
valid_dataset_1 = make_dataset("time")

# --- Datasets expected to produce issues ----------------------------------

# Time coordinate is not called 'time'
invalid_dataset_0 = make_dataset("t")

# 'units' attribute is missing
invalid_dataset_1 = make_dataset("time")
invalid_dataset_1.time.attrs.pop("units")

# 'calendar' attribute is missing
invalid_dataset_2 = make_dataset("time")
invalid_dataset_2.time.attrs.pop("calendar")

# 'units' attribute is not a time unit
invalid_dataset_3 = make_dataset("time")
invalid_dataset_3.time.attrs.update(units="meters")

# Coordinate 'time' should have dimension 'time'
invalid_dataset_4 = make_dataset("time", "t0")

# Coordinate 't0' should be named 'time'
invalid_dataset_5 = make_dataset("t0", "time")

_valid_datasets = (
    valid_dataset_0,
    valid_dataset_1,
)
_invalid_datasets = (
    invalid_dataset_0,
    invalid_dataset_1,
    invalid_dataset_2,
    invalid_dataset_3,
    invalid_dataset_4,
    invalid_dataset_5,
)

TimeNamingTest = RuleTester.define_test(
    "time-naming",
    TimeNaming,
    valid=[RuleTest(dataset=ds) for ds in _valid_datasets],
    invalid=[RuleTest(dataset=ds) for ds in _invalid_datasets],
)
3 changes: 2 additions & 1 deletion tests/plugins/xcube/test_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,10 @@ def test_rules_complete(self):
"cube-dims-order",
"data-var-colors",
"grid-mapping-naming",
"increasing-time",
"lat-lon-naming",
"single-grid-mapping",
"increasing-time",
"time-naming",
},
set(_plugin.rules.keys()),
)
1 change: 1 addition & 0 deletions xrlint/plugins/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ def export_plugin() -> Plugin:
"dataset-title-attr": "warn",
"grid-mappings": "error",
"no-empty-attrs": "warn",
"time-coordinate": "error",
"var-units-attr": "warn",
},
}
Expand Down
75 changes: 75 additions & 0 deletions xrlint/plugins/core/rules/time_coordinate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
from xrlint.node import DataArrayNode
from xrlint.plugins.core.rules import plugin
from xrlint.rule import RuleContext, RuleOp


# Human-readable template of the expected time units, used in suggestions.
_EXPECTED_UNITY_FORMAT = "<unit> since <date> <time> <timezone>"


@plugin.define_rule(
    "time-coordinate",
    version="1.0.0",
    type="problem",
    description=(
        "Time coordinate (standard_name='time') should have"
        " unambiguous time units encoding."
    ),
    docs_url=(
        "https://cfconventions.org/cf-conventions/cf-conventions.html"
        "#time-coordinate"
    ),
)
class TimeCoordinate(RuleOp):
    """Check that time coordinates carry unambiguous units and a calendar.

    A data array is checked only if it is a coordinate of the dataset and
    its ``standard_name`` attribute equals ``"time"``.
    """

    def data_array(self, ctx: RuleContext, node: DataArrayNode):
        """Validate one data array node and report issues through ``ctx``.

        Args:
            ctx: Rule context; provides the dataset and receives the reports.
            node: Node wrapping the data array to be checked.
        """
        array = node.data_array
        attrs = array.attrs
        encoding = array.encoding

        if node.name not in ctx.dataset.coords or attrs.get("standard_name") != "time":
            return

        if attrs.get("long_name") != "time":
            ctx.report("Attribute 'long_name' should be 'time'.")

        use_units_format_msg = (
            f"Specify 'units' attribute using format {_EXPECTED_UNITY_FORMAT!r}."
        )

        # Values in the encoding take precedence over those in the attributes.
        calendar = encoding.get("calendar", attrs.get("calendar"))
        units = encoding.get("units", attrs.get("units"))
        if not units or not calendar:
            if not calendar:
                ctx.report(
                    "Attribute 'calendar' should be specified.",
                )
            if not units:
                ctx.report(
                    "Attribute 'units' should be specified.",
                    suggestions=[use_units_format_msg],
                )
            # next checks concern units only
            return

        # A non-string 'units' value cannot be parsed; it falls through to
        # the "Invalid 'units'" report below instead of raising.
        if isinstance(units, str):
            # Split on any whitespace so that leading, trailing, or repeated
            # blanks never yield empty parts.
            units_parts = units.split()
            # note, may use regex here
            if len(units_parts) >= 4 and units_parts[1] == "since":
                # format seems ok, check timezone part
                last_part = units_parts[-1]
                has_tz = last_part.lower() == "utc" or last_part.startswith(
                    ("+", "-")
                )
                if not has_tz:
                    # With exactly four parts the last part is taken to be
                    # the time, so the timezone is appended. With more parts
                    # the last part is an unrecognized timezone and is
                    # replaced.
                    if len(units_parts) == 4:
                        kept_parts = units_parts
                    else:
                        kept_parts = units_parts[:-1]
                    suggested_units = " ".join(kept_parts + ["utc"])
                    ctx.report(
                        f"Missing timezone in 'units' attribute: {units}",
                        suggestions=[
                            use_units_format_msg,
                            f"Append timezone specification, e.g., use"
                            f" {suggested_units!r}.",
                        ],
                    )
                # units ok
                return

        ctx.report(
            f"Invalid 'units' attribute: {units}",
            suggestions=[use_units_format_msg],
        )
1 change: 1 addition & 0 deletions xrlint/plugins/xcube/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ def export_plugin() -> Plugin:
"xcube/increasing-time": "error",
"xcube/lat-lon-naming": "error",
"xcube/single-grid-mapping": "error",
"xcube/time-naming": "error",
},
}
)
Expand Down
2 changes: 1 addition & 1 deletion xrlint/plugins/xcube/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
LAT_NAME = "lat"
X_NAME = "x"
Y_NAME = "y"
T_NAME = "time"
TIME_NAME = "time"

GM_NAMES = "spatial_ref", "crs"
GM_NAMES_TEXT = " or ".join(repr(gm_name) for gm_name in GM_NAMES)
8 changes: 4 additions & 4 deletions xrlint/plugins/xcube/rules/cube_dims_order.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from xrlint.node import DataArrayNode
from xrlint.plugins.xcube.constants import LAT_NAME, LON_NAME, T_NAME, X_NAME, Y_NAME
from xrlint.plugins.xcube.constants import LAT_NAME, LON_NAME, X_NAME, Y_NAME, TIME_NAME
from xrlint.plugins.xcube.rules import plugin
from xrlint.rule import RuleContext, RuleOp

Expand All @@ -10,7 +10,7 @@
type="problem",
description=(
f"Order of dimensions in spatio-temporal datacube variables"
f" should be [{T_NAME}, ..., {Y_NAME}, {X_NAME}]."
f" should be [{TIME_NAME}, ..., {Y_NAME}, {X_NAME}]."
),
docs_url=(
"https://xcube.readthedocs.io/en/latest/cubespec.html#data-model-and-format"
Expand All @@ -36,8 +36,8 @@ def data_array(self, ctx: RuleContext, node: DataArrayNode):
return

t_name = None
if T_NAME in indexes:
t_name = T_NAME
if TIME_NAME in indexes:
t_name = TIME_NAME

n = len(dims)
t_index = indexes[t_name] if t_name else None
Expand Down
Loading
Loading