Skip to content

Commit cacd12c

Browse files
committed
new rule var-missing-data
1 parent 2817da4 commit cacd12c

File tree

5 files changed

+106
-2
lines changed

5 files changed

+106
-2
lines changed

CHANGES.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44

55
### Adjustments and Enhancements
66

7-
- Added a new core rule `var-fill-value` that checks for the recommended
8-
use of a variable's fill value.
7+
- Added a new core rule `var-missing-data` that checks for the recommended
8+
use of missing-data indicators (e.g., `_FillValue`) in variables.
99

1010
- Added a new core rule `access-latency` that can be used to check the
1111
time it takes to open a dataset.
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
# Copyright © 2025 Brockmann Consult GmbH.
2+
# This software is distributed under the terms and conditions of the
3+
# MIT license (https://mit-license.org/).
4+
5+
import xarray as xr
6+
7+
from xrlint.plugins.core.rules.var_missing_data import VarMissingData
8+
from xrlint.testing import RuleTest, RuleTester
9+
10+
# Test datasets for the "var-missing-data" rule. Each dataset differs from
# the baseline (valid_dataset_1) in exactly one missing-data related
# attribute or encoding entry, so every expected message maps to one cause.

# An empty dataset has no variables, hence nothing to report.
valid_dataset_0 = xr.Dataset()

# Baseline: integer coordinate and data variable without any fill value,
# packing information, raw dtype encoding, or valid range.
valid_dataset_1 = xr.Dataset(
    attrs=dict(title="v-data"),
    coords={"t": xr.DataArray([0, 1, 2], dims="t", attrs={"units": "seconds"})},
    data_vars={"v": xr.DataArray([10, 20, 30], dims="t", attrs={"units": "m/s"})},
)

# Packed data variable that declares its fill value in the encoding.
valid_dataset_2 = valid_dataset_1.copy()
valid_dataset_2.v.encoding = {"_FillValue": -1, "add_offset": 5}

# Packed data variable that declares its fill value as an attribute.
valid_dataset_3 = valid_dataset_1.copy()
valid_dataset_3.v.attrs = {"units": "m/s", "_FillValue": -1, "add_offset": 5}

# Coordinate with a fill value given as attribute.
invalid_dataset_0 = valid_dataset_1.copy()
invalid_dataset_0.t.attrs = {"units": "seconds", "_FillValue": -1}

# Coordinate with a fill value given in the encoding.
invalid_dataset_1 = valid_dataset_1.copy()
invalid_dataset_1.t.encoding = {"_FillValue": -1}

# Packed data variable without a fill value.
invalid_dataset_2 = valid_dataset_1.copy()
invalid_dataset_2.v.attrs = {"units": "m/s", "add_offset": 5}

# Data variable stored as floating point without a fill value.
# The dtype is taken from a float array so that it is a numpy dtype instance.
invalid_dataset_3 = valid_dataset_1.copy()
invalid_dataset_3.v.encoding = {"dtype": xr.DataArray([0.5]).dtype}

# Data variable that uses a valid range to indicate missing data.
invalid_dataset_4 = valid_dataset_1.copy()
invalid_dataset_4.v.attrs = {"units": "m/s", "valid_range": [0, 100]}


VarMissingDataTest = RuleTester.define_test(
    "var-missing-data",
    VarMissingData,
    valid=[
        RuleTest(dataset=valid_dataset_0),
        RuleTest(dataset=valid_dataset_1),
        RuleTest(dataset=valid_dataset_2),
        RuleTest(dataset=valid_dataset_3),
    ],
    invalid=[
        RuleTest(
            dataset=invalid_dataset_0,
            expected=[
                "Unexpected attribute '_FillValue',"
                " coordinates must not have missing data."
            ],
        ),
        RuleTest(
            dataset=invalid_dataset_1,
            expected=[
                "Unexpected encoding '_FillValue',"
                " coordinates must not have missing data."
            ],
        ),
        RuleTest(
            dataset=invalid_dataset_2,
            expected=["Missing attribute '_FillValue' since data is packed."],
        ),
        RuleTest(
            dataset=invalid_dataset_3,
            expected=["Missing attribute '_FillValue', which should be NaN."],
        ),
        RuleTest(
            dataset=invalid_dataset_4,
            expected=["Valid ranges are not recognized by xarray (as of Feb 2025)."],
        ),
    ],
)

tests/plugins/core/test_plugin.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ def test_rules_complete(self):
2424
"time-coordinate",
2525
"var-desc",
2626
"var-flags",
27+
"var-missing-data",
2728
"var-units",
2829
},
2930
set(plugin.rules.keys()),

xrlint/plugins/core/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ def export_plugin() -> Plugin:
2828
"time-coordinate": "error",
2929
"var-desc": "warn",
3030
"var-flags": "error",
31+
"var-missing-data": "warn",
3132
"var-units": "warn",
3233
},
3334
},
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
# Copyright © 2025 Brockmann Consult GmbH.
2+
# This software is distributed under the terms and conditions of the
3+
# MIT license (https://mit-license.org/).
4+
5+
import numpy as np
6+
7+
from xrlint.node import VariableNode
8+
from xrlint.plugins.core.plugin import plugin
9+
from xrlint.rule import RuleContext, RuleOp
10+
11+
12+
@plugin.define_rule(
13+
"var-missing-data",
14+
version="1.0.0",
15+
type="suggestion",
16+
description=(
17+
"Checks the recommended use of missing data, i.e., coordinate variables"
18+
" should not define missing data, but packed data should."
19+
" Notifies about the use of valid ranges to indicate missing data, which"
20+
" is currently not supported by xarray."
21+
),
22+
docs_url="https://cfconventions.org/cf-conventions/cf-conventions.html#units",
23+
)
24+
class VarMissingData(RuleOp):
25+
def validate_variable(self, ctx: RuleContext, node: VariableNode):
26+
array = node.array
27+
encoding = array.encoding
28+
attrs = array.attrs
29+
30+
fill_value_source = None
31+
if "_FillValue" in encoding:
32+
fill_value_source = "encoding"
33+
elif "_FillValue" in attrs:
34+
fill_value_source = "attribute"
35+
36+
if fill_value_source is not None and node.name in ctx.dataset.coords:
37+
ctx.report(
38+
f"Unexpected {fill_value_source} '_FillValue',"
39+
f" coordinates must not have missing data."
40+
)
41+
elif fill_value_source is None and node.name in ctx.dataset.data_vars:
42+
scaling_factor = encoding.get("scaling_factor", attrs.get("scaling_factor"))
43+
add_offset = encoding.get("add_offset", attrs.get("add_offset"))
44+
raw_dtype = encoding.get("dtype")
45+
if add_offset is not None or scaling_factor is not None:
46+
ctx.report("Missing attribute '_FillValue' since data is packed.")
47+
elif isinstance(raw_dtype, np.dtype) and np.issubdtype(
48+
raw_dtype, np.floating
49+
):
50+
ctx.report("Missing attribute '_FillValue', which should be NaN.")
51+
52+
if any((name in attrs) for name in ("valid_min", "valid_max", "valid_range")):
53+
ctx.report("Valid ranges are not recognized by xarray (as of Feb 2025).")

0 commit comments

Comments
 (0)