Skip to content

Commit 1210c1e

Browse files
authored
Merge pull request #53 from bcdev/forman-var_missing_data
New rule `var-missing-data`
2 parents 060e085 + 25af807 commit 1210c1e

File tree

6 files changed

+123
-1
lines changed

6 files changed

+123
-1
lines changed

CHANGES.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44

55
### Adjustments and Enhancements
66

7+
- Added a new core rule `var-missing-data` that checks for the recommended
8+
use of a variable's missing data.
9+
710
- Added a new core rule `access-latency` that can be used to check the
811
time it takes to open a dataset.
912

docs/todo.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
## Desired
1414

1515
- project logo
16-
- add `core` rule checks recommended use of fill value
1716
- add `xcube` rule that helps to identify chunking issues
1817
- apply rule op args/kwargs validation schema
1918
- allow outputting suggestions, if any, that are emitted by some rules
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
# Copyright © 2025 Brockmann Consult GmbH.
2+
# This software is distributed under the terms and conditions of the
3+
# MIT license (https://mit-license.org/).
4+
import numpy as np
5+
import xarray as xr
6+
7+
from xrlint.plugins.core.rules.var_missing_data import VarMissingData
8+
from xrlint.testing import RuleTest, RuleTester
9+
10+
# Baseline fixtures: an empty dataset, and a minimal dataset with one
# coordinate ("t") and one data variable ("v") that define no missing data.
valid_dataset_0 = xr.Dataset()
valid_dataset_1 = xr.Dataset(
    data_vars={"v": xr.DataArray([10, 20, 30], dims="t", attrs={"units": "m/s"})},
    coords={"t": xr.DataArray([0, 1, 2], dims="t", attrs={"units": "seconds"})},
    attrs={"title": "v-data"},
)

# Each invalid fixture is a deep copy of the baseline with exactly one
# modification, so every case is expected to yield exactly one message.

# Coordinate "t" declares a fill value through its attributes.
invalid_dataset_0 = valid_dataset_1.copy(deep=True)
invalid_dataset_0["t"].attrs["_FillValue"] = -999

# Coordinate "t" declares a fill value through its encoding.
invalid_dataset_1 = valid_dataset_1.copy(deep=True)
invalid_dataset_1["t"].encoding["_FillValue"] = -999

# Data variable "v" has a packing attribute but no fill value.
invalid_dataset_2 = valid_dataset_1.copy(deep=True)
invalid_dataset_2["v"].attrs["scaling_factor"] = 0.01

# Data variable "v" has a floating point encoding dtype but no fill value.
invalid_dataset_3 = valid_dataset_1.copy(deep=True)
invalid_dataset_3["v"].encoding["dtype"] = np.dtype("float64")

# Data variable "v" declares a valid range.
invalid_dataset_4 = valid_dataset_1.copy(deep=True)
invalid_dataset_4["v"].attrs["valid_range"] = [0, 1]

_valid_cases = [
    RuleTest(dataset=valid_dataset_0),
    RuleTest(dataset=valid_dataset_1),
]

_invalid_cases = [
    RuleTest(
        dataset=invalid_dataset_0,
        expected=[
            "Unexpected attribute '_FillValue', coordinates must not have missing data."
        ],
    ),
    RuleTest(
        dataset=invalid_dataset_1,
        expected=[
            "Unexpected encoding '_FillValue', coordinates must not have missing data."
        ],
    ),
    RuleTest(
        dataset=invalid_dataset_2,
        expected=["Missing attribute '_FillValue' since data packing is used."],
    ),
    RuleTest(
        dataset=invalid_dataset_3,
        expected=["Missing attribute '_FillValue', which should be NaN."],
    ),
    RuleTest(
        dataset=invalid_dataset_4,
        expected=["Valid ranges are not recognized by xarray (as of Feb 2025)."],
    ),
]

VarMissingDataTest = RuleTester.define_test(
    "var-missing-data",
    VarMissingData,
    valid=_valid_cases,
    invalid=_invalid_cases,
)

tests/plugins/core/test_plugin.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ def test_rules_complete(self):
2424
"time-coordinate",
2525
"var-desc",
2626
"var-flags",
27+
"var-missing-data",
2728
"var-units",
2829
},
2930
set(plugin.rules.keys()),

xrlint/plugins/core/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ def export_plugin() -> Plugin:
2828
"time-coordinate": "error",
2929
"var-desc": "warn",
3030
"var-flags": "error",
31+
"var-missing-data": "warn",
3132
"var-units": "warn",
3233
},
3334
},
xrlint/plugins/core/rules/var_missing_data.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
# Copyright © 2025 Brockmann Consult GmbH.
2+
# This software is distributed under the terms and conditions of the
3+
# MIT license (https://mit-license.org/).
4+
5+
import numpy as np
6+
7+
from xrlint.node import VariableNode
8+
from xrlint.plugins.core.plugin import plugin
9+
from xrlint.rule import RuleContext, RuleOp
10+
11+
12+
@plugin.define_rule(
    "var-missing-data",
    version="1.0.0",
    type="suggestion",
    description=(
        "Checks the recommended use of missing data, i.e., coordinate variables"
        " should not define missing data, but packed data should."
        " Notifies about the use of valid ranges to indicate missing data, which"
        " is currently not supported by xarray."
    ),
    docs_url=(
        "https://cfconventions.org/cf-conventions/cf-conventions.html"
        "#missing-data"
    ),
)
class VarMissingData(RuleOp):
    """Rule operation that checks how a variable declares missing data.

    Per variable, the rule reports

    - a ``_FillValue`` (in encoding or attributes) on a coordinate;
    - a data variable without ``_FillValue`` that uses data packing
      (``scale_factor`` / ``add_offset``);
    - a data variable without ``_FillValue`` whose encoding ``dtype``
      is a floating point type;
    - any use of ``valid_min``, ``valid_max`` or ``valid_range``.
    """

    # Names that indicate data packing. "scale_factor" is the name defined
    # by the CF conventions; "scaling_factor" is still accepted because
    # earlier versions of this rule looked for that spelling only.
    _PACKING_NAMES = ("scale_factor", "scaling_factor", "add_offset")

    # Attributes that express missing data through a valid value range.
    _VALID_RANGE_NAMES = ("valid_min", "valid_max", "valid_range")

    def validate_variable(self, ctx: RuleContext, node: VariableNode) -> None:
        """Validate the missing-data settings of a single variable.

        Args:
            ctx: The rule context; used to access the dataset and to
                report findings.
            node: The node wrapping the variable to be checked.
        """
        array = node.array
        encoding = array.encoding
        attrs = array.attrs

        # Encoding takes precedence over attributes when naming the place
        # where the fill value was found.
        fill_value_source = None
        if "_FillValue" in encoding:
            fill_value_source = "encoding"
        elif "_FillValue" in attrs:
            fill_value_source = "attribute"

        if fill_value_source is not None and node.name in ctx.dataset.coords:
            ctx.report(
                f"Unexpected {fill_value_source} '_FillValue',"
                " coordinates must not have missing data."
            )
        elif fill_value_source is None and node.name in ctx.dataset.data_vars:
            # A packing setting may live in the encoding or in the
            # attributes, so both are consulted (encoding first).
            uses_packing = any(
                encoding.get(name, attrs.get(name)) is not None
                for name in self._PACKING_NAMES
            )
            raw_dtype = encoding.get("dtype")
            if uses_packing:
                ctx.report("Missing attribute '_FillValue' since data packing is used.")
            elif isinstance(raw_dtype, np.dtype) and np.issubdtype(
                raw_dtype, np.floating
            ):
                ctx.report("Missing attribute '_FillValue', which should be NaN.")

        # Checked for every variable, independently of the fill value checks.
        if any(name in attrs for name in self._VALID_RANGE_NAMES):
            ctx.report("Valid ranges are not recognized by xarray (as of Feb 2025).")

0 commit comments

Comments
 (0)