Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

## Version 0.3.0 (in development)

- Added cure rule "flags"

- Fixed problem where referring to values in modules via
the form `"<module>:<attr>"` raised. #21

Expand Down
7 changes: 7 additions & 0 deletions docs/rule-ref.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,13 @@ Datasets should be given a non-empty title.

Contained in: `all`-:material-lightning-bolt: `recommended`-:material-alert:

### :material-lightbulb: `flags`

Validate attributes 'flag_values', 'flag_masks' and 'flag_meanings' that make variables that contain flag values self describing.
[:material-information-variant:](https://cfconventions.org/cf-conventions/cf-conventions.html#flags)

Contained in: `all`-:material-lightning-bolt:

### :material-bug: `grid-mappings`

Grid mappings, if any, shall have valid grid mapping coordinate variables.
Expand Down
95 changes: 95 additions & 0 deletions tests/plugins/core/rules/test_flags.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
import numpy as np
import xarray as xr

from xrlint.plugins.core.rules.flags import Flags
from xrlint.testing import RuleTester, RuleTest

valid_dataset_0 = xr.Dataset()
valid_dataset_1 = xr.Dataset(
attrs=dict(title="sensor-data"),
data_vars={
"sensor_status_qc": xr.DataArray(
[1, 3, 5, 2, 0, 5],
dims="x",
attrs=dict(
long_name="Sensor Status",
standard_name="status_flag",
_FillValue=0,
valid_range=[1, 15],
flag_masks=[1, 2, 12, 12, 12],
flag_values=[1, 2, 4, 8, 12],
flag_meanings=(
"low_battery"
" hardware_fault"
" offline_mode"
" calibration_mode"
" maintenance_mode"
),
),
)
},
)

# Valid, because and flag_values and flag_meanings are sufficient
valid_dataset_2 = valid_dataset_1.copy()
del valid_dataset_2.sensor_status_qc.attrs["flag_masks"]

# Valid, because and flag_masks and flag_meanings are sufficient
valid_dataset_3 = valid_dataset_1.copy()
del valid_dataset_3.sensor_status_qc.attrs["flag_values"]

# Invalid, because flag_meanings are needed
invalid_dataset_0 = valid_dataset_1.copy()
del invalid_dataset_0.sensor_status_qc.attrs["flag_meanings"]

# Invalid, because flag_values are invalid
invalid_dataset_1 = valid_dataset_1.copy()
invalid_dataset_1.sensor_status_qc.attrs["flag_values"] = "1, 2, 4, 8, 12"

# Invalid, because flag_masks are invalid
invalid_dataset_2 = valid_dataset_1.copy()
invalid_dataset_2.sensor_status_qc.attrs["flag_masks"] = "1, 2, 12, 12, 12"

# Invalid, because flag_values and flag_masks must have same length
invalid_dataset_3 = valid_dataset_1.copy()
invalid_dataset_3.sensor_status_qc.attrs["flag_masks"] = [1, 2, 12]

# Invalid, because missing flag_values and flag_masks
invalid_dataset_4 = valid_dataset_1.copy()
del invalid_dataset_4.sensor_status_qc.attrs["flag_values"]
del invalid_dataset_4.sensor_status_qc.attrs["flag_masks"]

# Invalid, because flag_meanings type is not ok
invalid_dataset_5 = valid_dataset_1.copy()
invalid_dataset_5.sensor_status_qc.attrs["flag_meanings"] = [1, 2, 12, 12, 12]

# Invalid, because flag_meanings length is not ok
invalid_dataset_6 = valid_dataset_1.copy()
invalid_dataset_6.sensor_status_qc.attrs["flag_meanings"] = "a b c"

# Invalid, because flag variable is not int
invalid_dataset_7 = valid_dataset_1.copy()
invalid_dataset_7["sensor_status_qc"] = valid_dataset_1.sensor_status_qc.astype(
np.float64
)

FlagsTest = RuleTester.define_test(
"flags",
Flags,
valid=[
RuleTest(dataset=valid_dataset_0),
RuleTest(dataset=valid_dataset_1),
RuleTest(dataset=valid_dataset_2),
RuleTest(dataset=valid_dataset_3),
],
invalid=[
RuleTest(dataset=invalid_dataset_0),
RuleTest(dataset=invalid_dataset_1),
RuleTest(dataset=invalid_dataset_2),
RuleTest(dataset=invalid_dataset_3),
RuleTest(dataset=invalid_dataset_4),
RuleTest(dataset=invalid_dataset_5),
RuleTest(dataset=invalid_dataset_6),
RuleTest(dataset=invalid_dataset_7),
],
)
1 change: 1 addition & 0 deletions tests/plugins/core/test_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ def test_rules_complete(self):
"coords-for-dims",
"dataset-title-attr",
"grid-mappings",
"flags",
"no-empty-attrs",
"var-units-attr",
},
Expand Down
159 changes: 159 additions & 0 deletions xrlint/plugins/core/rules/flags.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
from typing import Any

import numpy as np

from xrlint.node import DataArrayNode
from xrlint.plugins.core.rules import plugin
from xrlint.rule import RuleOp, RuleContext


FLAG_MEANINGS = "flag_meanings"
FLAG_VALUES = "flag_values"
FLAG_MASKS = "flag_masks"


@plugin.define_rule(
"flags",
version="1.0.0",
type="suggestion",
description=(
"Validate attributes 'flag_values', 'flag_masks' and 'flag_meanings'"
" that make variables that contain flag values self describing. "
),
docs_url="https://cfconventions.org/cf-conventions/cf-conventions.html#flags",
)
class Flags(RuleOp):
def data_array(self, ctx: RuleContext, node: DataArrayNode):
flag_values = node.data_array.attrs.get(FLAG_VALUES)
flag_masks = node.data_array.attrs.get(FLAG_MASKS)
flag_meanings = node.data_array.attrs.get(FLAG_MEANINGS)

has_values = flag_values is not None
has_masks = flag_masks is not None
has_meanings = flag_meanings is not None

flag_count: int | None = None

if has_values:
flag_count = _validate_flag_values(
ctx,
flag_values,
has_meanings,
)

if has_masks:
flag_count = _validate_flag_masks(
ctx,
flag_masks,
has_meanings,
flag_count,
)

if has_meanings:
_validate_flag_meanings(
ctx,
flag_meanings,
has_values,
has_masks,
flag_count,
)

if has_values and has_masks:
_validate_variable(
ctx,
node.data_array.dtype,
)


def _validate_flag_values(
ctx: RuleContext, flag_values: Any, has_meanings: bool
) -> int | None:
if not has_meanings:
ctx.report(
f"Missing attribute {FLAG_MEANINGS!r} to explain"
f" attribute {FLAG_VALUES!r}"
)
type_ok, flag_count = _check_values(flag_values)
if not type_ok or flag_count is None:
ctx.report(
f"Attribute {FLAG_VALUES!r} must be a"
" 1-d array of integers with length >= 1."
)
return flag_count


def _validate_flag_masks(
ctx: RuleContext, flag_masks: Any, has_meanings: bool, flag_count: int | None
) -> int | None:
if not has_meanings:
ctx.report(
f"Missing attribute {FLAG_MEANINGS!r} to explain"
f" attribute {FLAG_MASKS!r}"
)
type_ok, flag_masks_count = _check_values(flag_masks)
if not type_ok or flag_masks_count is None:
ctx.report(
f"Attribute {FLAG_MASKS!r} must be a"
" 1-d array of integers with length >= 1."
)
if flag_count is None:
flag_count = flag_masks_count
elif flag_masks_count is not None and flag_masks_count != flag_count:
ctx.report(
f"Attribute {FLAG_MASKS!r} must have same length"
f" as attribute {FLAG_VALUES!r}."
)
return flag_count


def _validate_flag_meanings(
ctx: RuleContext,
flag_meanings: Any,
has_values: bool,
has_masks: bool,
flag_count: int | None,
):
if not has_values and not has_masks:
ctx.report(
f"Missing attribute {FLAG_VALUES!r} or {FLAG_MASKS!r}"
f" when attribute {FLAG_MASKS!r} is used."
)
type_ok, flag_meanings_count = _check_meanings(flag_meanings)
if not type_ok or flag_meanings_count is None:
ctx.report(
f"Attribute {FLAG_MASKS!r} must be a space-separated string"
f" with at least two entries."
)
if (
flag_meanings_count is not None
and flag_count is not None
and flag_meanings_count != flag_count
):
ctx.report(
f"Attribute {FLAG_MASKS!r} must have same length"
f" as attributes {FLAG_VALUES!r} or {FLAG_MEANINGS!r}."
)


def _validate_variable(ctx: RuleContext, var_dtype: np.dtype):
if not np.issubdtype(var_dtype, np.integer):
ctx.report(
f"Flags variable should have an integer data type, was {var_dtype!r}"
)


def _check_values(values: Any) -> tuple[bool, int | None]:
if isinstance(values, (tuple, list)) or (
isinstance(values, np.ndarray) and values.ndim == 1
):
count = len(values)
return all(isinstance(v, int) for v in values), count if count >= 1 else None
return False, None


def _check_meanings(meanings: Any):
if isinstance(meanings, str):
meanings_list = [m.strip() for m in meanings.split(" ")]
count = len(meanings_list)
return True, count if count >= 1 else None
return False, None
3 changes: 2 additions & 1 deletion xrlint/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,9 +209,10 @@ def _format_error_message(
actual = format_problems(result.error_count, result.warning_count)
expected = f"{'no problem' if test_mode == 'valid' else 'one or more problems'}"
messages = "\n".join(f"- {m.message}" for m in result.messages)
messages = (":\n" + messages) if messages else "."
return (
f"Rule {rule_name!r}: {test_id}:"
f" expected {expected}, but got {actual}{f':\n{messages}' if messages else '.'}"
f" expected {expected}, but got {actual}{messages}"
)


Expand Down
Loading