diff --git a/CHANGES.md b/CHANGES.md index c61a513..91e5605 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,6 +2,8 @@ ## Version 0.3.0 (in development) +- Added cure rule "flags" + - Fixed problem where referring to values in modules via the form `":"` raised. #21 diff --git a/docs/rule-ref.md b/docs/rule-ref.md index 5e909ed..9605915 100644 --- a/docs/rule-ref.md +++ b/docs/rule-ref.md @@ -17,6 +17,13 @@ Datasets should be given a non-empty title. Contained in: `all`-:material-lightning-bolt: `recommended`-:material-alert: +### :material-lightbulb: `flags` + +Validate attributes 'flag_values', 'flag_masks' and 'flag_meanings' that make variables that contain flag values self describing. +[:material-information-variant:](https://cfconventions.org/cf-conventions/cf-conventions.html#flags) + +Contained in: `all`-:material-lightning-bolt: + ### :material-bug: `grid-mappings` Grid mappings, if any, shall have valid grid mapping coordinate variables. diff --git a/tests/plugins/core/rules/test_flags.py b/tests/plugins/core/rules/test_flags.py new file mode 100644 index 0000000..521832d --- /dev/null +++ b/tests/plugins/core/rules/test_flags.py @@ -0,0 +1,95 @@ +import numpy as np +import xarray as xr + +from xrlint.plugins.core.rules.flags import Flags +from xrlint.testing import RuleTester, RuleTest + +valid_dataset_0 = xr.Dataset() +valid_dataset_1 = xr.Dataset( + attrs=dict(title="sensor-data"), + data_vars={ + "sensor_status_qc": xr.DataArray( + [1, 3, 5, 2, 0, 5], + dims="x", + attrs=dict( + long_name="Sensor Status", + standard_name="status_flag", + _FillValue=0, + valid_range=[1, 15], + flag_masks=[1, 2, 12, 12, 12], + flag_values=[1, 2, 4, 8, 12], + flag_meanings=( + "low_battery" + " hardware_fault" + " offline_mode" + " calibration_mode" + " maintenance_mode" + ), + ), + ) + }, +) + +# Valid, because and flag_values and flag_meanings are sufficient +valid_dataset_2 = valid_dataset_1.copy() +del valid_dataset_2.sensor_status_qc.attrs["flag_masks"] + +# Valid, because and flag_masks and flag_meanings are sufficient +valid_dataset_3 = valid_dataset_1.copy() +del valid_dataset_3.sensor_status_qc.attrs["flag_values"] + +# Invalid, because flag_meanings are needed +invalid_dataset_0 = valid_dataset_1.copy() +del invalid_dataset_0.sensor_status_qc.attrs["flag_meanings"] + +# Invalid, because flag_values are invalid +invalid_dataset_1 = valid_dataset_1.copy() +invalid_dataset_1.sensor_status_qc.attrs["flag_values"] = "1, 2, 4, 8, 12" + +# Invalid, because flag_masks are invalid +invalid_dataset_2 = valid_dataset_1.copy() +invalid_dataset_2.sensor_status_qc.attrs["flag_masks"] = "1, 2, 12, 12, 12" + +# Invalid, because flag_values and flag_masks must have same length +invalid_dataset_3 = valid_dataset_1.copy() +invalid_dataset_3.sensor_status_qc.attrs["flag_masks"] = [1, 2, 12] + +# Invalid, because missing flag_values and flag_masks +invalid_dataset_4 = valid_dataset_1.copy() +del invalid_dataset_4.sensor_status_qc.attrs["flag_values"] +del invalid_dataset_4.sensor_status_qc.attrs["flag_masks"] + +# Invalid, because flag_meanings type is not ok +invalid_dataset_5 = valid_dataset_1.copy() +invalid_dataset_5.sensor_status_qc.attrs["flag_meanings"] = [1, 2, 12, 12, 12] + +# Invalid, because flag_meanings length is not ok +invalid_dataset_6 = valid_dataset_1.copy() +invalid_dataset_6.sensor_status_qc.attrs["flag_meanings"] = "a b c" + +# Invalid, because flag variable is not int +invalid_dataset_7 = valid_dataset_1.copy() +invalid_dataset_7["sensor_status_qc"] = valid_dataset_1.sensor_status_qc.astype( + np.float64 +) + +FlagsTest = RuleTester.define_test( + "flags", + Flags, + valid=[ + RuleTest(dataset=valid_dataset_0), + RuleTest(dataset=valid_dataset_1), + RuleTest(dataset=valid_dataset_2), + RuleTest(dataset=valid_dataset_3), + ], + invalid=[ + RuleTest(dataset=invalid_dataset_0), + RuleTest(dataset=invalid_dataset_1), + RuleTest(dataset=invalid_dataset_2), + RuleTest(dataset=invalid_dataset_3), + RuleTest(dataset=invalid_dataset_4), + RuleTest(dataset=invalid_dataset_5), + RuleTest(dataset=invalid_dataset_6), + RuleTest(dataset=invalid_dataset_7), + ], +) diff --git a/tests/plugins/core/test_plugin.py b/tests/plugins/core/test_plugin.py index 6346032..f81b2ff 100644 --- a/tests/plugins/core/test_plugin.py +++ b/tests/plugins/core/test_plugin.py @@ -22,6 +22,7 @@ def test_rules_complete(self): "coords-for-dims", "dataset-title-attr", "grid-mappings", + "flags", "no-empty-attrs", "var-units-attr", }, diff --git a/xrlint/plugins/core/rules/flags.py b/xrlint/plugins/core/rules/flags.py new file mode 100644 index 0000000..b15fae1 --- /dev/null +++ b/xrlint/plugins/core/rules/flags.py @@ -0,0 +1,159 @@ +from typing import Any + +import numpy as np + +from xrlint.node import DataArrayNode +from xrlint.plugins.core.rules import plugin +from xrlint.rule import RuleOp, RuleContext + + +FLAG_MEANINGS = "flag_meanings" +FLAG_VALUES = "flag_values" +FLAG_MASKS = "flag_masks" + + +@plugin.define_rule( + "flags", + version="1.0.0", + type="suggestion", + description=( + "Validate attributes 'flag_values', 'flag_masks' and 'flag_meanings'" + " that make variables that contain flag values self describing. " + ), + docs_url="https://cfconventions.org/cf-conventions/cf-conventions.html#flags", +) +class Flags(RuleOp): + def data_array(self, ctx: RuleContext, node: DataArrayNode): + flag_values = node.data_array.attrs.get(FLAG_VALUES) + flag_masks = node.data_array.attrs.get(FLAG_MASKS) + flag_meanings = node.data_array.attrs.get(FLAG_MEANINGS) + + has_values = flag_values is not None + has_masks = flag_masks is not None + has_meanings = flag_meanings is not None + + flag_count: int | None = None + + if has_values: + flag_count = _validate_flag_values( + ctx, + flag_values, + has_meanings, + ) + + if has_masks: + flag_count = _validate_flag_masks( + ctx, + flag_masks, + has_meanings, + flag_count, + ) + + if has_meanings: + _validate_flag_meanings( + ctx, + flag_meanings, + has_values, + has_masks, + flag_count, + ) + + if has_values and has_masks: + _validate_variable( + ctx, + node.data_array.dtype, + ) + + +def _validate_flag_values( + ctx: RuleContext, flag_values: Any, has_meanings: bool +) -> int | None: + if not has_meanings: + ctx.report( + f"Missing attribute {FLAG_MEANINGS!r} to explain" + f" attribute {FLAG_VALUES!r}" + ) + type_ok, flag_count = _check_values(flag_values) + if not type_ok or flag_count is None: + ctx.report( + f"Attribute {FLAG_VALUES!r} must be a" + " 1-d array of integers with length >= 1." + ) + return flag_count + + +def _validate_flag_masks( + ctx: RuleContext, flag_masks: Any, has_meanings: bool, flag_count: int | None +) -> int | None: + if not has_meanings: + ctx.report( + f"Missing attribute {FLAG_MEANINGS!r} to explain" + f" attribute {FLAG_MASKS!r}" + ) + type_ok, flag_masks_count = _check_values(flag_masks) + if not type_ok or flag_masks_count is None: + ctx.report( + f"Attribute {FLAG_MASKS!r} must be a" + " 1-d array of integers with length >= 1." + ) + if flag_count is None: + flag_count = flag_masks_count + elif flag_masks_count is not None and flag_masks_count != flag_count: + ctx.report( + f"Attribute {FLAG_MASKS!r} must have same length" + f" as attribute {FLAG_VALUES!r}." + ) + return flag_count + + +def _validate_flag_meanings( + ctx: RuleContext, + flag_meanings: Any, + has_values: bool, + has_masks: bool, + flag_count: int | None, +): + if not has_values and not has_masks: + ctx.report( + f"Missing attribute {FLAG_VALUES!r} or {FLAG_MASKS!r}" + f" when attribute {FLAG_MASKS!r} is used." + ) + type_ok, flag_meanings_count = _check_meanings(flag_meanings) + if not type_ok or flag_meanings_count is None: + ctx.report( + f"Attribute {FLAG_MASKS!r} must be a space-separated string" + f" with at least two entries." + ) + if ( + flag_meanings_count is not None + and flag_count is not None + and flag_meanings_count != flag_count + ): + ctx.report( + f"Attribute {FLAG_MASKS!r} must have same length" + f" as attributes {FLAG_VALUES!r} or {FLAG_MEANINGS!r}." + ) + + +def _validate_variable(ctx: RuleContext, var_dtype: np.dtype): + if not np.issubdtype(var_dtype, np.integer): + ctx.report( + f"Flags variable should have an integer data type, was {var_dtype!r}" + ) + + +def _check_values(values: Any) -> tuple[bool, int | None]: + if isinstance(values, (tuple, list)) or ( + isinstance(values, np.ndarray) and values.ndim == 1 + ): + count = len(values) + return all(isinstance(v, int) for v in values), count if count >= 1 else None + return False, None + + +def _check_meanings(meanings: Any): + if isinstance(meanings, str): + meanings_list = [m.strip() for m in meanings.split(" ")] + count = len(meanings_list) + return True, count if count >= 1 else None + return False, None diff --git a/xrlint/testing.py b/xrlint/testing.py index 40243e1..6169e50 100644 --- a/xrlint/testing.py +++ b/xrlint/testing.py @@ -209,9 +209,10 @@ def _format_error_message( actual = format_problems(result.error_count, result.warning_count) expected = f"{'no problem' if test_mode == 'valid' else 'one or more problems'}" messages = "\n".join(f"- {m.message}" for m in result.messages) + messages = (":\n" + messages) if messages else "." return ( f"Rule {rule_name!r}: {test_id}:" - f" expected {expected}, but got {actual}{f':\n{messages}' if messages else '.'}" + f" expected {expected}, but got {actual}{messages}" )