From b9c865ae0b3630f94608c25c40eea8c548e479d7 Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Wed, 29 Jan 2025 19:51:23 +0100 Subject: [PATCH 1/3] Started two new rules --- docs/rule-ref.md | 21 +++++ xrlint/plugins/core/rules/conventions.py | 50 +++++++++++ .../plugins/core/rules/dataset_description.py | 86 +++++++++++++++++++ xrlint/plugins/core/rules/var_units_attr.py | 1 + 4 files changed, 158 insertions(+) create mode 100644 xrlint/plugins/core/rules/conventions.py create mode 100644 xrlint/plugins/core/rules/dataset_description.py diff --git a/docs/rule-ref.md b/docs/rule-ref.md index 2974084..17c1289 100644 --- a/docs/rule-ref.md +++ b/docs/rule-ref.md @@ -5,12 +5,32 @@ New rules will be added by upcoming XRLint releases. ## Core Rules +### :material-lightbulb: `conventions` + +Datasets should identify the applicable conventions using the `Conventions` attribute. + The rule has an optional configuration parameter `match` which is a regex pattern that the value of the `Conventions` attribute must match, if any. +[:material-information-variant:](https://cfconventions.org/cf-conventions/cf-conventions.html#identification-of-conventions) + +Contained in: `all`-:material-lightning-bolt: + ### :material-bug: `coords-for-dims` Dimensions of data variables should have corresponding coordinates. Contained in: `all`-:material-lightning-bolt: `recommended`-:material-lightning-bolt: +### :material-lightbulb: `dataset-description` + +A dataset should provide information about where the data came from and what has been done to it. This information is mainly for the benefit of human readers. The rule accepts the following configuration parameters: + +- `global_attrs`: list of global attribute names. Defaults to `['title', 'history']`. +- `var_attrs`: list of variable attribute names. Defaults to `['institution', 'source', 'references', 'comment']`. +- `ignored_vars`: list of ignored variables (regex patterns). Defaults to `['crs', 'spatial_ref']`. + +[:material-information-variant:](https://cfconventions.org/cf-conventions/cf-conventions.html#description-of-file-contents) + +Contained in: `all`-:material-lightning-bolt: + ### :material-lightbulb: `dataset-title-attr` Datasets should be given a non-empty title. @@ -67,6 +87,7 @@ Contained in: `all`-:material-lightning-bolt: `recommended`-:material-lightning ### :material-lightbulb: `var-units-attr` Every variable should have a valid 'units' attribute. +[:material-information-variant:](https://cfconventions.org/cf-conventions/cf-conventions.html#units) Contained in: `all`-:material-lightning-bolt: `recommended`-:material-alert: diff --git a/xrlint/plugins/core/rules/conventions.py b/xrlint/plugins/core/rules/conventions.py new file mode 100644 index 0000000..df2aa24 --- /dev/null +++ b/xrlint/plugins/core/rules/conventions.py @@ -0,0 +1,50 @@ +import re + + +from xrlint.node import DatasetNode +from xrlint.plugins.core.plugin import plugin +from xrlint.rule import RuleContext, RuleOp, RuleExit +from xrlint.util.schema import schema + +DEFAULT_ATTR_NAMES = ["Conventions", "title", "source", "history", "name"] + + +@plugin.define_rule( + "conventions", + version="1.0.0", + type="suggestion", + description=( + "Datasets should identify the applicable conventions" + " using the `Conventions` attribute.\n" + " The rule has an optional configuration parameter `match` which" + " is a regex pattern that the value of the `Conventions` attribute" + " must match, if any." + ), + docs_url=( + "https://cfconventions.org/cf-conventions/cf-conventions.html" + "#identification-of-conventions" + ), + schema=schema( + "object", properties=dict(match=schema("string", title="Regex pattern")) + ), +) +class Conventions(RuleOp): + def __init__(self, match: str | None = None): + self.match = re.compile(match) if match else None + + def dataset(self, ctx: RuleContext, node: DatasetNode): + if "Conventions" not in node.dataset.attrs: + ctx.report("Missing attribute 'Conventions'.") + else: + conventions_spec = node.dataset.attrs.get("Conventions") + if not isinstance(conventions_spec, str): + ctx.report( + f"Invalid attribute 'Conventions':" + f" expected string, got value {conventions_spec!r}." + ) + elif self.match is not None and not self.match.match(conventions_spec): + ctx.report( + f"Invalid attribute 'Conventions':" + f" {conventions_spec!r} doesn't match pattern {self.match.pattern!r}." + ) + raise RuleExit diff --git a/xrlint/plugins/core/rules/dataset_description.py b/xrlint/plugins/core/rules/dataset_description.py new file mode 100644 index 0000000..ba47786 --- /dev/null +++ b/xrlint/plugins/core/rules/dataset_description.py @@ -0,0 +1,86 @@ +import re + +from xrlint.node import DataArrayNode, DatasetNode +from xrlint.plugins.core.plugin import plugin +from xrlint.rule import RuleContext, RuleOp +from xrlint.util.schema import schema + +DEFAULT_GLOBAL_ATTRS = ["title", "history"] +DEFAULT_VAR_ATTRS = ["institution", "source", "references", "comment"] +DEFAULT_IGNORES = ["crs", "spatial_ref"] + + +@plugin.define_rule( + "dataset-description", + version="1.0.0", + type="suggestion", + description=( + "A dataset should provide information about where the data came" + " from and what has been done to it." + " This information is mainly for the benefit of human readers." + " The rule accepts the following configuration parameters:\n\n" + "- `global_attrs`: list of global attribute names." + f" Defaults to `{DEFAULT_GLOBAL_ATTRS}`.\n" + "- `var_attrs`: list of variable attribute names." + f" Defaults to `{DEFAULT_VAR_ATTRS}`.\n" + "- `ignored_vars`: list of ignored variables (regex patterns)." + f" Defaults to `{DEFAULT_IGNORES}`.\n" + "" + ), + docs_url=( + "https://cfconventions.org/cf-conventions/cf-conventions.html" + "#description-of-file-contents" + ), + schema=schema( + "object", + properties={ + "global_attrs": schema( + "array", + items=schema("string"), + default=DEFAULT_GLOBAL_ATTRS, + title="Global dataset attribute names", + ), + "var_attrs": schema( + "array", + items=schema("string"), + default=DEFAULT_VAR_ATTRS, + title="Data variable attribute names", + ), + "ignored_vars": schema( + "array", + items=schema("string"), + default=DEFAULT_IGNORES, + title="Ignored variables (regex name patterns)", + ), + }, + ), +) +class DatasetDescription(RuleOp): + def __init__( + self, global_attrs: list[str] | None = None, var_attrs: list[str] | None = None + ): + self.global_attrs = global_attrs if global_attrs else DEFAULT_GLOBAL_ATTRS + self.var_attrs = var_attrs if var_attrs else DEFAULT_VAR_ATTRS + self.ignored_vars = [ + re.compile(p) for p in (var_attrs if var_attrs else DEFAULT_VAR_ATTRS) + ] + + def dataset(self, ctx: RuleContext, node: DatasetNode): + dataset = node.dataset + for attr_name in self.global_attrs: + if attr_name not in dataset.attrs: + ctx.report(f"Missing dataset attribute {attr_name!r}.") + + def data_array(self, ctx: RuleContext, node: DataArrayNode): + dataset = ctx.dataset + if node.name not in dataset.data_vars: + return + + for m in self.ignored_vars: + if m.match(str(node.name)): + return + + var = node.data_array + for attr_name in self.var_attrs: + if attr_name not in var.attrs and attr_name not in dataset.attrs: + ctx.report(f"Missing data variable attribute {attr_name!r}.") diff --git a/xrlint/plugins/core/rules/var_units_attr.py b/xrlint/plugins/core/rules/var_units_attr.py index 81d2ba7..40780e9 100644 --- a/xrlint/plugins/core/rules/var_units_attr.py +++ b/xrlint/plugins/core/rules/var_units_attr.py @@ -8,6 +8,7 @@ version="1.0.0", type="suggestion", description="Every variable should have a valid 'units' attribute.", + docs_url="https://cfconventions.org/cf-conventions/cf-conventions.html#units", ) class VarUnitsAttr(RuleOp): def data_array(self, ctx: RuleContext, node: DataArrayNode): From 88fb2cfb29a2598c87f64f63e21684988bbfa27b Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Thu, 30 Jan 2025 13:05:35 +0100 Subject: [PATCH 2/3] new rules and rule renaming --- CHANGES.md | 11 ++ docs/todo.md | 7 ++ tests/plugins/core/rules/test_content_desc.py | 98 ++++++++++++++++ tests/plugins/core/rules/test_conventions.py | 37 ++++++ tests/plugins/core/rules/test_var_desc.py | 92 +++++++++++++++ .../{test_flags.py => test_var_flags.py} | 8 +- ...st_var_units_attr.py => test_var_units.py} | 8 +- tests/plugins/core/test_plugin.py | 7 +- tests/test_config.py | 2 +- xrlint/plugins/core/__init__.py | 7 +- xrlint/plugins/core/rules/content_desc.py | 107 ++++++++++++++++++ xrlint/plugins/core/rules/conventions.py | 23 ++-- .../plugins/core/rules/dataset_description.py | 86 -------------- xrlint/plugins/core/rules/var_desc.py | 48 ++++++++ .../core/rules/{flags.py => var_flags.py} | 4 +- .../rules/{var_units_attr.py => var_units.py} | 6 +- xrlint/testing.py | 29 ++++- 17 files changed, 461 insertions(+), 119 deletions(-) create mode 100644 tests/plugins/core/rules/test_content_desc.py create mode 100644 tests/plugins/core/rules/test_conventions.py create mode 100644 tests/plugins/core/rules/test_var_desc.py rename tests/plugins/core/rules/{test_flags.py => test_var_flags.py} (96%) rename tests/plugins/core/rules/{test_var_units_attr.py => test_var_units.py} (85%) create mode 100644 xrlint/plugins/core/rules/content_desc.py delete mode 100644 xrlint/plugins/core/rules/dataset_description.py create mode 100644 xrlint/plugins/core/rules/var_desc.py rename xrlint/plugins/core/rules/{flags.py => var_flags.py} (99%) rename xrlint/plugins/core/rules/{var_units_attr.py => var_units.py} (86%) diff --git a/CHANGES.md b/CHANGES.md index 041fa6c..35ec2cf 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,6 +2,17 @@ ## Version 0.4.1 (in development) +### Changes + +- Added core rule `conventions` that checks for the `Conventions`attribute. +- Added core rule `context-descr` that checks content description +- Added core rule `var-descr` that checks data variable description +- Renamed rules for consistency: + - `var-units-attrs` and `var-units` + - `flags` into `var-flags` + +### Fixes + - Fixed an issue that prevented recursively traversing folders referred to by URLs (such as `s3:////`) rather than local directory paths. (#39) diff --git a/docs/todo.md b/docs/todo.md index df802f0..2e24ad7 100644 --- a/docs/todo.md +++ b/docs/todo.md @@ -10,6 +10,13 @@ - use mkdocstrings ref syntax in docstrings - provide configuration examples (use as tests?) - add `docs_url` to all existing rules +- API changes for v0.5: + - clarify when users can pass configuration objects like values + and when configuration like values + - config class naming is confusing, + change `Config` -> `ConfigObject`, `ConfigList` -> `Config` + - Change `verify` -> `validate`, + prefix `RuleOp` methods by `validate_` for clarity. ## Desired diff --git a/tests/plugins/core/rules/test_content_desc.py b/tests/plugins/core/rules/test_content_desc.py new file mode 100644 index 0000000..10806b0 --- /dev/null +++ b/tests/plugins/core/rules/test_content_desc.py @@ -0,0 +1,98 @@ +import xarray as xr + +from xrlint.plugins.core.rules.content_desc import ContentDesc +from xrlint.testing import RuleTest, RuleTester + +global_attrs = dict( + title="OC-Climatology", + history="2025-01-26: created", +) + +common_attrs = dict( + institution="ESA", + source="a.nc; b.nc", + references="!", + comment="?", +) + +all_attrs = global_attrs | common_attrs + +time_coord = xr.DataArray( + [1, 2, 3], dims="time", attrs=dict(units="days since 2025-01-01") +) + +valid_dataset_0 = xr.Dataset( + attrs=all_attrs, + data_vars=dict(chl=xr.DataArray([1, 2, 3], dims="time", attrs=dict())), + coords=dict(time=time_coord), +) +valid_dataset_1 = xr.Dataset( + attrs=global_attrs, + data_vars=dict(chl=xr.DataArray([1, 2, 3], dims="time", attrs=common_attrs)), + coords=dict(time=time_coord), +) +valid_dataset_1a = xr.Dataset( + attrs=global_attrs, + data_vars=dict( + chl=xr.DataArray([1, 2, 3], dims="time", attrs=common_attrs), + crs=xr.DataArray(0, attrs=dict(grid_mapping_name="...")), + ), + coords=dict(time=time_coord), +) +valid_dataset_1b = xr.Dataset( + attrs=global_attrs, + data_vars=dict( + chl=xr.DataArray([1, 2, 3], dims="time", attrs=common_attrs), + chl_unc=xr.DataArray(0, attrs=dict(units="...")), + ), + coords=dict(time=time_coord), +) +valid_dataset_2 = xr.Dataset( + attrs=global_attrs, + data_vars=dict(chl=xr.DataArray([1, 2, 3], dims="time", attrs=dict())), + coords=dict(time=time_coord), +) +valid_dataset_3 = xr.Dataset( + attrs=global_attrs, + data_vars=dict( + chl=xr.DataArray([1, 2, 3], dims="time", attrs=dict(description="Bla!")) + ), + coords=dict(time=time_coord), +) + +invalid_dataset_0 = xr.Dataset() +invalid_dataset_1 = xr.Dataset( + attrs=dict(), + data_vars=dict(chl=xr.DataArray([1, 2, 3], dims="time", attrs=dict())), + coords=dict(time=time_coord), +) +invalid_dataset_2 = xr.Dataset( + attrs=global_attrs, + data_vars=dict(chl=xr.DataArray([1, 2, 3], dims="time", attrs=dict())), + coords=dict(time=time_coord), +) + +ContentDescTest = RuleTester.define_test( + "content-desc", + ContentDesc, + valid=[ + RuleTest(dataset=valid_dataset_0, name="0"), + RuleTest(dataset=valid_dataset_1, name="1"), + RuleTest(dataset=valid_dataset_1a, name="1a"), + RuleTest( + dataset=valid_dataset_1b, name="1b", kwargs={"ignored_vars": ["chl_unc"]} + ), + RuleTest(dataset=valid_dataset_2, name="2", kwargs={"commons": []}), + RuleTest( + dataset=valid_dataset_2, name="2", kwargs={"commons": [], "skip_vars": True} + ), + RuleTest( + dataset=valid_dataset_3, name="3", kwargs={"commons": ["description"]} + ), + ], + invalid=[ + RuleTest(dataset=invalid_dataset_0, expected=2), + RuleTest(dataset=invalid_dataset_1, expected=6), + RuleTest(dataset=invalid_dataset_2, kwargs={"skip_vars": True}, expected=4), + ], +) diff --git a/tests/plugins/core/rules/test_conventions.py b/tests/plugins/core/rules/test_conventions.py new file mode 100644 index 0000000..0fb214b --- /dev/null +++ b/tests/plugins/core/rules/test_conventions.py @@ -0,0 +1,37 @@ +import xarray as xr + +from xrlint.plugins.core.rules.conventions import Conventions +from xrlint.testing import RuleTest, RuleTester + +valid_dataset_0 = xr.Dataset(attrs=dict(Conventions="CF-1.10")) + +invalid_dataset_0 = xr.Dataset() +invalid_dataset_1 = xr.Dataset(attrs=dict(Conventions=1.12)) +invalid_dataset_2 = xr.Dataset(attrs=dict(Conventions="CF 1.10")) + + +ConventionsTest = RuleTester.define_test( + "conventions", + Conventions, + valid=[ + RuleTest(dataset=valid_dataset_0), + RuleTest(dataset=valid_dataset_0, kwargs={"match": r"CF-.*"}), + ], + invalid=[ + RuleTest( + dataset=invalid_dataset_0, + expected=["Missing attribute 'Conventions'."], + ), + RuleTest( + dataset=invalid_dataset_1, + expected=["Invalid attribute 'Conventions': 1.12."], + ), + RuleTest( + dataset=invalid_dataset_2, + kwargs={"match": r"CF-.*"}, + expected=[ + "Invalid attribute 'Conventions': 'CF 1.10' doesn't match 'CF-.*'." + ], + ), + ], +) diff --git a/tests/plugins/core/rules/test_var_desc.py b/tests/plugins/core/rules/test_var_desc.py new file mode 100644 index 0000000..362b8c0 --- /dev/null +++ b/tests/plugins/core/rules/test_var_desc.py @@ -0,0 +1,92 @@ +import xarray as xr + +from xrlint.plugins.core.rules.var_desc import VarDesc +from xrlint.testing import RuleTest, RuleTester + +pressure_attrs = dict( + long_name="mean sea level pressure", + units="hPa", + standard_name="air_pressure_at_sea_level", +) + +time_coord = xr.DataArray( + [1, 2, 3], dims="time", attrs=dict(units="days since 2025-01-01") +) + +valid_dataset_0 = xr.Dataset( + coords=dict(time=time_coord), +) +valid_dataset_1 = xr.Dataset( + data_vars=dict(pressure=xr.DataArray([1, 2, 3], dims="time", attrs=pressure_attrs)), + coords=dict(time=time_coord), +) +valid_dataset_2 = xr.Dataset( + data_vars=dict( + chl=xr.DataArray( + [1, 2, 3], dims="time", attrs=dict(description="It is air pressure") + ) + ), + coords=dict(time=time_coord), +) + +invalid_dataset_0 = xr.Dataset( + attrs=dict(), + data_vars=dict(chl=xr.DataArray([1, 2, 3], dims="time", attrs=dict())), + coords=dict(time=time_coord), +) + +invalid_dataset_1 = xr.Dataset( + attrs=dict(), + data_vars=dict( + chl=xr.DataArray( + [1, 2, 3], + dims="time", + attrs=dict(standard_name="air_pressure_at_sea_level"), + ) + ), + coords=dict(time=time_coord), +) +invalid_dataset_2 = xr.Dataset( + attrs=dict(), + data_vars=dict( + chl=xr.DataArray( + [1, 2, 3], dims="time", attrs=dict(long_name="mean sea level pressure") + ) + ), + coords=dict(time=time_coord), +) +invalid_dataset_3 = xr.Dataset( + attrs=dict(), + data_vars=dict(chl=xr.DataArray([1, 2, 3], dims="time", attrs=pressure_attrs)), + coords=dict(time=time_coord), +) + +VarDescTest = RuleTester.define_test( + "var-desc", + VarDesc, + valid=[ + RuleTest(dataset=valid_dataset_0), + RuleTest(dataset=valid_dataset_1), + RuleTest(dataset=valid_dataset_2, kwargs={"attrs": ["description"]}), + ], + invalid=[ + RuleTest( + dataset=invalid_dataset_0, + expected=[ + "Missing attribute 'standard_name'.", + "Missing attribute 'long_name'.", + ], + ), + RuleTest( + dataset=invalid_dataset_1, expected=["Missing attribute 'long_name'."] + ), + RuleTest( + dataset=invalid_dataset_2, expected=["Missing attribute 'standard_name'."] + ), + RuleTest( + dataset=invalid_dataset_3, + kwargs={"attrs": ["description"]}, + expected=["Missing attribute 'description'."], + ), + ], +) diff --git a/tests/plugins/core/rules/test_flags.py b/tests/plugins/core/rules/test_var_flags.py similarity index 96% rename from tests/plugins/core/rules/test_flags.py rename to tests/plugins/core/rules/test_var_flags.py index 8b25e94..95e8622 100644 --- a/tests/plugins/core/rules/test_flags.py +++ b/tests/plugins/core/rules/test_var_flags.py @@ -1,7 +1,7 @@ import numpy as np import xarray as xr -from xrlint.plugins.core.rules.flags import Flags +from xrlint.plugins.core.rules.var_flags import VarFlags from xrlint.testing import RuleTest, RuleTester valid_dataset_0 = xr.Dataset() @@ -73,9 +73,9 @@ np.float64 ) -FlagsTest = RuleTester.define_test( - "flags", - Flags, +VarFlagsTest = RuleTester.define_test( + "var-flags", + VarFlags, valid=[ RuleTest(dataset=valid_dataset_0), RuleTest(dataset=valid_dataset_1), diff --git a/tests/plugins/core/rules/test_var_units_attr.py b/tests/plugins/core/rules/test_var_units.py similarity index 85% rename from tests/plugins/core/rules/test_var_units_attr.py rename to tests/plugins/core/rules/test_var_units.py index a33620a..e5bd9d5 100644 --- a/tests/plugins/core/rules/test_var_units_attr.py +++ b/tests/plugins/core/rules/test_var_units.py @@ -1,6 +1,6 @@ import xarray as xr -from xrlint.plugins.core.rules.var_units_attr import VarUnitsAttr +from xrlint.plugins.core.rules.var_units import VarUnits from xrlint.testing import RuleTest, RuleTester valid_dataset_1 = xr.Dataset() @@ -19,9 +19,9 @@ invalid_dataset_3.v.attrs = {"units": 1} -VarUnitsAttrTest = RuleTester.define_test( - "var-units-attr", - VarUnitsAttr, +VarUnitsTest = RuleTester.define_test( + "var-units", + VarUnits, valid=[ RuleTest(dataset=valid_dataset_1), RuleTest(dataset=valid_dataset_2), diff --git a/tests/plugins/core/test_plugin.py b/tests/plugins/core/test_plugin.py index 057ef4d..c6d69b6 100644 --- a/tests/plugins/core/test_plugin.py +++ b/tests/plugins/core/test_plugin.py @@ -8,16 +8,19 @@ def test_rules_complete(self): plugin = export_plugin() self.assertEqual( { + "content-desc", + "conventions", "coords-for-dims", "dataset-title-attr", - "flags", "grid-mappings", "lat-coordinate", "lon-coordinate", "no-empty-attrs", "time-coordinate", "no-empty-chunks", - "var-units-attr", + "var-desc", + "var-flags", + "var-units", }, set(plugin.rules.keys()), ) diff --git a/tests/test_config.py b/tests/test_config.py index 650f86f..59a2e3f 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -40,7 +40,7 @@ def test_get_plugin(self): def test_get_rule(self): config = get_core_config() - rule = config.get_rule("flags") + rule = config.get_rule("var-flags") self.assertIsInstance(rule, Rule) with pytest.raises(ValueError, match="unknown rule 'foo'"): diff --git a/xrlint/plugins/core/__init__.py b/xrlint/plugins/core/__init__.py index 5a69302..916dec7 100644 --- a/xrlint/plugins/core/__init__.py +++ b/xrlint/plugins/core/__init__.py @@ -12,16 +12,19 @@ def export_plugin() -> Plugin: { "name": "recommended", "rules": { + "content-desc": "warn", + "conventions": "warn", "coords-for-dims": "error", "dataset-title-attr": "warn", - "flags": "error", "grid-mappings": "error", "lat-coordinate": "error", "lon-coordinate": "error", "no-empty-attrs": "warn", "no-empty-chunks": "warn", "time-coordinate": "error", - "var-units-attr": "warn", + "var-desc": "warn", + "var-flags": "error", + "var-units": "warn", }, }, ) diff --git a/xrlint/plugins/core/rules/content_desc.py b/xrlint/plugins/core/rules/content_desc.py new file mode 100644 index 0000000..bfa2246 --- /dev/null +++ b/xrlint/plugins/core/rules/content_desc.py @@ -0,0 +1,107 @@ +import re + +from xrlint.node import DataArrayNode, DatasetNode +from xrlint.plugins.core.plugin import plugin +from xrlint.rule import RuleContext, RuleOp, RuleExit +from xrlint.util.schema import schema + +DEFAULT_GLOBAL_ATTRS = ["title", "history"] +DEFAULT_COMMON_ATTRS = ["institution", "source", "references", "comment"] +DEFAULT_SKIP_VARS = False +# for the case that these are data vars by accident +DEFAULT_IGNORED_VARS = ["crs", "spatial_ref"] + + +@plugin.define_rule( + "content-desc", + version="1.0.0", + type="suggestion", + description=( + "A dataset should provide information about where the data came" + " from and what has been done to it." + " This information is mainly for the benefit of human readers." + " The rule accepts the following configuration parameters:\n\n" + "- `globals`: list of names of required global attributes." + f" Defaults to `{DEFAULT_GLOBAL_ATTRS}`.\n" + "- `commons`: list of names of required variable attributes" + " that can also be defined globally." + f" Defaults to `{DEFAULT_COMMON_ATTRS}`.\n" + "- `no_vars`: do not check variables at all." + f" Defaults to `{DEFAULT_SKIP_VARS}`.\n" + "- `ignored_vars`: list of ignored variables (regex patterns)." + f" Defaults to `{DEFAULT_IGNORED_VARS}`.\n" + "" + ), + docs_url=( + "https://cfconventions.org/cf-conventions/cf-conventions.html" + "#description-of-file-contents" + ), + schema=schema( + "object", + properties={ + "globals": schema( + "array", + items=schema("string"), + default=DEFAULT_GLOBAL_ATTRS, + title="Global attribute names", + ), + "commons": schema( + "array", + items=schema("string"), + default=DEFAULT_COMMON_ATTRS, + title="Common attribute names", + ), + "skip_vars": schema( + "boolean", + default=DEFAULT_SKIP_VARS, + title="Do not check variables", + ), + "ignored_vars": schema( + "array", + items=schema("string"), + default=DEFAULT_IGNORED_VARS, + title="Ignored variables (regex name patterns)", + ), + }, + ), +) +class ContentDesc(RuleOp): + def __init__(self, **params): + self.global_attrs = params.get("globals", DEFAULT_GLOBAL_ATTRS) + self.common_attrs = params.get("commons", DEFAULT_COMMON_ATTRS) + self.skip_vars = params.get("skip_vars", DEFAULT_SKIP_VARS) + self.ignored_vars = [ + re.compile(p) for p in params.get("ignored_vars", DEFAULT_IGNORED_VARS) + ] + + def dataset(self, ctx: RuleContext, node: DatasetNode): + dataset_attrs = node.dataset.attrs + attr_names = ( + self.global_attrs + self.common_attrs + if self.skip_vars + else self.global_attrs + ) + for attr_name in attr_names: + if attr_name not in dataset_attrs: + ctx.report(f"Missing attribute {attr_name!r}.") + + def data_array(self, ctx: RuleContext, node: DataArrayNode): + if self.skip_vars: + # Since dataset() has already been processed, + # no need to check other nodes. + raise RuleExit + + if node.name not in ctx.dataset.data_vars: + # Not a data variable + return + + for m in self.ignored_vars: + if m.match(str(node.name)): + # Ignored variable + return + + var_attrs = node.data_array.attrs + dataset_attrs = ctx.dataset.attrs + for attr_name in self.common_attrs: + if attr_name not in var_attrs and attr_name not in dataset_attrs: + ctx.report(f"Missing attribute {attr_name!r}.") diff --git a/xrlint/plugins/core/rules/conventions.py b/xrlint/plugins/core/rules/conventions.py index df2aa24..9da49f2 100644 --- a/xrlint/plugins/core/rules/conventions.py +++ b/xrlint/plugins/core/rules/conventions.py @@ -6,8 +6,6 @@ from xrlint.rule import RuleContext, RuleOp, RuleExit from xrlint.util.schema import schema -DEFAULT_ATTR_NAMES = ["Conventions", "title", "source", "history", "name"] - @plugin.define_rule( "conventions", @@ -18,14 +16,18 @@ " using the `Conventions` attribute.\n" " The rule has an optional configuration parameter `match` which" " is a regex pattern that the value of the `Conventions` attribute" - " must match, if any." + " must match, if any. If not provided, the rule just verifies" + " that the attribute exists and whether it is a character string." ), docs_url=( "https://cfconventions.org/cf-conventions/cf-conventions.html" "#identification-of-conventions" ), schema=schema( - "object", properties=dict(match=schema("string", title="Regex pattern")) + "object", + properties={ + "match": schema("string", title="Regex pattern"), + }, ), ) class Conventions(RuleOp): @@ -36,15 +38,12 @@ def dataset(self, ctx: RuleContext, node: DatasetNode): if "Conventions" not in node.dataset.attrs: ctx.report("Missing attribute 'Conventions'.") else: - conventions_spec = node.dataset.attrs.get("Conventions") - if not isinstance(conventions_spec, str): - ctx.report( - f"Invalid attribute 'Conventions':" - f" expected string, got value {conventions_spec!r}." - ) - elif self.match is not None and not self.match.match(conventions_spec): + value = node.dataset.attrs.get("Conventions") + if not isinstance(value, str) and value: + ctx.report(f"Invalid attribute 'Conventions': {value!r}.") + elif self.match is not None and not self.match.match(value): ctx.report( f"Invalid attribute 'Conventions':" - f" {conventions_spec!r} doesn't match pattern {self.match.pattern!r}." + f" {value!r} doesn't match {self.match.pattern!r}." ) raise RuleExit diff --git a/xrlint/plugins/core/rules/dataset_description.py b/xrlint/plugins/core/rules/dataset_description.py deleted file mode 100644 index ba47786..0000000 --- a/xrlint/plugins/core/rules/dataset_description.py +++ /dev/null @@ -1,86 +0,0 @@ -import re - -from xrlint.node import DataArrayNode, DatasetNode -from xrlint.plugins.core.plugin import plugin -from xrlint.rule import RuleContext, RuleOp -from xrlint.util.schema import schema - -DEFAULT_GLOBAL_ATTRS = ["title", "history"] -DEFAULT_VAR_ATTRS = ["institution", "source", "references", "comment"] -DEFAULT_IGNORES = ["crs", "spatial_ref"] - - -@plugin.define_rule( - "dataset-description", - version="1.0.0", - type="suggestion", - description=( - "A dataset should provide information about where the data came" - " from and what has been done to it." - " This information is mainly for the benefit of human readers." - " The rule accepts the following configuration parameters:\n\n" - "- `global_attrs`: list of global attribute names." - f" Defaults to `{DEFAULT_GLOBAL_ATTRS}`.\n" - "- `var_attrs`: list of variable attribute names." - f" Defaults to `{DEFAULT_VAR_ATTRS}`.\n" - "- `ignored_vars`: list of ignored variables (regex patterns)." - f" Defaults to `{DEFAULT_IGNORES}`.\n" - "" - ), - docs_url=( - "https://cfconventions.org/cf-conventions/cf-conventions.html" - "#description-of-file-contents" - ), - schema=schema( - "object", - properties={ - "global_attrs": schema( - "array", - items=schema("string"), - default=DEFAULT_GLOBAL_ATTRS, - title="Global dataset attribute names", - ), - "var_attrs": schema( - "array", - items=schema("string"), - default=DEFAULT_VAR_ATTRS, - title="Data variable attribute names", - ), - "ignored_vars": schema( - "array", - items=schema("string"), - default=DEFAULT_IGNORES, - title="Ignored variables (regex name patterns)", - ), - }, - ), -) -class DatasetDescription(RuleOp): - def __init__( - self, global_attrs: list[str] | None = None, var_attrs: list[str] | None = None - ): - self.global_attrs = global_attrs if global_attrs else DEFAULT_GLOBAL_ATTRS - self.var_attrs = var_attrs if var_attrs else DEFAULT_VAR_ATTRS - self.ignored_vars = [ - re.compile(p) for p in (var_attrs if var_attrs else DEFAULT_VAR_ATTRS) - ] - - def dataset(self, ctx: RuleContext, node: DatasetNode): - dataset = node.dataset - for attr_name in self.global_attrs: - if attr_name not in dataset.attrs: - ctx.report(f"Missing dataset attribute {attr_name!r}.") - - def data_array(self, ctx: RuleContext, node: DataArrayNode): - dataset = ctx.dataset - if node.name not in dataset.data_vars: - return - - for m in self.ignored_vars: - if m.match(str(node.name)): - return - - var = node.data_array - for attr_name in self.var_attrs: - if attr_name not in var.attrs and attr_name not in dataset.attrs: - ctx.report(f"Missing data variable attribute {attr_name!r}.") diff --git a/xrlint/plugins/core/rules/var_desc.py b/xrlint/plugins/core/rules/var_desc.py new file mode 100644 index 0000000..4d2ea77 --- /dev/null +++ b/xrlint/plugins/core/rules/var_desc.py @@ -0,0 +1,48 @@ +from xrlint.node import DataArrayNode +from xrlint.plugins.core.plugin import plugin +from xrlint.rule import RuleContext, RuleOp +from xrlint.util.schema import schema + +DEFAULT_ATTRS = ["standard_name", "long_name"] + + +@plugin.define_rule( + "var-desc", + version="1.0.0", + type="suggestion", + description=( + "Check that each data variable provides an" + " identification and description of the content." + " The rule can be configured by parameter `attrs` which is a list" + " of names of attributes that provides descriptive information." + f" It defaults to `{DEFAULT_ATTRS}`." + "" + ), + docs_url=( + "https://cfconventions.org/cf-conventions/cf-conventions.html#standard-name" + ), + schema=schema( + "object", + properties={ + "attrs": schema( + "array", + items=schema("string"), + default=DEFAULT_ATTRS, + title="Attribute names to check", + ), + }, + ), +) +class VarDesc(RuleOp): + def __init__(self, attrs: list[str] | None = None): + self._attrs = attrs if attrs is not None else DEFAULT_ATTRS + + def data_array(self, ctx: RuleContext, node: DataArrayNode): + if node.name not in ctx.dataset.data_vars: + # This rule applies to data variables only + return + + var_attrs = node.data_array.attrs + for attr_name in self._attrs: + if attr_name not in var_attrs: + ctx.report(f"Missing attribute {attr_name!r}.") diff --git a/xrlint/plugins/core/rules/flags.py b/xrlint/plugins/core/rules/var_flags.py similarity index 99% rename from xrlint/plugins/core/rules/flags.py rename to xrlint/plugins/core/rules/var_flags.py index 2fede26..a199357 100644 --- a/xrlint/plugins/core/rules/flags.py +++ b/xrlint/plugins/core/rules/var_flags.py @@ -12,7 +12,7 @@ @plugin.define_rule( - "flags", + "var-flags", version="1.0.0", type="suggestion", description=( @@ -21,7 +21,7 @@ ), docs_url="https://cfconventions.org/cf-conventions/cf-conventions.html#flags", ) -class Flags(RuleOp): +class VarFlags(RuleOp): def data_array(self, ctx: RuleContext, node: DataArrayNode): flag_values = node.data_array.attrs.get(FLAG_VALUES) flag_masks = node.data_array.attrs.get(FLAG_MASKS) diff --git a/xrlint/plugins/core/rules/var_units_attr.py b/xrlint/plugins/core/rules/var_units.py similarity index 86% rename from xrlint/plugins/core/rules/var_units_attr.py rename to xrlint/plugins/core/rules/var_units.py index 40780e9..0a4350d 100644 --- a/xrlint/plugins/core/rules/var_units_attr.py +++ b/xrlint/plugins/core/rules/var_units.py @@ -4,13 +4,13 @@ @plugin.define_rule( - "var-units-attr", + "var-units", version="1.0.0", type="suggestion", - description="Every variable should have a valid 'units' attribute.", + description="Every variable should provide a description of its units.", docs_url="https://cfconventions.org/cf-conventions/cf-conventions.html#units", ) -class VarUnitsAttr(RuleOp): +class VarUnits(RuleOp): def data_array(self, ctx: RuleContext, node: DataArrayNode): data_array = node.data_array units = data_array.attrs.get("units") diff --git a/xrlint/testing.py b/xrlint/testing.py index 0607ef4..26825c3 100644 --- a/xrlint/testing.py +++ b/xrlint/testing.py @@ -118,13 +118,36 @@ def _create_tests( valid: list[RuleTest] | None, invalid: list[RuleTest] | None, ) -> dict[str, Callable[[unittest.TestCase | None], None]]: - def make_args(tests: list[RuleTest] | None, mode: Literal["valid", "invalid"]): - return [(test, index, mode) for index, test in enumerate(tests or [])] + if hasattr(rule_op_class, "meta"): + tests = dict([self._create_name_test(rule_name, rule_op_class)]) + else: + tests = {} + + def make_args(checks: list[RuleTest] | None, mode: Literal["valid", "invalid"]): + return [(check, index, mode) for index, check in enumerate(checks or [])] - return dict( + tests |= dict( self._create_test(rule_name, rule_op_class, *args) for args in make_args(valid, "valid") + make_args(invalid, "invalid") ) + return tests + + # noinspection PyMethodMayBeStatic + def _create_name_test( + self, + rule_name: str, + rule_op_class: Type[RuleOp], + ) -> tuple[str, Callable]: + test_id = "test_rule_meta" + + def test_fn(_self: unittest.TestCase): + rule_meta: RuleMeta = getattr(rule_op_class, "meta") + assert rule_meta.name == rule_name, ( + f"rule name expected to be {rule_name!r}, but was {rule_meta.name!r}" + ) + + test_fn.__name__ = test_id + return test_id, test_fn def _create_test( self, From 0da431083256a9524fce5874a810e8319a57b270 Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Thu, 30 Jan 2025 13:08:07 +0100 Subject: [PATCH 3/3] formatting --- docs/rule-ref.md | 54 +++++++++++++---------- xrlint/cli/engine.py | 2 +- xrlint/plugins/core/rules/content_desc.py | 2 +- xrlint/plugins/core/rules/conventions.py | 3 +- 4 files changed, 34 insertions(+), 27 deletions(-) diff --git a/docs/rule-ref.md b/docs/rule-ref.md index 17c1289..4b9295b 100644 --- a/docs/rule-ref.md +++ b/docs/rule-ref.md @@ -5,13 +5,26 @@ New rules will be added by upcoming XRLint releases. ## Core Rules +### :material-lightbulb: `content-desc` + +A dataset should provide information about where the data came from and what has been done to it. This information is mainly for the benefit of human readers. The rule accepts the following configuration parameters: + +- `globals`: list of names of required global attributes. Defaults to `['title', 'history']`. +- `commons`: list of names of required variable attributes that can also be defined globally. Defaults to `['institution', 'source', 'references', 'comment']`. +- `no_vars`: do not check variables at all. Defaults to `False`. +- `ignored_vars`: list of ignored variables (regex patterns). Defaults to `['crs', 'spatial_ref']`. + +[:material-information-variant:](https://cfconventions.org/cf-conventions/cf-conventions.html#description-of-file-contents) + +Contained in: `all`-:material-lightning-bolt: `recommended`-:material-alert: + ### :material-lightbulb: `conventions` Datasets should identify the applicable conventions using the `Conventions` attribute. - The rule has an optional configuration parameter `match` which is a regex pattern that the value of the `Conventions` attribute must match, if any. + The rule has an optional configuration parameter `match` which is a regex pattern that the value of the `Conventions` attribute must match, if any. If not provided, the rule just verifies that the attribute exists and whether it is a character string. [:material-information-variant:](https://cfconventions.org/cf-conventions/cf-conventions.html#identification-of-conventions) -Contained in: `all`-:material-lightning-bolt: +Contained in: `all`-:material-lightning-bolt: `recommended`-:material-alert: ### :material-bug: `coords-for-dims` @@ -19,31 +32,12 @@ Dimensions of data variables should have corresponding coordinates. Contained in: `all`-:material-lightning-bolt: `recommended`-:material-lightning-bolt: -### :material-lightbulb: `dataset-description` - -A dataset should provide information about where the data came from and what has been done to it. This information is mainly for the benefit of human readers. The rule accepts the following configuration parameters: - -- `global_attrs`: list of global attribute names. Defaults to `['title', 'history']`. -- `var_attrs`: list of variable attribute names. Defaults to `['institution', 'source', 'references', 'comment']`. -- `ignored_vars`: list of ignored variables (regex patterns). Defaults to `['crs', 'spatial_ref']`. - -[:material-information-variant:](https://cfconventions.org/cf-conventions/cf-conventions.html#description-of-file-contents) - -Contained in: `all`-:material-lightning-bolt: - ### :material-lightbulb: `dataset-title-attr` Datasets should be given a non-empty title. Contained in: `all`-:material-lightning-bolt: `recommended`-:material-alert: -### :material-lightbulb: `flags` - -Validate attributes 'flag_values', 'flag_masks' and 'flag_meanings' that make variables that contain flag values self describing. -[:material-information-variant:](https://cfconventions.org/cf-conventions/cf-conventions.html#flags) - -Contained in: `all`-:material-lightning-bolt: `recommended`-:material-lightning-bolt: - ### :material-bug: `grid-mappings` Grid mappings, if any, shall have valid grid mapping coordinate variables. @@ -84,9 +78,23 @@ Time coordinates should have valid and unambiguous time units encoding. Contained in: `all`-:material-lightning-bolt: `recommended`-:material-lightning-bolt: -### :material-lightbulb: `var-units-attr` +### :material-lightbulb: `var-desc` + +Check that each data variable provides an identification and description of the content. The rule can be configured by parameter `attrs` which is a list of names of attributes that provides descriptive information. It defaults to `['standard_name', 'long_name']`. +[:material-information-variant:](https://cfconventions.org/cf-conventions/cf-conventions.html#standard-name) + +Contained in: `all`-:material-lightning-bolt: `recommended`-:material-alert: + +### :material-lightbulb: `var-flags` + +Validate attributes 'flag_values', 'flag_masks' and 'flag_meanings' that make variables that contain flag values self describing. +[:material-information-variant:](https://cfconventions.org/cf-conventions/cf-conventions.html#flags) + +Contained in: `all`-:material-lightning-bolt: `recommended`-:material-lightning-bolt: + +### :material-lightbulb: `var-units` -Every variable should have a valid 'units' attribute. +Every variable should provide a description of its units. [:material-information-variant:](https://cfconventions.org/cf-conventions/cf-conventions.html#units) Contained in: `all`-:material-lightning-bolt: `recommended`-:material-alert: diff --git a/xrlint/cli/engine.py b/xrlint/cli/engine.py index 33366fa..320e645 100644 --- a/xrlint/cli/engine.py +++ b/xrlint/cli/engine.py @@ -163,7 +163,7 @@ def get_files(self, file_paths: Iterable[str]) -> Iterator[tuple[str, Config]]: Returns: An iterator of pairs comprising a file or directory path - and its computed configuration. + and its computed configuration. """ config_list, global_filter = self.config_list.split_global_filter( default=DEFAULT_GLOBAL_FILTER diff --git a/xrlint/plugins/core/rules/content_desc.py b/xrlint/plugins/core/rules/content_desc.py index bfa2246..1473d17 100644 --- a/xrlint/plugins/core/rules/content_desc.py +++ b/xrlint/plugins/core/rules/content_desc.py @@ -2,7 +2,7 @@ from xrlint.node import DataArrayNode, DatasetNode from xrlint.plugins.core.plugin import plugin -from xrlint.rule import RuleContext, RuleOp, RuleExit +from xrlint.rule import RuleContext, RuleExit, RuleOp from xrlint.util.schema import schema DEFAULT_GLOBAL_ATTRS = ["title", "history"] diff --git a/xrlint/plugins/core/rules/conventions.py b/xrlint/plugins/core/rules/conventions.py index 9da49f2..ed184e6 100644 --- a/xrlint/plugins/core/rules/conventions.py +++ b/xrlint/plugins/core/rules/conventions.py @@ -1,9 +1,8 @@ import re - from xrlint.node import DatasetNode from xrlint.plugins.core.plugin import plugin -from xrlint.rule import RuleContext, RuleOp, RuleExit +from xrlint.rule import RuleContext, RuleExit, RuleOp from xrlint.util.schema import schema