From 3cb9a4790236c3f6ee0fd87916416bbe0b5ba7e8 Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Thu, 30 Jan 2025 13:38:03 +0100 Subject: [PATCH 1/3] verify --> validate --- CHANGES.md | 29 ++++++++++++++--------- docs/start.md | 2 +- docs/todo.md | 23 +++++++----------- examples/check_s3_bucket.py | 2 +- notebooks/xrlint-linter.ipynb | 8 +++---- tests/test_linter.py | 28 ++++++++++------------ xrlint/_linter/{verify.py => validate.py} | 14 +++++------ xrlint/cli/engine.py | 6 ++--- xrlint/cli/main.py | 2 +- xrlint/linter.py | 10 ++++---- xrlint/rule.py | 12 +++++----- xrlint/testing.py | 2 +- 12 files changed, 68 insertions(+), 70 deletions(-) rename xrlint/_linter/{verify.py => validate.py} (86%) diff --git a/CHANGES.md b/CHANGES.md index de888e2..0e63832 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,17 +2,24 @@ ## Version 0.5.0 (in development) -- Introduced type aliases `ConfigLike` and `ConfigObjectLike`. -- Renamed multiple components for improved clarity and consistency: - - Renamed `Config` into `ConfigObject` - - Renamed `ConfigList.configs` into `config_objects` - - Renamed `ConfigList` into `Config` - - Renamed `ConfigList.compute_config()` into `compute_config_object()` - - Renamed `Result.config` into `config_object` - - Renamed `XRLint.load_config_list()` into `init_config()` - - Renamed `XRLint.verify_datasets()` into `verify_files()` -- Added class method `from_config()` to `ConfigList`. -- Removed function `xrlint.config.merge_configs` as it was no longer used. +### Incompatible API changes: + +- Changed general use of term _verify_ into _validate_: + - prefixed `RuleOp` methods by `validate_` for clarity. + - renamed `XRLint.verify_datasets()` into `validate_files()` + - renamed `Linter.verify_dataset()` into `validate()` + +- Various changes for improved clarity and consistency + regarding configuration management: + - introduced type aliases `ConfigLike` and `ConfigObjectLike`. 
+ - renamed `Config` into `ConfigObject` + - renamed `ConfigList.configs` into `config_objects` + - renamed `ConfigList` into `Config` + - renamed `ConfigList.compute_config()` into `compute_config_object()` + - renamed `Result.config` into `config_object` + - renamed `XRLint.load_config_list()` into `init_config()` + - added class method `from_config()` to `ConfigList`. + - removed function `xrlint.config.merge_configs` as it was no longer used. ## Version 0.4.1 (from 2025-01-31) diff --git a/docs/start.md b/docs/start.md index 3d6afe3..94bab57 100644 --- a/docs/start.md +++ b/docs/start.md @@ -106,5 +106,5 @@ import xrlint.all as xrl test_ds = xr.Dataset(attrs={"title": "Test Dataset"}) linter = xrl.new_linter("recommended") -linter.verify_dataset(test_ds) +linter.validate(test_ds) ``` diff --git a/docs/todo.md b/docs/todo.md index 2e24ad7..38eeca3 100644 --- a/docs/todo.md +++ b/docs/todo.md @@ -10,13 +10,6 @@ - use mkdocstrings ref syntax in docstrings - provide configuration examples (use as tests?) - add `docs_url` to all existing rules -- API changes for v0.5: - - clarify when users can pass configuration objects like values - and when configuration like values - - config class naming is confusing, - change `Config` -> `ConfigObject`, `ConfigList` -> `Config` - - Change `verify` -> `validate`, - prefix `RuleOp` methods by `validate_` for clarity. ## Desired @@ -51,20 +44,20 @@ ## Generalize data linting -Do not limit verification to `xr.Dataset`. +Do not limit validations to `xr.Dataset`. However, this requires new rule sets. To allow for other data models, we need to allow -for a specific verifier type for a given data type. +for a specific validator type for a given data type. -The verifier verifies specific node types +The validator validates specific node types that are characteristic for a data type. To do so a traverser must traverse the elements of the data -and pass each node to the verifier. +and pass each node to the validator. 
Note, this is the [_Visitor Pattern_](https://en.wikipedia.org/wiki/Visitor_pattern), -where the verifier is the _Visitor_ and a node refers to _Element_. +where the validator is the _Visitor_ and a node refers to _Element_. To support the CLI mode, we need different data opener types that can read the data from a file path. @@ -72,9 +65,9 @@ types that can read the data from a file path. 1. open data, if given data is a file path: - find opener for file path - open data -2. verify data +2. validate data - find root element type and visitor type for data - - call the root element `accept(verifier)` that verifies the - root element `verify.root()` and starts traversal of + - call the root element `accept(validator)` that validates the + root element `validate.root()` and starts traversal of child elements. diff --git a/examples/check_s3_bucket.py b/examples/check_s3_bucket.py index ac8488c..083b093 100644 --- a/examples/check_s3_bucket.py +++ b/examples/check_s3_bucket.py @@ -4,5 +4,5 @@ xrlint = xrl.XRLint(no_config_lookup=True) xrlint.init_config("recommended") -results = xrlint.verify_files([URL]) +results = xrlint.validate_files([URL]) print(xrlint.format_results(results)) diff --git a/notebooks/xrlint-linter.ipynb b/notebooks/xrlint-linter.ipynb index cc8a02d..7fa9226 100644 --- a/notebooks/xrlint-linter.ipynb +++ b/notebooks/xrlint-linter.ipynb @@ -532,7 +532,7 @@ } ], "source": [ - "linter.verify_dataset(ds)" + "linter.validate(ds)" ] }, { @@ -1019,7 +1019,7 @@ } ], "source": [ - "linter.verify_dataset(invalid_ds)" + "linter.validate(invalid_ds)" ] }, { @@ -1100,7 +1100,7 @@ } ], "source": [ - "linter.verify_dataset(invalid_ds)" + "linter.validate(invalid_ds)" ] }, { @@ -1159,7 +1159,7 @@ } ], "source": [ - "linter.verify_dataset(invalid_ds)" + "linter.validate(invalid_ds)" ] }, { diff --git a/tests/test_linter.py b/tests/test_linter.py index 6f63385..4dda983 100644 --- a/tests/test_linter.py +++ b/tests/test_linter.py @@ -50,10 +50,10 @@ def 
test_new_linter_all(self): self.assertIn("coords-for-dims", config_obj_1.rules) -class LinterVerifyConfigTest(TestCase): +class LinterValidateWithConfigTest(TestCase): def test_config_with_config_list(self): linter = new_linter() - result = linter.verify_dataset( + result = linter.validate( xr.Dataset(), config=Config.from_value([{"rules": {"no-empty-attrs": 2}}]), ) @@ -61,7 +61,7 @@ def test_config_with_config_list(self): def test_config_with_list_of_config(self): linter = new_linter() - result = linter.verify_dataset( + result = linter.validate( xr.Dataset(), config=[{"rules": {"no-empty-attrs": 2}}], ) @@ -69,7 +69,7 @@ def test_config_with_list_of_config(self): def test_config_with_config_obj(self): linter = new_linter() - result = linter.verify_dataset( + result = linter.validate( xr.Dataset(), config={"rules": {"no-empty-attrs": 2}}, ) @@ -77,7 +77,7 @@ def test_config_with_config_obj(self): def test_no_config(self): linter = Linter() - result = linter.verify_dataset( + result = linter.validate( xr.Dataset(), ) self.assert_result_ok(result, "No configuration given or matches ''.") @@ -89,7 +89,7 @@ def assert_result_ok(self, result: Result, expected_message: str): self.assertEqual(expected_message, result.messages[0].message) -class LinterVerifyTest(TestCase): +class LinterValidateTest(TestCase): def setUp(self): plugin = new_plugin(name="test") @@ -157,7 +157,7 @@ def test_rules_are_ok(self): ) def test_linter_respects_rule_severity_error(self): - result = self.linter.verify_dataset( + result = self.linter.validate( xr.Dataset(), rules={"test/dataset-without-data-vars": 2} ) self.assertEqual( @@ -182,7 +182,7 @@ def test_linter_respects_rule_severity_error(self): ) def test_linter_respects_rule_severity_warn(self): - result = self.linter.verify_dataset( + result = self.linter.validate( xr.Dataset(), rules={"test/dataset-without-data-vars": 1} ) self.assertEqual( @@ -207,7 +207,7 @@ def test_linter_respects_rule_severity_warn(self): ) def 
test_linter_respects_rule_severity_off(self): - result = self.linter.verify_dataset( + result = self.linter.validate( xr.Dataset(), rules={"test/dataset-without-data-vars": 0} ) self.assertEqual( @@ -225,9 +225,7 @@ def test_linter_respects_rule_severity_off(self): ) def test_linter_recognized_unknown_rule(self): - result = self.linter.verify_dataset( - xr.Dataset(), rules={"test/dataset-is-fast": 2} - ) + result = self.linter.validate(xr.Dataset(), rules={"test/dataset-is-fast": 2}) self.assertEqual( [ Message( @@ -266,7 +264,7 @@ def test_linter_real_life_scenario(self): ) dataset.encoding["source"] = "chl-tsm.zarr" - result = self.linter.verify_dataset( + result = self.linter.validate( dataset, config={ "rules": { @@ -325,7 +323,7 @@ def test_linter_real_life_scenario(self): ) def test_processor_ok(self): - result = self.linter.verify_dataset( + result = self.linter.validate( "test.levels", config={ "processor": "test/multi-level-dataset", @@ -352,7 +350,7 @@ def test_processor_ok(self): ) def test_processor_fail(self): - result = self.linter.verify_dataset( + result = self.linter.validate( "bad.levels", config={ "processor": "test/multi-level-dataset", diff --git a/xrlint/_linter/verify.py b/xrlint/_linter/validate.py similarity index 86% rename from xrlint/_linter/verify.py rename to xrlint/_linter/validate.py index 8b92397..dcf254a 100644 --- a/xrlint/_linter/verify.py +++ b/xrlint/_linter/validate.py @@ -10,18 +10,18 @@ from .rulectx import RuleContextImpl -def verify_dataset(config_obj: ConfigObject, dataset: Any, file_path: str): +def validate_dataset(config_obj: ConfigObject, dataset: Any, file_path: str): assert isinstance(config_obj, ConfigObject) assert dataset is not None assert isinstance(file_path, str) if isinstance(dataset, xr.Dataset): - messages = _verify_dataset(config_obj, dataset, file_path, None) + messages = _validate_dataset(config_obj, dataset, file_path, None) else: - messages = _open_and_verify_dataset(config_obj, dataset, file_path) + 
messages = _open_and_validate_dataset(config_obj, dataset, file_path) return Result.new(config_object=config_obj, messages=messages, file_path=file_path) -def _verify_dataset( +def _validate_dataset( config_obj: ConfigObject, dataset: xr.Dataset, file_path: str, @@ -41,7 +41,7 @@ def _verify_dataset( return context.messages -def _open_and_verify_dataset( +def _open_and_validate_dataset( config_obj: ConfigObject, ds_source: Any, file_path: str ) -> list[Message]: assert isinstance(config_obj, ConfigObject) @@ -57,7 +57,7 @@ def _open_and_verify_dataset( return [new_fatal_message(str(e))] return processor_op.postprocess( [ - _verify_dataset(config_obj, ds, path, i) + _validate_dataset(config_obj, ds, path, i) for i, (ds, path) in enumerate(ds_path_list) ], file_path, @@ -68,7 +68,7 @@ def _open_and_verify_dataset( except (OSError, ValueError, TypeError) as e: return [new_fatal_message(str(e))] with dataset: - return _verify_dataset(config_obj, dataset, file_path, None) + return _validate_dataset(config_obj, dataset, file_path, None) def _open_dataset( diff --git a/xrlint/cli/engine.py b/xrlint/cli/engine.py index 88b3431..80b7d96 100644 --- a/xrlint/cli/engine.py +++ b/xrlint/cli/engine.py @@ -145,8 +145,8 @@ def print_config_for_file(self, file_path: str) -> None: config_json_obj = config.to_json() if config is not None else None click.echo(json.dumps(config_json_obj, indent=2)) - def verify_files(self, files: Iterable[str]) -> Iterator[Result]: - """Verify given files or directories which may also be given as URLs. + def validate_files(self, files: Iterable[str]) -> Iterator[Result]: + """Validate given files or directories which may also be given as URLs. The function produces a validation result for each file. 
Args: @@ -157,7 +157,7 @@ def verify_files(self, files: Iterable[str]) -> Iterator[Result]: """ linter = Linter() for file_path, config in self.get_files(files): - yield linter.verify_dataset(file_path, config=config) + yield linter.validate(file_path, config=config) def get_files( self, file_paths: Iterable[str] diff --git a/xrlint/cli/main.py b/xrlint/cli/main.py index b330180..e8d3f7c 100644 --- a/xrlint/cli/main.py +++ b/xrlint/cli/main.py @@ -152,7 +152,7 @@ def main( if files: cli_engine.init_config() - results = cli_engine.verify_files(files) + results = cli_engine.validate_files(files) report = cli_engine.format_results(results) cli_engine.write_report(report) diff --git a/xrlint/linter.py b/xrlint/linter.py index c3f6dfc..f7757ce 100644 --- a/xrlint/linter.py +++ b/xrlint/linter.py @@ -7,7 +7,7 @@ from xrlint.config import Config, ConfigLike, get_core_config_object from xrlint.result import Result -from ._linter.verify import new_fatal_message, verify_dataset +from ._linter.validate import new_fatal_message, validate_dataset from .constants import MISSING_DATASET_FILE_PATH @@ -55,7 +55,7 @@ def config(self) -> Config: """Get this linter's configuration.""" return self._config - def verify_dataset( + def validate( self, dataset: Any, *, @@ -63,7 +63,7 @@ def verify_dataset( config: ConfigLike = None, **config_props: Any, ) -> Result: - """Verify a dataset. + """Validate a dataset against applicable rules. Args: dataset: The dataset. Can be a `xr.Dataset` instance @@ -79,7 +79,7 @@ def verify_dataset( [ConfigObject][xrlint.config.ConfigObject]. Returns: - Result of the verification. + Result of the validation. 
""" if not file_path: if isinstance(dataset, xr.Dataset): @@ -100,7 +100,7 @@ def verify_dataset( ], ) - return verify_dataset(config_obj, dataset, file_path) + return validate_dataset(config_obj, dataset, file_path) def _get_file_path_for_dataset(dataset: xr.Dataset) -> str: diff --git a/xrlint/rule.py b/xrlint/rule.py index 6ade722..6e61d66 100644 --- a/xrlint/rule.py +++ b/xrlint/rule.py @@ -73,10 +73,10 @@ class RuleExit(Exception): class RuleOp(ABC): - """Define the specific rule verification operation.""" + """Define the specific rule validation operations.""" def dataset(self, context: RuleContext, node: DatasetNode) -> None: - """Verify the given dataset node. + """Validate the given dataset node. Args: context: The current rule context. @@ -87,7 +87,7 @@ def dataset(self, context: RuleContext, node: DatasetNode) -> None: """ def data_array(self, context: RuleContext, node: DataArrayNode) -> None: - """Verify the given data array (variable) node. + """Validate the given data array (variable) node. Args: context: The current rule context. @@ -98,7 +98,7 @@ def data_array(self, context: RuleContext, node: DataArrayNode) -> None: """ def attrs(self, context: RuleContext, node: AttrsNode) -> None: - """Verify the given attributes node. + """Validate the given attributes node. Args: context: The current rule context. @@ -109,7 +109,7 @@ def attrs(self, context: RuleContext, node: AttrsNode) -> None: """ def attr(self, context: RuleContext, node: AttrNode) -> None: - """Verify the given attribute node. + """Validate the given attribute node. Args: context: The current rule context. @@ -202,7 +202,7 @@ class that implements the rule's logic. """Rule metadata of type `RuleMeta`.""" op_class: Type[RuleOp] - """The class the implements the rule's verification operation. + """The class that implements the rule's validation operation. The class must implement the `RuleOp` interface. 
""" diff --git a/xrlint/testing.py b/xrlint/testing.py index 9782ae5..e919ae1 100644 --- a/xrlint/testing.py +++ b/xrlint/testing.py @@ -197,7 +197,7 @@ def _test_rule( # There is also no way for a rule to obtain the severity. severity = SEVERITY_ERROR linter = Linter(self._config, self._config_props) - result = linter.verify_dataset( + result = linter.validate( test.dataset, plugins={ _PLUGIN_NAME: ( From fa5ec7ec35701660c4a4f8905a9694d64e070f3c Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Thu, 30 Jan 2025 13:42:23 +0100 Subject: [PATCH 2/3] prefix RuleOp methods by `validate_` --- examples/plugin_config.py | 2 +- examples/rule_testing.py | 2 +- examples/virtual_plugin_config.py | 2 +- tests/test_linter.py | 6 +++--- tests/test_testing.py | 2 +- xrlint/_linter/apply.py | 6 +++--- xrlint/plugins/core/rules/content_desc.py | 4 ++-- xrlint/plugins/core/rules/conventions.py | 2 +- xrlint/plugins/core/rules/coords_for_dims.py | 2 +- xrlint/plugins/core/rules/dataset_title_attr.py | 2 +- xrlint/plugins/core/rules/grid_mappings.py | 2 +- xrlint/plugins/core/rules/lat_lon_coordinate.py | 4 ++-- xrlint/plugins/core/rules/no_empty_chunks.py | 4 ++-- xrlint/plugins/core/rules/time_coordinate.py | 2 +- xrlint/plugins/core/rules/var_desc.py | 2 +- xrlint/plugins/core/rules/var_flags.py | 2 +- xrlint/plugins/core/rules/var_units.py | 2 +- xrlint/plugins/xcube/rules/any_spatial_data_var.py | 2 +- xrlint/plugins/xcube/rules/cube_dims_order.py | 2 +- xrlint/plugins/xcube/rules/data_var_colors.py | 2 +- xrlint/plugins/xcube/rules/grid_mapping_naming.py | 2 +- xrlint/plugins/xcube/rules/increasing_time.py | 2 +- xrlint/plugins/xcube/rules/lat_lon_naming.py | 2 +- xrlint/plugins/xcube/rules/ml_dataset_meta.py | 2 +- xrlint/plugins/xcube/rules/ml_dataset_time.py | 2 +- xrlint/plugins/xcube/rules/ml_dataset_xy.py | 2 +- xrlint/plugins/xcube/rules/no_chunked_coords.py | 2 +- xrlint/plugins/xcube/rules/single_grid_mapping.py | 2 +- xrlint/plugins/xcube/rules/time_naming.py | 2 +- 
xrlint/rule.py | 6 +++--- 30 files changed, 39 insertions(+), 39 deletions(-) diff --git a/examples/plugin_config.py b/examples/plugin_config.py index e98e8b1..5059f22 100644 --- a/examples/plugin_config.py +++ b/examples/plugin_config.py @@ -14,7 +14,7 @@ class GoodTitle(RuleOp): """Dataset title should be 'Hello World!'.""" - def dataset(self, ctx: RuleContext, node: DatasetNode): + def validate_dataset(self, ctx: RuleContext, node: DatasetNode): good_title = "Hello World!" if node.dataset.attrs.get("title") != good_title: ctx.report( diff --git a/examples/rule_testing.py b/examples/rule_testing.py index 4035737..e6351f5 100644 --- a/examples/rule_testing.py +++ b/examples/rule_testing.py @@ -13,7 +13,7 @@ class GoodTitle(RuleOp): """Dataset title should be 'Hello World!'.""" - def dataset(self, ctx: RuleContext, node: DatasetNode): + def validate_dataset(self, ctx: RuleContext, node: DatasetNode): good_title = "Hello World!" if node.dataset.attrs.get("title") != good_title: ctx.report( diff --git a/examples/virtual_plugin_config.py b/examples/virtual_plugin_config.py index da9a3a8..9840150 100644 --- a/examples/virtual_plugin_config.py +++ b/examples/virtual_plugin_config.py @@ -10,7 +10,7 @@ @define_rule("good-title", description="Dataset title should be 'Hello World!'.") class GoodTitle(RuleOp): - def dataset(self, ctx: RuleContext, node: DatasetNode): + def validate_dataset(self, ctx: RuleContext, node: DatasetNode): good_title = "Hello World!" 
if node.dataset.attrs.get("title") != good_title: ctx.report( diff --git a/tests/test_linter.py b/tests/test_linter.py index 4dda983..57558df 100644 --- a/tests/test_linter.py +++ b/tests/test_linter.py @@ -95,7 +95,7 @@ def setUp(self): @plugin.define_rule("no-space-in-attr-name") class AttrVer(RuleOp): - def attr(self, ctx: RuleContext, node: AttrNode): + def validate_attr(self, ctx: RuleContext, node: AttrNode): if " " in node.name: ctx.report(f"Attribute name with space: {node.name!r}") @@ -107,7 +107,7 @@ def attrs(self, ctx: RuleContext, node: AttrsNode): @plugin.define_rule("data-var-dim-must-have-coord") class DataArrayVer(RuleOp): - def data_array(self, ctx: RuleContext, node: DataArrayNode): + def validate_data_array(self, ctx: RuleContext, node: DataArrayNode): if node.in_data_vars(): for dim_name in node.data_array.dims: if dim_name not in ctx.dataset.coords: @@ -119,7 +119,7 @@ def data_array(self, ctx: RuleContext, node: DataArrayNode): @plugin.define_rule("dataset-without-data-vars") class DatasetVer(RuleOp): - def dataset(self, ctx: RuleContext, node: DatasetNode): + def validate_dataset(self, ctx: RuleContext, node: DatasetNode): if len(node.dataset.data_vars) == 0: ctx.report("Dataset does not have data variables") raise RuleExit # no need to traverse further diff --git a/tests/test_testing.py b/tests/test_testing.py index fc1ac7f..7ee728c 100644 --- a/tests/test_testing.py +++ b/tests/test_testing.py @@ -9,7 +9,7 @@ class ForceTitle(RuleOp): - def dataset(self, ctx: RuleContext, node: DatasetNode): + def validate_dataset(self, ctx: RuleContext, node: DatasetNode): title = node.dataset.attrs.get("title") if not title: ctx.report("Datasets must have a title") diff --git a/xrlint/_linter/apply.py b/xrlint/_linter/apply.py index 6e2a0c7..25b7455 100644 --- a/xrlint/_linter/apply.py +++ b/xrlint/_linter/apply.py @@ -48,7 +48,7 @@ def apply_rule( def _visit_dataset_node(rule_op: RuleOp, context: RuleContextImpl, node: DatasetNode): with 
context.use_state(node=node): - rule_op.dataset(context, node) + rule_op.validate_dataset(context, node) _visit_attrs_node( rule_op, context, @@ -86,7 +86,7 @@ def _visit_data_array_node( rule_op: RuleOp, context: RuleContextImpl, node: DataArrayNode ): with context.use_state(node=node): - rule_op.data_array(context, node) + rule_op.validate_data_array(context, node) _visit_attrs_node( rule_op, context, @@ -116,4 +116,4 @@ def _visit_attrs_node(rule_op: RuleOp, context: RuleContextImpl, node: AttrsNode def _visit_attr_node(rule_op: RuleOp, context: RuleContextImpl, node: AttrNode): with context.use_state(node=node): - rule_op.attr(context, node) + rule_op.validate_attr(context, node) diff --git a/xrlint/plugins/core/rules/content_desc.py b/xrlint/plugins/core/rules/content_desc.py index 1473d17..3c34971 100644 --- a/xrlint/plugins/core/rules/content_desc.py +++ b/xrlint/plugins/core/rules/content_desc.py @@ -74,7 +74,7 @@ def __init__(self, **params): re.compile(p) for p in params.get("ignored_vars", DEFAULT_IGNORED_VARS) ] - def dataset(self, ctx: RuleContext, node: DatasetNode): + def validate_dataset(self, ctx: RuleContext, node: DatasetNode): dataset_attrs = node.dataset.attrs attr_names = ( self.global_attrs + self.common_attrs @@ -85,7 +85,7 @@ def dataset(self, ctx: RuleContext, node: DatasetNode): if attr_name not in dataset_attrs: ctx.report(f"Missing attribute {attr_name!r}.") - def data_array(self, ctx: RuleContext, node: DataArrayNode): + def validate_data_array(self, ctx: RuleContext, node: DataArrayNode): if self.skip_vars: # Since dataset() has already been processed, # no need to check other nodes. 
diff --git a/xrlint/plugins/core/rules/conventions.py b/xrlint/plugins/core/rules/conventions.py index ed184e6..dcb4ce3 100644 --- a/xrlint/plugins/core/rules/conventions.py +++ b/xrlint/plugins/core/rules/conventions.py @@ -33,7 +33,7 @@ class Conventions(RuleOp): def __init__(self, match: str | None = None): self.match = re.compile(match) if match else None - def dataset(self, ctx: RuleContext, node: DatasetNode): + def validate_dataset(self, ctx: RuleContext, node: DatasetNode): if "Conventions" not in node.dataset.attrs: ctx.report("Missing attribute 'Conventions'.") else: diff --git a/xrlint/plugins/core/rules/coords_for_dims.py b/xrlint/plugins/core/rules/coords_for_dims.py index 412c68c..be0d261 100644 --- a/xrlint/plugins/core/rules/coords_for_dims.py +++ b/xrlint/plugins/core/rules/coords_for_dims.py @@ -11,7 +11,7 @@ description="Dimensions of data variables should have corresponding coordinates.", ) class CoordsForDims(RuleOp): - def dataset(self, ctx: RuleContext, node: DatasetNode): + def validate_dataset(self, ctx: RuleContext, node: DatasetNode): dataset = node.dataset # Get data variable dimensions diff --git a/xrlint/plugins/core/rules/dataset_title_attr.py b/xrlint/plugins/core/rules/dataset_title_attr.py index 4abb5e2..ce024a5 100644 --- a/xrlint/plugins/core/rules/dataset_title_attr.py +++ b/xrlint/plugins/core/rules/dataset_title_attr.py @@ -10,7 +10,7 @@ description="Datasets should be given a non-empty title.", ) class DatasetTitleAttr(RuleOp): - def dataset(self, ctx: RuleContext, node: DatasetNode): + def validate_dataset(self, ctx: RuleContext, node: DatasetNode): title = node.dataset.attrs.get("title") if not title: ctx.report("Missing 'title' attribute in dataset.") diff --git a/xrlint/plugins/core/rules/grid_mappings.py b/xrlint/plugins/core/rules/grid_mappings.py index 2050c3e..170b446 100644 --- a/xrlint/plugins/core/rules/grid_mappings.py +++ b/xrlint/plugins/core/rules/grid_mappings.py @@ -12,7 +12,7 @@ ), ) class 
GridMappings(RuleOp): - def dataset(self, ctx: RuleContext, node: DatasetNode): + def validate_dataset(self, ctx: RuleContext, node: DatasetNode): dataset = node.dataset # Get the mapping of grid mapping names to grid-mapped variables diff --git a/xrlint/plugins/core/rules/lat_lon_coordinate.py b/xrlint/plugins/core/rules/lat_lon_coordinate.py index 02a9e6f..ef401aa 100644 --- a/xrlint/plugins/core/rules/lat_lon_coordinate.py +++ b/xrlint/plugins/core/rules/lat_lon_coordinate.py @@ -30,7 +30,7 @@ ), ) class LatCoordinate(RuleOp): - def data_array(self, ctx: RuleContext, node: DataArrayNode): + def validate_data_array(self, ctx: RuleContext, node: DataArrayNode): if node.name in ctx.dataset.coords and _is_lat_var( str(node.name), node.data_array ): @@ -55,7 +55,7 @@ def data_array(self, ctx: RuleContext, node: DataArrayNode): ), ) class LonCoordinate(RuleOp): - def data_array(self, ctx: RuleContext, node: DataArrayNode): + def validate_data_array(self, ctx: RuleContext, node: DataArrayNode): if node.name in ctx.dataset.coords and _is_lon_var( str(node.name), node.data_array ): diff --git a/xrlint/plugins/core/rules/no_empty_chunks.py b/xrlint/plugins/core/rules/no_empty_chunks.py index f243d8f..279702d 100644 --- a/xrlint/plugins/core/rules/no_empty_chunks.py +++ b/xrlint/plugins/core/rules/no_empty_chunks.py @@ -17,14 +17,14 @@ ), ) class NoEmptyChunks(RuleOp): - def dataset(self, ctx: RuleContext, node: DataArrayNode): + def validate_dataset(self, ctx: RuleContext, node: DatasetNode): source = ctx.dataset.encoding.get("source") is_zarr = isinstance(source, str) and source.endswith(".zarr") if not is_zarr: # if not a Zarr, no need to check further raise RuleExit - def data_array(self, ctx: RuleContext, node: DataArrayNode): + def validate_data_array(self, ctx: RuleContext, node: DataArrayNode): if ( "write_empty_chunks" not in node.data_array.encoding and "chunks" in node.data_array.encoding diff --git a/xrlint/plugins/core/rules/time_coordinate.py 
b/xrlint/plugins/core/rules/time_coordinate.py index 8efc75c..f62b78c 100644 --- a/xrlint/plugins/core/rules/time_coordinate.py +++ b/xrlint/plugins/core/rules/time_coordinate.py @@ -52,7 +52,7 @@ ), ) class TimeCoordinate(RuleOp): - def data_array(self, ctx: RuleContext, node: DataArrayNode): + def validate_data_array(self, ctx: RuleContext, node: DataArrayNode): array = node.data_array attrs = array.attrs encoding = array.encoding diff --git a/xrlint/plugins/core/rules/var_desc.py b/xrlint/plugins/core/rules/var_desc.py index 4d2ea77..04139d8 100644 --- a/xrlint/plugins/core/rules/var_desc.py +++ b/xrlint/plugins/core/rules/var_desc.py @@ -37,7 +37,7 @@ class VarDesc(RuleOp): def __init__(self, attrs: list[str] | None = None): self._attrs = attrs if attrs is not None else DEFAULT_ATTRS - def data_array(self, ctx: RuleContext, node: DataArrayNode): + def validate_data_array(self, ctx: RuleContext, node: DataArrayNode): if node.name not in ctx.dataset.data_vars: # This rule applies to data variables only return diff --git a/xrlint/plugins/core/rules/var_flags.py b/xrlint/plugins/core/rules/var_flags.py index a199357..8bc0a9a 100644 --- a/xrlint/plugins/core/rules/var_flags.py +++ b/xrlint/plugins/core/rules/var_flags.py @@ -22,7 +22,7 @@ docs_url="https://cfconventions.org/cf-conventions/cf-conventions.html#flags", ) class VarFlags(RuleOp): - def data_array(self, ctx: RuleContext, node: DataArrayNode): + def validate_data_array(self, ctx: RuleContext, node: DataArrayNode): flag_values = node.data_array.attrs.get(FLAG_VALUES) flag_masks = node.data_array.attrs.get(FLAG_MASKS) flag_meanings = node.data_array.attrs.get(FLAG_MEANINGS) diff --git a/xrlint/plugins/core/rules/var_units.py b/xrlint/plugins/core/rules/var_units.py index 0a4350d..eea98cc 100644 --- a/xrlint/plugins/core/rules/var_units.py +++ b/xrlint/plugins/core/rules/var_units.py @@ -11,7 +11,7 @@ docs_url="https://cfconventions.org/cf-conventions/cf-conventions.html#units", ) class VarUnits(RuleOp): - 
def data_array(self, ctx: RuleContext, node: DataArrayNode): + def validate_data_array(self, ctx: RuleContext, node: DataArrayNode): data_array = node.data_array units = data_array.attrs.get("units") if units is None: diff --git a/xrlint/plugins/xcube/rules/any_spatial_data_var.py b/xrlint/plugins/xcube/rules/any_spatial_data_var.py index 26f9375..030b873 100644 --- a/xrlint/plugins/xcube/rules/any_spatial_data_var.py +++ b/xrlint/plugins/xcube/rules/any_spatial_data_var.py @@ -14,6 +14,6 @@ ), ) class AnySpatialDataVar(RuleOp): - def dataset(self, ctx: RuleContext, node: DatasetNode): + def validate_dataset(self, ctx: RuleContext, node: DatasetNode): if not any(map(is_spatial_var, node.dataset.data_vars.values())): ctx.report("No spatial data variables found.") diff --git a/xrlint/plugins/xcube/rules/cube_dims_order.py b/xrlint/plugins/xcube/rules/cube_dims_order.py index b92bc2d..17ac8d7 100644 --- a/xrlint/plugins/xcube/rules/cube_dims_order.py +++ b/xrlint/plugins/xcube/rules/cube_dims_order.py @@ -17,7 +17,7 @@ ), ) class CubeDimsOrder(RuleOp): - def data_array(self, ctx: RuleContext, node: DataArrayNode): + def validate_data_array(self, ctx: RuleContext, node: DataArrayNode): if node.in_data_vars(): dims = list(node.data_array.dims) indexes = {d: i for i, d in enumerate(node.data_array.dims)} diff --git a/xrlint/plugins/xcube/rules/data_var_colors.py b/xrlint/plugins/xcube/rules/data_var_colors.py index c642259..b034286 100644 --- a/xrlint/plugins/xcube/rules/data_var_colors.py +++ b/xrlint/plugins/xcube/rules/data_var_colors.py @@ -16,7 +16,7 @@ ), ) class DataVarColors(RuleOp): - def data_array(self, ctx: RuleContext, node: DataArrayNode): + def validate_data_array(self, ctx: RuleContext, node: DataArrayNode): array = node.data_array if not node.in_data_vars() or not is_spatial_var(array): return diff --git a/xrlint/plugins/xcube/rules/grid_mapping_naming.py b/xrlint/plugins/xcube/rules/grid_mapping_naming.py index 659559b..953439e 100644 --- 
a/xrlint/plugins/xcube/rules/grid_mapping_naming.py +++ b/xrlint/plugins/xcube/rules/grid_mapping_naming.py @@ -15,7 +15,7 @@ docs_url="https://xcube.readthedocs.io/en/latest/cubespec.html#spatial-reference", ) class GridMappingNaming(RuleOp): - def dataset(self, ctx: RuleContext, node: DatasetNode): + def validate_dataset(self, ctx: RuleContext, node: DatasetNode): for var_name, var in node.dataset.variables.items(): if "grid_mapping_name" in var.attrs and var_name not in GM_NAMES: ctx.report( diff --git a/xrlint/plugins/xcube/rules/increasing_time.py b/xrlint/plugins/xcube/rules/increasing_time.py index c1feccc..9e84edd 100644 --- a/xrlint/plugins/xcube/rules/increasing_time.py +++ b/xrlint/plugins/xcube/rules/increasing_time.py @@ -16,7 +16,7 @@ ), ) class IncreasingTime(RuleOp): - def data_array(self, ctx: RuleContext, node: DataArrayNode): + def validate_data_array(self, ctx: RuleContext, node: DataArrayNode): array = node.data_array if node.in_coords() and node.name == "time" and array.dims == ("time",): diff_array: np.ndarray = array.diff("time").values diff --git a/xrlint/plugins/xcube/rules/lat_lon_naming.py b/xrlint/plugins/xcube/rules/lat_lon_naming.py index e3d4a14..8f71a8f 100644 --- a/xrlint/plugins/xcube/rules/lat_lon_naming.py +++ b/xrlint/plugins/xcube/rules/lat_lon_naming.py @@ -18,7 +18,7 @@ docs_url="https://xcube.readthedocs.io/en/latest/cubespec.html#spatial-reference", ) class LatLonNaming(RuleOp): - def dataset(self, ctx: RuleContext, node: DatasetNode): + def validate_dataset(self, ctx: RuleContext, node: DatasetNode): lon_ok = _check( ctx, "variable", node.dataset.variables.keys(), INVALID_LON_NAMES, LON_NAME ) diff --git a/xrlint/plugins/xcube/rules/ml_dataset_meta.py b/xrlint/plugins/xcube/rules/ml_dataset_meta.py index 5363431..a1e53a0 100644 --- a/xrlint/plugins/xcube/rules/ml_dataset_meta.py +++ b/xrlint/plugins/xcube/rules/ml_dataset_meta.py @@ -22,7 +22,7 @@ ), ) class MLDatasetMeta(RuleOp): - def dataset(self, ctx: RuleContext, 
node: DatasetNode): + def validate_dataset(self, ctx: RuleContext, node: DatasetNode): level_info = get_dataset_level_info(node.dataset) if level_info is None: # ok, this rules applies only to level datasets opened diff --git a/xrlint/plugins/xcube/rules/ml_dataset_time.py b/xrlint/plugins/xcube/rules/ml_dataset_time.py index 0f7e3ae..5f48a3e 100644 --- a/xrlint/plugins/xcube/rules/ml_dataset_time.py +++ b/xrlint/plugins/xcube/rules/ml_dataset_time.py @@ -17,7 +17,7 @@ docs_url="https://xcube.readthedocs.io/en/latest/mldatasets.html#definition", ) class MLDatasetTime(RuleOp): - def dataset(self, ctx: RuleContext, node: DatasetNode): + def validate_dataset(self, ctx: RuleContext, node: DatasetNode): level_info = get_dataset_level_info(node.dataset) if level_info is None: # ok, this rules applies only to level datasets opened diff --git a/xrlint/plugins/xcube/rules/ml_dataset_xy.py b/xrlint/plugins/xcube/rules/ml_dataset_xy.py index 2ed8000..fd1540d 100644 --- a/xrlint/plugins/xcube/rules/ml_dataset_xy.py +++ b/xrlint/plugins/xcube/rules/ml_dataset_xy.py @@ -17,7 +17,7 @@ docs_url="https://xcube.readthedocs.io/en/latest/mldatasets.html#definition", ) class MLDatasetXY(RuleOp): - def dataset(self, ctx: RuleContext, node: DatasetNode): + def validate_dataset(self, ctx: RuleContext, node: DatasetNode): level_info = get_dataset_level_info(node.dataset) if level_info is None: # ok, this rules applies only to level datasets opened diff --git a/xrlint/plugins/xcube/rules/no_chunked_coords.py b/xrlint/plugins/xcube/rules/no_chunked_coords.py index e74fc9e..ce69b6b 100644 --- a/xrlint/plugins/xcube/rules/no_chunked_coords.py +++ b/xrlint/plugins/xcube/rules/no_chunked_coords.py @@ -36,7 +36,7 @@ class NoChunkedCoords(RuleOp): def __init__(self, limit: int = DEFAULT_LIMIT): self.limit = limit - def data_array(self, ctx: RuleContext, node: DataArrayNode): + def validate_data_array(self, ctx: RuleContext, node: DataArrayNode): if node.name not in ctx.dataset.coords or 
node.data_array.ndim != 1: return diff --git a/xrlint/plugins/xcube/rules/single_grid_mapping.py b/xrlint/plugins/xcube/rules/single_grid_mapping.py index cabb7c8..e788f9b 100644 --- a/xrlint/plugins/xcube/rules/single_grid_mapping.py +++ b/xrlint/plugins/xcube/rules/single_grid_mapping.py @@ -16,7 +16,7 @@ docs_url="https://xcube.readthedocs.io/en/latest/cubespec.html#spatial-reference", ) class SingleGridMapping(RuleOp): - def dataset(self, ctx: RuleContext, node: DatasetNode): + def validate_dataset(self, ctx: RuleContext, node: DatasetNode): dataset = node.dataset if not dataset.data_vars: diff --git a/xrlint/plugins/xcube/rules/time_naming.py b/xrlint/plugins/xcube/rules/time_naming.py index 277d416..1f69db3 100644 --- a/xrlint/plugins/xcube/rules/time_naming.py +++ b/xrlint/plugins/xcube/rules/time_naming.py @@ -17,7 +17,7 @@ docs_url="https://xcube.readthedocs.io/en/latest/cubespec.html#temporal-reference", ) class TimeNaming(RuleOp): - def dataset(self, ctx: RuleContext, node: DatasetNode): + def validate_dataset(self, ctx: RuleContext, node: DatasetNode): time_vars = { var_name: var for var_name, var in node.dataset.coords.items() diff --git a/xrlint/rule.py b/xrlint/rule.py index 6e61d66..5eb97f6 100644 --- a/xrlint/rule.py +++ b/xrlint/rule.py @@ -75,7 +75,7 @@ class RuleExit(Exception): class RuleOp(ABC): """Define the specific rule validation operations.""" - def dataset(self, context: RuleContext, node: DatasetNode) -> None: + def validate_dataset(self, context: RuleContext, node: DatasetNode) -> None: """Validate the given dataset node. Args: @@ -86,7 +86,7 @@ def dataset(self, context: RuleContext, node: DatasetNode) -> None: RuleExit: to exit rule logic and further node traversal """ - def data_array(self, context: RuleContext, node: DataArrayNode) -> None: + def validate_data_array(self, context: RuleContext, node: DataArrayNode) -> None: """Validate the given data array (variable) node. 
Args: @@ -108,7 +108,7 @@ def attrs(self, context: RuleContext, node: AttrsNode) -> None: RuleExit: to exit rule logic and further node traversal """ - def attr(self, context: RuleContext, node: AttrNode) -> None: + def validate_attr(self, context: RuleContext, node: AttrNode) -> None: """Validate the given attribute node. Args: From 20a3ffa57e07ce5fa2d7fa6029fcdc3cf1d94cd8 Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Thu, 30 Jan 2025 13:56:32 +0100 Subject: [PATCH 3/3] more consistency --- CHANGES.md | 4 +++ tests/test_linter.py | 8 +++--- xrlint/_linter/apply.py | 28 +++++++++---------- xrlint/all.py | 4 +-- xrlint/node.py | 12 +++++--- xrlint/plugins/core/rules/content_desc.py | 6 ++-- .../plugins/core/rules/lat_lon_coordinate.py | 18 +++++------- xrlint/plugins/core/rules/no_empty_attrs.py | 2 +- xrlint/plugins/core/rules/no_empty_chunks.py | 12 ++++---- xrlint/plugins/core/rules/time_coordinate.py | 6 ++-- xrlint/plugins/core/rules/var_desc.py | 6 ++-- xrlint/plugins/core/rules/var_flags.py | 12 ++++---- xrlint/plugins/core/rules/var_units.py | 6 ++-- xrlint/plugins/xcube/rules/cube_dims_order.py | 8 +++--- xrlint/plugins/xcube/rules/data_var_colors.py | 6 ++-- xrlint/plugins/xcube/rules/increasing_time.py | 6 ++-- .../plugins/xcube/rules/no_chunked_coords.py | 10 +++---- xrlint/rule.py | 6 ++-- 18 files changed, 81 insertions(+), 79 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 0e63832..5c4a1d1 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -4,6 +4,10 @@ ### Incompatible API changes: +- Renamed nodes and node properties for consistency and clarity: + - renamed `DataArrayNode` into `VariableNode` + - renamed `DataArrayNode.data_array` into `VariableNode.array` + - Changed general use of term _verify_ into _validate_: - prefixed `RuleOp` methods by `validate_` for clarity. 
- renamed `XRLint.verify_datasets()` into `validate_files()` diff --git a/tests/test_linter.py b/tests/test_linter.py index 57558df..f95f8b2 100644 --- a/tests/test_linter.py +++ b/tests/test_linter.py @@ -6,7 +6,7 @@ from xrlint.config import Config, ConfigObject from xrlint.constants import CORE_PLUGIN_NAME, NODE_ROOT_NAME from xrlint.linter import Linter, new_linter -from xrlint.node import AttrNode, AttrsNode, DataArrayNode, DatasetNode +from xrlint.node import AttrNode, AttrsNode, VariableNode, DatasetNode from xrlint.plugin import new_plugin from xrlint.processor import ProcessorOp from xrlint.result import Message, Result @@ -101,15 +101,15 @@ def validate_attr(self, ctx: RuleContext, node: AttrNode): @plugin.define_rule("no-empty-attrs") class AttrsVer(RuleOp): - def attrs(self, ctx: RuleContext, node: AttrsNode): + def validate_attrs(self, ctx: RuleContext, node: AttrsNode): if not node.attrs: ctx.report("Empty attributes") @plugin.define_rule("data-var-dim-must-have-coord") class DataArrayVer(RuleOp): - def validate_data_array(self, ctx: RuleContext, node: DataArrayNode): + def validate_variable(self, ctx: RuleContext, node: VariableNode): if node.in_data_vars(): - for dim_name in node.data_array.dims: + for dim_name in node.array.dims: if dim_name not in ctx.dataset.coords: ctx.report( f"Dimension {dim_name!r}" diff --git a/xrlint/_linter/apply.py b/xrlint/_linter/apply.py index 25b7455..891ff5d 100644 --- a/xrlint/_linter/apply.py +++ b/xrlint/_linter/apply.py @@ -1,4 +1,4 @@ -from xrlint.node import AttrNode, AttrsNode, DataArrayNode, DatasetNode +from xrlint.node import AttrNode, AttrsNode, VariableNode, DatasetNode from xrlint.rule import RuleConfig, RuleExit, RuleOp from ..constants import NODE_ROOT_NAME @@ -58,49 +58,47 @@ def _visit_dataset_node(rule_op: RuleOp, context: RuleContextImpl, node: Dataset attrs=node.dataset.attrs, ), ) - for name, data_array in node.dataset.coords.items(): - _visit_data_array_node( + for name, variable in 
node.dataset.coords.items(): + _visit_variable_node( rule_op, context, - DataArrayNode( + VariableNode( parent=node, path=f"{node.path}.coords[{name!r}]", name=name, - data_array=data_array, + array=variable, ), ) - for name, data_array in node.dataset.data_vars.items(): - _visit_data_array_node( + for name, variable in node.dataset.data_vars.items(): + _visit_variable_node( rule_op, context, - DataArrayNode( + VariableNode( parent=node, path=f"{node.path}.data_vars[{name!r}]", name=name, - data_array=data_array, + array=variable, ), ) -def _visit_data_array_node( - rule_op: RuleOp, context: RuleContextImpl, node: DataArrayNode -): +def _visit_variable_node(rule_op: RuleOp, context: RuleContextImpl, node: VariableNode): with context.use_state(node=node): - rule_op.validate_data_array(context, node) + rule_op.validate_variable(context, node) _visit_attrs_node( rule_op, context, AttrsNode( parent=node, path=f"{node.path}.attrs", - attrs=node.data_array.attrs, + attrs=node.array.attrs, ), ) def _visit_attrs_node(rule_op: RuleOp, context: RuleContextImpl, node: AttrsNode): with context.use_state(node=node): - rule_op.attrs(context, node) + rule_op.validate_attrs(context, node) for name, value in node.attrs.items(): _visit_attr_node( rule_op, diff --git a/xrlint/all.py b/xrlint/all.py index 0091c8e..af9ea13 100644 --- a/xrlint/all.py +++ b/xrlint/all.py @@ -8,7 +8,7 @@ FormatterRegistry, ) from xrlint.linter import Linter, new_linter -from xrlint.node import AttrNode, AttrsNode, DataArrayNode, DatasetNode, Node +from xrlint.node import AttrNode, AttrsNode, VariableNode, DatasetNode, Node from xrlint.plugin import Plugin, PluginMeta, new_plugin from xrlint.processor import Processor, ProcessorMeta, ProcessorOp, define_processor from xrlint.result import ( @@ -50,7 +50,7 @@ "FormatterRegistry", "AttrNode", "AttrsNode", - "DataArrayNode", + "VariableNode", "DatasetNode", "Node", "Plugin", diff --git a/xrlint/node.py b/xrlint/node.py index 7256417..4c78bdf 100644 --- 
a/xrlint/node.py +++ b/xrlint/node.py @@ -43,13 +43,17 @@ class DatasetNode(XarrayNode): @dataclass(frozen=True, kw_only=True) -class DataArrayNode(XarrayNode): - """Data array node.""" +class VariableNode(XarrayNode): + """Variable node. + Could be a coordinate or data variable. + If you need to distinguish, you can use expression + `node.name in ctx.dataset.coords`. + """ name: Hashable - """The name of the data array.""" + """The name of the variable.""" - data_array: xr.DataArray + array: xr.DataArray """The `xarray.DataArray` instance.""" diff --git a/xrlint/plugins/core/rules/content_desc.py b/xrlint/plugins/core/rules/content_desc.py index 3c34971..8e3a75c 100644 --- a/xrlint/plugins/core/rules/content_desc.py +++ b/xrlint/plugins/core/rules/content_desc.py @@ -1,6 +1,6 @@ import re -from xrlint.node import DataArrayNode, DatasetNode +from xrlint.node import VariableNode, DatasetNode from xrlint.plugins.core.plugin import plugin from xrlint.rule import RuleContext, RuleExit, RuleOp from xrlint.util.schema import schema @@ -85,7 +85,7 @@ def validate_dataset(self, ctx: RuleContext, node: DatasetNode): if attr_name not in dataset_attrs: ctx.report(f"Missing attribute {attr_name!r}.") - def validate_data_array(self, ctx: RuleContext, node: DataArrayNode): + def validate_variable(self, ctx: RuleContext, node: VariableNode): if self.skip_vars: # Since dataset() has already been processed, # no need to check other nodes. 
@@ -100,7 +100,7 @@ def validate_data_array(self, ctx: RuleContext, node: DataArrayNode): # Ignored variable return - var_attrs = node.data_array.attrs + var_attrs = node.array.attrs dataset_attrs = ctx.dataset.attrs for attr_name in self.common_attrs: if attr_name not in var_attrs and attr_name not in dataset_attrs: diff --git a/xrlint/plugins/core/rules/lat_lon_coordinate.py b/xrlint/plugins/core/rules/lat_lon_coordinate.py index ef401aa..256bb9d 100644 --- a/xrlint/plugins/core/rules/lat_lon_coordinate.py +++ b/xrlint/plugins/core/rules/lat_lon_coordinate.py @@ -2,7 +2,7 @@ import xarray as xr -from xrlint.node import DataArrayNode +from xrlint.node import VariableNode from xrlint.plugins.core.plugin import plugin from xrlint.rule import RuleContext, RuleOp @@ -30,13 +30,11 @@ ), ) class LatCoordinate(RuleOp): - def validate_data_array(self, ctx: RuleContext, node: DataArrayNode): - if node.name in ctx.dataset.coords and _is_lat_var( - str(node.name), node.data_array - ): + def validate_variable(self, ctx: RuleContext, node: VariableNode): + if node.name in ctx.dataset.coords and _is_lat_var(str(node.name), node.array): _maybe_report( ctx, - node.data_array.attrs, + node.array.attrs, LAT_UNITS, LAT_UNITS_ALIASES, LAT_NAME, @@ -55,13 +53,11 @@ def validate_data_array(self, ctx: RuleContext, node: DataArrayNode): ), ) class LonCoordinate(RuleOp): - def validate_data_array(self, ctx: RuleContext, node: DataArrayNode): - if node.name in ctx.dataset.coords and _is_lon_var( - str(node.name), node.data_array - ): + def validate_variable(self, ctx: RuleContext, node: VariableNode): + if node.name in ctx.dataset.coords and _is_lon_var(str(node.name), node.array): _maybe_report( ctx, - node.data_array.attrs, + node.array.attrs, LON_UNITS, LON_UNITS_ALIASES, LON_NAME, diff --git a/xrlint/plugins/core/rules/no_empty_attrs.py b/xrlint/plugins/core/rules/no_empty_attrs.py index 7405c8e..2ea0613 100644 --- a/xrlint/plugins/core/rules/no_empty_attrs.py +++ 
b/xrlint/plugins/core/rules/no_empty_attrs.py @@ -11,7 +11,7 @@ description="Every dataset element should have metadata that describes it.", ) class NoEmptyAttrs(RuleOp): - def attrs(self, ctx: RuleContext, node: AttrsNode): + def validate_attrs(self, ctx: RuleContext, node: AttrsNode): if not node.attrs: ctx.report( "Missing metadata, attributes are empty.", diff --git a/xrlint/plugins/core/rules/no_empty_chunks.py b/xrlint/plugins/core/rules/no_empty_chunks.py index 279702d..e89c975 100644 --- a/xrlint/plugins/core/rules/no_empty_chunks.py +++ b/xrlint/plugins/core/rules/no_empty_chunks.py @@ -1,4 +1,4 @@ -from xrlint.node import DataArrayNode +from xrlint.node import VariableNode from xrlint.plugins.core.plugin import plugin from xrlint.rule import RuleContext, RuleExit, RuleOp @@ -17,17 +17,17 @@ ), ) class NoEmptyChunks(RuleOp): - def validate_dataset(self, ctx: RuleContext, node: DataArrayNode): + def validate_dataset(self, ctx: RuleContext, node: VariableNode): source = ctx.dataset.encoding.get("source") is_zarr = isinstance(source, str) and source.endswith(".zarr") if not is_zarr: # if not a Zarr, no need to check further raise RuleExit - def validate_data_array(self, ctx: RuleContext, node: DataArrayNode): + def validate_variable(self, ctx: RuleContext, node: VariableNode): if ( - "write_empty_chunks" not in node.data_array.encoding - and "chunks" in node.data_array.encoding - and "_FillValue" in node.data_array.encoding + "write_empty_chunks" not in node.array.encoding + and "chunks" in node.array.encoding + and "_FillValue" in node.array.encoding ): ctx.report("Consider writing the dataset using 'write_empty_chunks=True'.") diff --git a/xrlint/plugins/core/rules/time_coordinate.py b/xrlint/plugins/core/rules/time_coordinate.py index f62b78c..b4bc337 100644 --- a/xrlint/plugins/core/rules/time_coordinate.py +++ b/xrlint/plugins/core/rules/time_coordinate.py @@ -1,6 +1,6 @@ import re -from xrlint.node import DataArrayNode +from xrlint.node import 
VariableNode from xrlint.plugins.core.plugin import plugin from xrlint.rule import RuleContext, RuleOp @@ -52,8 +52,8 @@ ), ) class TimeCoordinate(RuleOp): - def validate_data_array(self, ctx: RuleContext, node: DataArrayNode): - array = node.data_array + def validate_variable(self, ctx: RuleContext, node: VariableNode): + array = node.array attrs = array.attrs encoding = array.encoding diff --git a/xrlint/plugins/core/rules/var_desc.py b/xrlint/plugins/core/rules/var_desc.py index 04139d8..3bde393 100644 --- a/xrlint/plugins/core/rules/var_desc.py +++ b/xrlint/plugins/core/rules/var_desc.py @@ -1,4 +1,4 @@ -from xrlint.node import DataArrayNode +from xrlint.node import VariableNode from xrlint.plugins.core.plugin import plugin from xrlint.rule import RuleContext, RuleOp from xrlint.util.schema import schema @@ -37,12 +37,12 @@ class VarDesc(RuleOp): def __init__(self, attrs: list[str] | None = None): self._attrs = attrs if attrs is not None else DEFAULT_ATTRS - def validate_data_array(self, ctx: RuleContext, node: DataArrayNode): + def validate_variable(self, ctx: RuleContext, node: VariableNode): if node.name not in ctx.dataset.data_vars: # This rule applies to data variables only return - var_attrs = node.data_array.attrs + var_attrs = node.array.attrs for attr_name in self._attrs: if attr_name not in var_attrs: ctx.report(f"Missing attribute {attr_name!r}.") diff --git a/xrlint/plugins/core/rules/var_flags.py b/xrlint/plugins/core/rules/var_flags.py index 8bc0a9a..b198964 100644 --- a/xrlint/plugins/core/rules/var_flags.py +++ b/xrlint/plugins/core/rules/var_flags.py @@ -2,7 +2,7 @@ import numpy as np -from xrlint.node import DataArrayNode +from xrlint.node import VariableNode from xrlint.plugins.core.plugin import plugin from xrlint.rule import RuleContext, RuleOp @@ -22,10 +22,10 @@ docs_url="https://cfconventions.org/cf-conventions/cf-conventions.html#flags", ) class VarFlags(RuleOp): - def validate_data_array(self, ctx: RuleContext, node: DataArrayNode): - 
flag_values = node.data_array.attrs.get(FLAG_VALUES) - flag_masks = node.data_array.attrs.get(FLAG_MASKS) - flag_meanings = node.data_array.attrs.get(FLAG_MEANINGS) + def validate_variable(self, ctx: RuleContext, node: VariableNode): + flag_values = node.array.attrs.get(FLAG_VALUES) + flag_masks = node.array.attrs.get(FLAG_MASKS) + flag_meanings = node.array.attrs.get(FLAG_MEANINGS) has_values = flag_values is not None has_masks = flag_masks is not None @@ -60,7 +60,7 @@ def validate_data_array(self, ctx: RuleContext, node: DataArrayNode): if has_values and has_masks: _validate_variable( ctx, - node.data_array.dtype, + node.array.dtype, ) diff --git a/xrlint/plugins/core/rules/var_units.py b/xrlint/plugins/core/rules/var_units.py index eea98cc..7113f36 100644 --- a/xrlint/plugins/core/rules/var_units.py +++ b/xrlint/plugins/core/rules/var_units.py @@ -1,4 +1,4 @@ -from xrlint.node import DataArrayNode +from xrlint.node import VariableNode from xrlint.plugins.core.plugin import plugin from xrlint.rule import RuleContext, RuleOp @@ -11,8 +11,8 @@ docs_url="https://cfconventions.org/cf-conventions/cf-conventions.html#units", ) class VarUnits(RuleOp): - def validate_data_array(self, ctx: RuleContext, node: DataArrayNode): - data_array = node.data_array + def validate_variable(self, ctx: RuleContext, node: VariableNode): + data_array = node.array units = data_array.attrs.get("units") if units is None: if "grid_mapping_name" not in data_array.attrs: diff --git a/xrlint/plugins/xcube/rules/cube_dims_order.py b/xrlint/plugins/xcube/rules/cube_dims_order.py index 17ac8d7..dc87e50 100644 --- a/xrlint/plugins/xcube/rules/cube_dims_order.py +++ b/xrlint/plugins/xcube/rules/cube_dims_order.py @@ -1,4 +1,4 @@ -from xrlint.node import DataArrayNode +from xrlint.node import VariableNode from xrlint.plugins.xcube.constants import LAT_NAME, LON_NAME, TIME_NAME, X_NAME, Y_NAME from xrlint.plugins.xcube.plugin import plugin from xrlint.rule import RuleContext, RuleOp @@ -17,10 +17,10 
@@ ), ) class CubeDimsOrder(RuleOp): - def validate_data_array(self, ctx: RuleContext, node: DataArrayNode): + def validate_variable(self, ctx: RuleContext, node: VariableNode): if node.in_data_vars(): - dims = list(node.data_array.dims) - indexes = {d: i for i, d in enumerate(node.data_array.dims)} + dims = list(node.array.dims) + indexes = {d: i for i, d in enumerate(node.array.dims)} yx_names = None if X_NAME in indexes and Y_NAME in indexes: diff --git a/xrlint/plugins/xcube/rules/data_var_colors.py b/xrlint/plugins/xcube/rules/data_var_colors.py index b034286..2435fd5 100644 --- a/xrlint/plugins/xcube/rules/data_var_colors.py +++ b/xrlint/plugins/xcube/rules/data_var_colors.py @@ -1,4 +1,4 @@ -from xrlint.node import DataArrayNode +from xrlint.node import VariableNode from xrlint.plugins.xcube.plugin import plugin from xrlint.plugins.xcube.util import is_spatial_var from xrlint.rule import RuleContext, RuleOp @@ -16,8 +16,8 @@ ), ) class DataVarColors(RuleOp): - def validate_data_array(self, ctx: RuleContext, node: DataArrayNode): - array = node.data_array + def validate_variable(self, ctx: RuleContext, node: VariableNode): + array = node.array if not node.in_data_vars() or not is_spatial_var(array): return attrs = array.attrs diff --git a/xrlint/plugins/xcube/rules/increasing_time.py b/xrlint/plugins/xcube/rules/increasing_time.py index 9e84edd..e48073c 100644 --- a/xrlint/plugins/xcube/rules/increasing_time.py +++ b/xrlint/plugins/xcube/rules/increasing_time.py @@ -1,6 +1,6 @@ import numpy as np -from xrlint.node import DataArrayNode +from xrlint.node import VariableNode from xrlint.plugins.xcube.plugin import plugin from xrlint.rule import RuleContext, RuleExit, RuleOp from xrlint.util.formatting import format_count, format_seq @@ -16,8 +16,8 @@ ), ) class IncreasingTime(RuleOp): - def validate_data_array(self, ctx: RuleContext, node: DataArrayNode): - array = node.data_array + def validate_variable(self, ctx: RuleContext, node: VariableNode): + array = 
node.array if node.in_coords() and node.name == "time" and array.dims == ("time",): diff_array: np.ndarray = array.diff("time").values if not np.count_nonzero(diff_array > 0) == diff_array.size: diff --git a/xrlint/plugins/xcube/rules/no_chunked_coords.py b/xrlint/plugins/xcube/rules/no_chunked_coords.py index ce69b6b..9f577f0 100644 --- a/xrlint/plugins/xcube/rules/no_chunked_coords.py +++ b/xrlint/plugins/xcube/rules/no_chunked_coords.py @@ -1,6 +1,6 @@ import math -from xrlint.node import DataArrayNode +from xrlint.node import VariableNode from xrlint.plugins.xcube.plugin import plugin from xrlint.rule import RuleContext, RuleOp from xrlint.util.schema import schema @@ -36,13 +36,13 @@ class NoChunkedCoords(RuleOp): def __init__(self, limit: int = DEFAULT_LIMIT): self.limit = limit - def validate_data_array(self, ctx: RuleContext, node: DataArrayNode): - if node.name not in ctx.dataset.coords or node.data_array.ndim != 1: + def validate_variable(self, ctx: RuleContext, node: VariableNode): + if node.name not in ctx.dataset.coords or node.array.ndim != 1: return - chunks = node.data_array.encoding.get("chunks") + chunks = node.array.encoding.get("chunks") if isinstance(chunks, (list, tuple)) and len(chunks) == 1: - num_chunks = math.ceil(node.data_array.size / chunks[0]) + num_chunks = math.ceil(node.array.size / chunks[0]) if num_chunks > self.limit: ctx.report( f"Number of chunks exceeds limit: {num_chunks} > {self.limit}.", diff --git a/xrlint/rule.py b/xrlint/rule.py index 5eb97f6..bacfcda 100644 --- a/xrlint/rule.py +++ b/xrlint/rule.py @@ -6,7 +6,7 @@ import xarray as xr from xrlint.constants import SEVERITY_ENUM, SEVERITY_ENUM_TEXT -from xrlint.node import AttrNode, AttrsNode, DataArrayNode, DatasetNode +from xrlint.node import AttrNode, AttrsNode, VariableNode, DatasetNode from xrlint.operation import Operation, OperationMeta from xrlint.result import Suggestion from xrlint.util.constructible import ValueConstructible @@ -86,7 +86,7 @@ def 
validate_dataset(self, context: RuleContext, node: DatasetNode) -> None: RuleExit: to exit rule logic and further node traversal """ - def validate_data_array(self, context: RuleContext, node: DataArrayNode) -> None: + def validate_variable(self, context: RuleContext, node: VariableNode) -> None: """Validate the given data array (variable) node. Args: @@ -97,7 +97,7 @@ def validate_data_array(self, context: RuleContext, node: DataArrayNode) -> None RuleExit: to exit rule logic and further node traversal """ - def attrs(self, context: RuleContext, node: AttrsNode) -> None: + def validate_attrs(self, context: RuleContext, node: AttrsNode) -> None: """Validate the given attributes node. Args: