diff --git a/CHANGES.md b/CHANGES.md index 2cf7b1a..dee3b3b 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -3,10 +3,11 @@ ## Version 0.3.0 (in development) - Added more rules - - core rule "flags" - - core rule "lon-coordinate" - - core rule "lat-coordinate" - - core rule "time-coordinate" (#15) + - core/CF rule "flags" + - core/CF rule "lon-coordinate" + - core/CF rule "lat-coordinate" + - core/CF rule "time-coordinate" (#15) + - core rule "no-empty-chunks" - xcube rule "time-naming" (#15) - Fixed problem where referring to values in modules via diff --git a/docs/rule-ref.md b/docs/rule-ref.md index 6db23a8..d9aaf0b 100644 --- a/docs/rule-ref.md +++ b/docs/rule-ref.md @@ -22,7 +22,7 @@ Contained in: `all`-:material-lightning-bolt: `recommended`-:material-alert: Validate attributes 'flag_values', 'flag_masks' and 'flag_meanings' that make variables that contain flag values self describing. [:material-information-variant:](https://cfconventions.org/cf-conventions/cf-conventions.html#flags) -Contained in: `all`-:material-lightning-bolt: +Contained in: `all`-:material-lightning-bolt: `recommended`-:material-lightning-bolt: ### :material-bug: `grid-mappings` @@ -33,16 +33,16 @@ Contained in: `all`-:material-lightning-bolt: `recommended`-:material-lightning ### :material-bug: `lat-coordinate` Latitude coordinate should have standard units and standard names. -[More information.](https://cfconventions.org/cf-conventions/cf-conventions.html#latitude-coordinate) +[:material-information-variant:](https://cfconventions.org/cf-conventions/cf-conventions.html#latitude-coordinate) -Contained in: `all`-:material-lightning-bolt: +Contained in: `all`-:material-lightning-bolt: `recommended`-:material-lightning-bolt: ### :material-bug: `lon-coordinate` Longitude coordinate should have standard units and standard names. -[More information.](https://cfconventions.org/cf-conventions/cf-conventions.html#longitude-coordinate) +[:material-information-variant:](https://cfconventions.org/cf-conventions/cf-conventions.html#longitude-coordinate) -Contained in: `all`-:material-lightning-bolt: +Contained in: `all`-:material-lightning-bolt: `recommended`-:material-lightning-bolt: ### :material-lightbulb: `no-empty-attrs` @@ -50,10 +50,17 @@ Every dataset element should have metadata that describes it. Contained in: `all`-:material-lightning-bolt: `recommended`-:material-alert: +### :material-lightbulb: `no-empty-chunks` + +Empty chunks should not be encoded and written. The rule currently applies to Zarr format only. +[:material-information-variant:](https://docs.xarray.dev/en/stable/generated/xarray.Dataset.to_zarr.html#xarray-dataset-to-zarr) + +Contained in: `all`-:material-lightning-bolt: `recommended`-:material-alert: + ### :material-bug: `time-coordinate` Time coordinate (standard_name='time') should have unambiguous time units encoding. -[More information.](https://cfconventions.org/cf-conventions/cf-conventions.html#time-coordinate) +[:material-information-variant:](https://cfconventions.org/cf-conventions/cf-conventions.html#time-coordinate) Contained in: `all`-:material-lightning-bolt: `recommended`-:material-lightning-bolt: @@ -117,7 +124,7 @@ Contained in: `all`-:material-lightning-bolt: `recommended`-:material-lightning ### :material-bug: `time-naming` Time coordinate and dimension should be called 'time'. -[More information.](https://xcube.readthedocs.io/en/latest/cubespec.html#temporal-reference) +[:material-information-variant:](https://xcube.readthedocs.io/en/latest/cubespec.html#temporal-reference) Contained in: `all`-:material-lightning-bolt: `recommended`-:material-lightning-bolt: diff --git a/notebooks/mkdataset.py b/notebooks/mkdataset.py index 39b9a04..c15f0cf 100644 --- a/notebooks/mkdataset.py +++ b/notebooks/mkdataset.py @@ -13,18 +13,22 @@ def make_dataset() -> xr.Dataset: attrs=dict(title="SST-Climatology Subset"), coords={ "x": xr.DataArray( - np.linspace(-180, 180, nx), dims="x", attrs={ + np.linspace(-180, 180, nx), + dims="x", + attrs={ "standard_name": "longitude", "long_name": "longitude", - "units": "degrees_east" - } + "units": "degrees_east", + }, ), "y": xr.DataArray( - np.linspace(-90, 90, ny), dims="y", attrs={ + np.linspace(-90, 90, ny), + dims="y", + attrs={ "standard_name": "latitude", "long_name": "latitude", - "units": "degrees_north" - } + "units": "degrees_north", + }, ), "time": xr.DataArray( [365 * i for i in range(nt)], diff --git a/tests/plugins/core/rules/test_no_empty_chunks.py b/tests/plugins/core/rules/test_no_empty_chunks.py new file mode 100644 index 0000000..89b24d9 --- /dev/null +++ b/tests/plugins/core/rules/test_no_empty_chunks.py @@ -0,0 +1,41 @@ +import xarray as xr + +from xrlint.plugins.core.rules.no_empty_chunks import NoEmptyChunks +from xrlint.testing import RuleTest, RuleTester + +# valid, because it does not write empty chunks +valid_dataset_0 = xr.Dataset(attrs=dict(title="OC-Climatology")) +valid_dataset_0.encoding["source"] = "test.zarr" +valid_dataset_0["sst"] = xr.DataArray([273, 274, 272], dims="time") +valid_dataset_0["sst"].encoding["_FillValue"] = 0 +valid_dataset_0["sst"].encoding["chunks"] = [ + 1, +] +valid_dataset_0["sst"].encoding["write_empty_chunks"] = False +# valid, because it does not apply +valid_dataset_1 = valid_dataset_0.copy() +del valid_dataset_1.encoding["source"] +# valid, because it does not apply +valid_dataset_2 = valid_dataset_0.copy() +valid_dataset_2.encoding["source"] = "test.nc" +# valid, because it does not apply +valid_dataset_3 = valid_dataset_0.copy() +valid_dataset_3.sst.encoding["write_empty_chunks"] = True + +# valid, because it does not apply +invalid_dataset_0 = valid_dataset_0.copy() +del invalid_dataset_0.sst.encoding["write_empty_chunks"] + +NoEmptyChunksTest = RuleTester.define_test( + "no-empty-chunks", + NoEmptyChunks, + valid=[ + RuleTest(dataset=valid_dataset_0), + RuleTest(dataset=valid_dataset_1), + RuleTest(dataset=valid_dataset_2), + RuleTest(dataset=valid_dataset_3), + ], + invalid=[ + RuleTest(dataset=invalid_dataset_0), + ], +) diff --git a/tests/plugins/core/test_plugin.py b/tests/plugins/core/test_plugin.py index 5ab9d7a..0c7baed 100644 --- a/tests/plugins/core/test_plugin.py +++ b/tests/plugins/core/test_plugin.py @@ -16,6 +16,7 @@ def test_rules_complete(self): "lon-coordinate", "no-empty-attrs", "time-coordinate", + "no-empty-chunks", "var-units-attr", }, set(plugin.rules.keys()), diff --git a/tests/plugins/xcube/test_plugin.py b/tests/plugins/xcube/test_plugin.py index e1d272f..e0f4654 100644 --- a/tests/plugins/xcube/test_plugin.py +++ b/tests/plugins/xcube/test_plugin.py @@ -4,7 +4,6 @@ class ExportPluginTest(TestCase): - def test_rules_complete(self): plugin = export_plugin() self.assertEqual( diff --git a/xrlint/plugins/core/__init__.py b/xrlint/plugins/core/__init__.py index 0dedda9..2457614 100644 --- a/xrlint/plugins/core/__init__.py +++ b/xrlint/plugins/core/__init__.py @@ -19,6 +19,7 @@ def export_plugin() -> Plugin: "lat-coordinate": "error", "lon-coordinate": "error", "no-empty-attrs": "warn", + "no-empty-chunks": "warn", "time-coordinate": "error", "var-units-attr": "warn", }, diff --git a/xrlint/plugins/core/rules/flags.py b/xrlint/plugins/core/rules/flags.py index b15fae1..0138680 100644 --- a/xrlint/plugins/core/rules/flags.py +++ b/xrlint/plugins/core/rules/flags.py @@ -70,8 +70,7 @@ def _validate_flag_values( ) -> int | None: if not has_meanings: ctx.report( - f"Missing attribute {FLAG_MEANINGS!r} to explain" - f" attribute {FLAG_VALUES!r}" + f"Missing attribute {FLAG_MEANINGS!r} to explain attribute {FLAG_VALUES!r}" ) type_ok, flag_count = _check_values(flag_values) if not type_ok or flag_count is None: @@ -87,8 +86,7 @@ def _validate_flag_masks( ) -> int | None: if not has_meanings: ctx.report( - f"Missing attribute {FLAG_MEANINGS!r} to explain" - f" attribute {FLAG_MASKS!r}" + f"Missing attribute {FLAG_MEANINGS!r} to explain attribute {FLAG_MASKS!r}" ) type_ok, flag_masks_count = _check_values(flag_masks) if not type_ok or flag_masks_count is None: diff --git a/xrlint/plugins/core/rules/no_empty_chunks.py b/xrlint/plugins/core/rules/no_empty_chunks.py new file mode 100644 index 0000000..a4b7186 --- /dev/null +++ b/xrlint/plugins/core/rules/no_empty_chunks.py @@ -0,0 +1,33 @@ +from xrlint.node import DataArrayNode +from xrlint.plugins.core.rules import plugin +from xrlint.rule import RuleContext, RuleExit, RuleOp + + +@plugin.define_rule( + "no-empty-chunks", + version="1.0.0", + type="suggestion", + description=( + "Empty chunks should not be encoded and written." + " The rule currently applies to Zarr format only." + ), + docs_url=( + "https://docs.xarray.dev/en/stable/generated/xarray.Dataset.to_zarr.html" + "#xarray-dataset-to-zarr" + ), +) +class NoEmptyChunks(RuleOp): + def dataset(self, ctx: RuleContext, node: DataArrayNode): + source = ctx.dataset.encoding.get("source") + is_zarr = isinstance(source, str) and source.endswith(".zarr") + if not is_zarr: + # if not a Zarr, no need to check further + raise RuleExit + + def data_array(self, ctx: RuleContext, node: DataArrayNode): + if ( + "write_empty_chunks" not in node.data_array.encoding + and "chunks" in node.data_array.encoding + and "_FillValue" in node.data_array.encoding + ): + ctx.report("Consider writing the dataset using 'write_empty_chunks=True`.") diff --git a/xrlint/plugins/core/rules/time_coordinate.py b/xrlint/plugins/core/rules/time_coordinate.py index 008ee0a..6842ca9 100644 --- a/xrlint/plugins/core/rules/time_coordinate.py +++ b/xrlint/plugins/core/rules/time_coordinate.py @@ -15,13 +15,11 @@ " unambiguous time units encoding." ), docs_url=( - "https://cfconventions.org/cf-conventions/cf-conventions.html" - "#time-coordinate" + "https://cfconventions.org/cf-conventions/cf-conventions.html#time-coordinate" ), ) class TimeCoordinate(RuleOp): def data_array(self, ctx: RuleContext, node: DataArrayNode): - array = node.data_array attrs = array.attrs encoding = array.encoding