bcdev · forman · Jan 24, 2025 · Jan 20, 2025 · Jan 20, 2025 · Jan 20, 2025
diff --git a/CHANGES.md b/CHANGES.md
@@ -2,7 +2,40 @@
 
 ## Version 0.4.0 (in development)
 
-
+- New xcube multi-level dataset rules:
+  - `ml-dataset-meta`: verifies that a meta info file exists and is consistent
+  - `ml-dataset-xy`: verifies that the levels have expected spatial resolutions
+  - `ml-dataset-time`: verifies that the levels have expected time dimension, if any
+- Now supporting xcube multi-level datasets `*.levels`:
+  - Added xcube plugin processor `"xcube/multi-level-dataset"` that is used
+    inside the predefined xcube configurations "all" and "recommended".
+- Directories that are recognized by file patterns associated with a non-empty 
+  configuration object are no longer recursively
+  traversed.
+- Introduced method `Plugin.define_config` which defines a named plugin
+  configuration. It takes a name and a configuration object or list of 
+  configuration objects.
+- Changed the way configuration is defined and exported from
+  Python configuration files:
+  - Renamed the function that exports configuration from `export_configs`
+    to `export_config`.
+  - The returned value should be a list of values that can be
+    converted into configuration objects: any mix of `Config` instances,
+    dictionaries, or names that refer to named configurations of a plugin.
+- Node path names now contain the dataset index if a file path
+  has been opened by a processor that produced multiple
+  datasets to validate.
+
+- Other changes:
+  - Changed type of `Plugin.configs` from `dict[str, Config]` to 
+    `dict[str, list[Config]]`.
+  - Inbuilt plugin rules now import their `plugin` instance from
+    `xrlint.plugins.<plugin>.plugin` module.
+  - `JsonSerializable` now recognizes `dataclass` instances and no longer
+    serializes property values that are also default values.
+  - Pinned zarr dependency to be >=2.18, <3 until test
+    `tests.plugins.xcube.processors.test_mldataset.MultiLevelDatasetProcessorTest`
+    is adjusted or fsspec's memory filesystem is updated.
 
 ## Version 0.3.0 (from 2025-01-20)
 

diff --git a/docs/config.md b/docs/config.md
@@ -43,7 +43,7 @@ Same using JSON:
 And as Python script:
 
 ```python
-def export_configs():
+def export_config():
     return [
       {"files": ["**/*.zarr", "**/*.nc"]},
       {

diff --git a/docs/rule-ref.md b/docs/rule-ref.md
@@ -114,6 +114,27 @@ Latitude and longitude coordinates and dimensions should be called 'lat' and 'lo
 
 Contained in:  `all`-:material-lightning-bolt: `recommended`-:material-lightning-bolt:
 
+### :material-lightbulb: `ml-dataset-meta`
+
+Multi-level datasets should provide '.zlevels' meta information file and if so, it should be consistent.
+[:material-information-variant:](https://xcube.readthedocs.io/en/latest/mldatasets.html#the-xcube-levels-format)
+
+Contained in:  `all`-:material-lightning-bolt: `recommended`-:material-lightning-bolt:
+
+### :material-bug: `ml-dataset-time`
+
+The `time` dimension of multi-level datasets should use a chunk size of 1. This allows for faster image tile generation for visualisation.
+[:material-information-variant:](https://xcube.readthedocs.io/en/latest/mldatasets.html#definition)
+
+Contained in:  `all`-:material-lightning-bolt: `recommended`-:material-alert:
+
+### :material-bug: `ml-dataset-xy`
+
+Multi-level dataset levels should provide spatial resolutions decreasing by powers of two.
+[:material-information-variant:](https://xcube.readthedocs.io/en/latest/mldatasets.html#definition)
+
+Contained in:  `all`-:material-lightning-bolt: `recommended`-:material-lightning-bolt:
+
 ### :material-bug: `single-grid-mapping`
 
 A single grid mapping shall be used for all spatial data variables of a datacube.

diff --git a/docs/todo.md b/docs/todo.md
@@ -14,10 +14,6 @@
 ## Desired
 
 - project logo
-- support validating xcube 'levels' format. Options:
-    - implement xarray backend so we can open them using `xr.open_dataset`
-      with `opener_options: {"engine": "xc-levels"}`.
-    - implement a `xrlint.processor.Processor` for that purpose.
 - add some more tests so we reach 99% coverage
 - support rule op args/kwargs schema validation
 - Support `RuleTest.expected`, it is currently unused

diff --git a/environment.yml b/environment.yml
@@ -20,7 +20,8 @@ dependencies:
   - requests-mock
   - ruff
   # Testing Datasets
+  - dask
   - pandas
   - netcdf4
   - numpy
-  - zarr
+  - zarr >=2.18,<3  # tests fail with zarr 3+
diff --git a/examples/plugin_config.py b/examples/plugin_config.py
@@ -3,27 +3,11 @@
 using the `Plugin` class and its `define_rule()` decorator method.
 """
 
-from xrlint.config import Config
 from xrlint.node import DatasetNode
 from xrlint.plugin import new_plugin
 from xrlint.rule import RuleContext, RuleOp
 
-plugin = new_plugin(
-    name="hello-plugin",
-    version="1.0.0",
-    configs={
-        # "configs" entries must be `Config` objects!
-        "recommended": Config.from_value(
-            {
-                "rules": {
-                    "hello/good-title": "warn",
-                    # Configure more rules here...
-                },
-            }
-        ),
-        # Add more configurations here...
-    },
-)
+plugin = new_plugin(name="hello-plugin", version="1.0.0")
 
 
 @plugin.define_rule("good-title")
@@ -42,7 +26,22 @@ def dataset(self, ctx: RuleContext, node: DatasetNode):
 # Define more rules here...
 
 
-def export_configs():
+plugin.define_config(
+    "recommended",
+    [
+        {
+            "rules": {
+                "hello/good-title": "warn",
+                # Configure more rules here...
+            },
+        }
+    ],
+)
+
+# Add more configurations here...
+
+
+def export_config():
     return [
         # Use "hello" plugin
         {

diff --git a/examples/virtual_plugin_config.py b/examples/virtual_plugin_config.py
@@ -22,7 +22,7 @@ def dataset(self, ctx: RuleContext, node: DatasetNode):
 # Define more rules here...
 
 
-def export_configs():
+def export_config():
     return [
         # Define and use "hello" plugin
         {
@@ -37,12 +37,14 @@ def export_configs():
                         # Add more rules here...
                     },
                     "configs": {
-                        "recommended": {
-                            "rules": {
-                                "hello/good-title": "warn",
-                                # Configure more rules here...
-                            },
-                        },
+                        "recommended": [
+                            {
+                                "rules": {
+                                    "hello/good-title": "warn",
+                                    # Configure more rules here...
+                                },
+                            }
+                        ],
                         # Add more configurations here...
                     },
                 },

diff --git a/mkruleref.py b/mkruleref.py
@@ -1,4 +1,5 @@
 from xrlint.plugin import Plugin
+from xrlint.rule import RuleConfig
 
 # for icons, see
 # https://squidfunk.github.io/mkdocs-material/reference/icons-emojis/
@@ -39,7 +40,7 @@ def write_rule_ref_page():
 
 
 def write_plugin_rules(stream, plugin: Plugin):
-    configs = plugin.configs
+    config_rules = get_plugin_rule_configs(plugin)
     for rule_id in sorted(plugin.rules.keys()):
         rule_meta = plugin.rules[rule_id].meta
         stream.write(
@@ -51,9 +52,8 @@ def write_plugin_rules(stream, plugin: Plugin):
         stream.write("\n\n")
         # List the predefined configurations that contain the rule
         stream.write("Contained in: ")
-        for config_id in sorted(configs.keys()):
-            config = configs[config_id]
-            rule_configs = config.rules or {}
+        for config_id in sorted(config_rules.keys()):
+            rule_configs = config_rules[config_id]
             rule_config = rule_configs.get(rule_id) or rule_configs.get(
                 f"{plugin.meta.name}/{rule_id}"
             )
@@ -62,5 +62,21 @@ def write_plugin_rules(stream, plugin: Plugin):
         stream.write("\n\n")
 
 
+def get_plugin_rule_configs(plugin):
+    configs = plugin.configs
+    config_rules: dict[str, dict[str, RuleConfig]] = {}
+    for config_name, config_list in configs.items():
+        # Note: the rules of all config objects in the list are merged,
+        # later ones overriding earlier ones. This assumes that a plugin
+        # configures its rules in one dedicated config object; it ignores
+        # file patterns that may make rule configurations file-specific.
+        rule_configs = {}
+        for config in config_list:
+            if config.rules:
+                rule_configs.update(config.rules)
+        config_rules[config_name] = rule_configs
+    return config_rules
+
+
 if __name__ == "__main__":
     write_rule_ref_page()
diff --git a/pyproject.toml b/pyproject.toml
@@ -73,10 +73,11 @@ dev = [
   "ruff",
   "twine",
   # Dataset testing
+  "dask",
   "netcdf4",
   "numpy",
   "pandas",
-  "zarr",
+  "zarr >=2.18,<3",
 ]
 doc = [
   "mkdocs",

diff --git a/tests/_linter/test_rulectx.py b/tests/_linter/test_rulectx.py
@@ -12,14 +12,15 @@ class RuleContextImplTest(TestCase):
     def test_defaults(self):
         config = Config()
         dataset = xr.Dataset()
-        context = RuleContextImpl(config, dataset, "./ds.zarr")
+        context = RuleContextImpl(config, dataset, "./ds.zarr", None)
         self.assertIs(config, context.config)
         self.assertIs(dataset, context.dataset)
         self.assertEqual({}, context.settings)
         self.assertEqual("./ds.zarr", context.file_path)
+        self.assertEqual(None, context.file_index)
 
     def test_report(self):
-        context = RuleContextImpl(Config(), xr.Dataset(), "./ds.zarr")
+        context = RuleContextImpl(Config(), xr.Dataset(), "./ds.zarr", None)
         with context.use_state(rule_id="no-xxx"):
             context.report(
                 "What the heck do you mean?",

diff --git a/tests/cli/configs/recommended.py b/tests/cli/configs/recommended.py
@@ -1,4 +1,4 @@
-def export_configs():
+def export_config():
     import xrlint.plugins.core
     import xrlint.plugins.xcube
 
@@ -10,8 +10,8 @@ def export_configs():
                 "xcube": xcube,
             }
         },
-        core.configs["recommended"],
-        xcube.configs["recommended"],
+        *core.configs["recommended"],
+        *xcube.configs["recommended"],
         {
             "rules": {
                 "dataset-title-attr": "error",

diff --git a/tests/cli/test_config.py b/tests/cli/test_config.py
@@ -34,7 +34,7 @@
 """
 
 py_text = """
-def export_configs():
+def export_config():
     return [
         {
             "name": "py-test",
@@ -117,13 +117,13 @@ def test_read_config_yaml_with_format_error(self):
                 read_config_list(config_path)
 
     def test_read_config_yaml_with_type_error(self):
-        with text_file("config.yaml", "prime: 97") as config_path:
+        with text_file("config.yaml", "97") as config_path:
             with pytest.raises(
                 ConfigError,
                 match=(
-                    "'config.yaml: configuration list must be of"
-                    " type ConfigList|list\\[Config|dict|str\\],"
-                    " but got dict'"
+                    r"config\.yaml\: config_list must be of"
+                    r" type ConfigList \| list\[Config \| dict \| str\],"
+                    r" but got int"
                 ),
             ):
                 read_config_list(config_path)
@@ -141,14 +141,14 @@ def test_read_config_py_no_export(self):
             with pytest.raises(
                 ConfigError,
                 match=(
-                    "config_1002.py: attribute 'export_configs'"
+                    "config_1002.py: attribute 'export_config'"
                     " not found in module 'config_1002'"
                 ),
             ):
                 read_config_list(config_path)
 
     def test_read_config_py_with_value_error(self):
-        py_code = "def export_configs():\n    raise ValueError('value is useless!')\n"
+        py_code = "def export_config():\n    raise ValueError('value is useless!')\n"
         with text_file(self.new_config_py(), py_code) as config_path:
             with pytest.raises(
                 ValueError,
@@ -157,7 +157,7 @@ def test_read_config_py_with_value_error(self):
                 read_config_list(config_path)
 
     def test_read_config_py_with_os_error(self):
-        py_code = "def export_configs():\n    raise OSError('where is my hat?')\n"
+        py_code = "def export_config():\n    raise OSError('where is my hat?')\n"
         with text_file(self.new_config_py(), py_code) as config_path:
             with pytest.raises(
                 ConfigError,
@@ -166,15 +166,15 @@ def test_read_config_py_with_os_error(self):
                 read_config_list(config_path)
 
     def test_read_config_py_with_invalid_config_list(self):
-        py_code = "def export_configs():\n    return 42\n"
+        py_code = "def export_config():\n    return 42\n"
         with text_file(self.new_config_py(), py_code) as config_path:
             with pytest.raises(
                 ConfigError,
                 match=(
-                    ".py: return value of export_configs\\(\\):"
-                    " configuration list must be of type"
-                    " ConfigList|list\\[Config|dict|str\\],"
-                    " but got int"
+                    r"\.py: return value of export_config\(\):"
+                    r" config_list must be of type"
+                    r" ConfigList \| list\[Config \| dict \| str\],"
+                    r" but got int"
                 ),
             ):
                 read_config_list(config_path)
@@ -198,10 +198,10 @@ def test_read_config_yaml(self):
 
     def assert_ok(self, config_list: ConfigList):
         self.assertIsInstance(config_list, ConfigList)
-        self.assertEqual(4, len(config_list.configs))
+        self.assertEqual(7, len(config_list.configs))
         config = config_list.compute_config("test.zarr")
         self.assertIsInstance(config, Config)
-        self.assertEqual("<computed>", config.name)
+        self.assertEqual(None, config.name)
         self.assertIsInstance(config.plugins, dict)
         self.assertEqual({"xcube"}, set(config.plugins.keys()))
         self.assertIsInstance(config.rules, dict)

diff --git a/tests/cli/test_main.py b/tests/cli/test_main.py
@@ -191,7 +191,6 @@ def test_print_config_option(self):
             self.assertEqual(
                 (
                     "{\n"
-                    '  "name": "<computed>",\n'
                     '  "plugins": {\n'
                     '    "__core__": "xrlint.plugins.core:export_plugin"\n'
                     "  },\n"

diff --git a/tests/plugins/core/test_plugin.py b/tests/plugins/core/test_plugin.py
@@ -34,9 +34,9 @@ def test_configs_complete(self):
         all_rule_names = set(plugin.rules.keys())
         self.assertEqual(
             all_rule_names,
-            set(plugin.configs["all"].rules.keys()),
+            set(plugin.configs["all"][-1].rules.keys()),
         )
         self.assertEqual(
             all_rule_names,
-            set(plugin.configs["recommended"].rules.keys()),
+            set(plugin.configs["recommended"][-1].rules.keys()),
         )