Merge pull request #4 from bcdev/schema-to-md

forman · web-flow · commit 2886661cbbff · 2024-01-03T12:57:22.000+01:00
Schema to md
diff --git a/CONFIG.md b/CONFIG.md
@@ -0,0 +1,163 @@
+Configuration for the zappend tool.
+
+### `version`
+
+Configuration version.
+Its value is `1`.
+
+### `target_uri`
+
+Type _string_.
+The URI or local path of the target Zarr dataset. Must be a directory.
+
+### `target_storage_options`
+
+Type _object_.
+Options for the filesystem given by the URI of `target_uri`.
+
+### `slice_engine`
+
+Type _string_.
+The name of the engine to be used for opening contributing datasets. Refer to the `engine` argument of the function `xarray.open_dataset()`.
+
+### `slice_storage_options`
+
+Type _object_.
+Options for the filesystem given by the protocol of the URIs of contributing datasets.
+
+### `slice_polling`
+
+Defines how to poll for contributing datasets.
+Must be one of the following:
+* No polling, fail immediately if dataset is not available.
+  Its value is `false`.
+* Poll using default values.
+  Its value is `true`.
+* Type _object_.
+  Polling parameters.
+  * `interval`:
+    Type _number_.
+    Polling interval in seconds.
+    Defaults to `2`.
+  
+  * `timeout`:
+    Type _number_.
+    Polling timeout in seconds.
+    Defaults to `60`.
+  
+
+### `temp_dir`
+
+Type _string_.
+The URI or local path of the directory that will be used to temporarily store rollback information.
+
+### `temp_storage_options`
+
+Type _object_.
+Options for the filesystem given by the protocol of `temp_dir`.
+
+### `zarr_version`
+
+The Zarr version to be used.
+Its value is `2`.
+
+### `fixed_dims`
+
+Type _object_.
+Specifies the fixed dimensions of the target dataset. Keys are dimension names, values are dimension sizes.
+Object values are:
+
+Type _integer_.
+
+### `append_dim`
+
+Type _string_.
+The name of the variadic append dimension.
+Defaults to `"time"`.
+
+### `variables`
+
+Type _object_.
+Defines dimensions, encoding, and attributes for variables in the target dataset. Object property names refer to variable names. The special name `*` refers to all variables, which is useful for defining common values.
+Object values are:
+
+Type _object_.
+Variable metadata.
+* `dims`:
+  Type _array_.
+  The names of the variable's dimensions in the given order. Each dimension must exist in contributing datasets.
+
+* `encoding`:
+  Type _object_.
+  Variable storage encoding. Settings given here overwrite the encoding settings of the first contributing dataset.
+  * `dtype`:
+    Storage data type.
+    Must be one of `"int8", "uint8", "int16", "uint16", "int32", "uint32", "int64", "uint64", "float32", "float64"`.
+  
+  * `chunks`:
+    Storage chunking.
+    Must be one of the following:
+    * Type _array_.
+      Chunk sizes in the order of the dimensions.
+    * Disable chunking.
+      Its value is `null`.
+  
+  * `fill_value`:
+    Storage fill value.
+    Must be one of the following:
+    * Type _number_.
+      A number of type and unit of the given storage `dtype`.
+    * Not-a-number. Can be used only if storage `dtype` is `float32` or `float64`.
+      Its value is `"NaN"`.
+    * No fill value.
+      Its value is `null`.
+  
+  * `scale_factor`:
+    Type _number_.
+    Scale factor for computing the in-memory value: `memory_value = scale_factor * storage_value + add_offset`.
+  
+  * `add_offset`:
+    Type _number_.
+    Add offset for computing the in-memory value: `memory_value = scale_factor * storage_value + add_offset`.
+  
+  * `units`:
+    Type _string_.
+    Units of the storage data type if memory data type is date/time.
+  
+  * `calendar`:
+    Type _string_.
+    The calendar to be used if memory data type is date/time.
+  
+  * `compressor`:
+    Type _array_ | _null_.
+    Compressor. Set to `null` to disable data compression.
+    * `id`:
+      Type _string_.
+    
+    `id` is required.
+  
+  * `filters`:
+    Type _array_ | _null_.
+    Filters. Set to `null` to not use filters.
+  
+
+* `attrs`:
+  Type _object_.
+  Arbitrary variable metadata attributes.
+
+
+### `included_variables`
+
+Type _array_.
+Specifies the names of variables to be included in the target dataset. Defaults to all variables found in the first contributing dataset.
+
+### `excluded_variables`
+
+Type _array_.
+Specifies the names of individual variables to be excluded from all contributing datasets.
+
+### `dry_run`
+
+Type _boolean_.
+If `true`, log only what would have been done, but don't apply any changes.
+Defaults to `false`.
diff --git a/tests/test_cli.py b/tests/test_cli.py
@@ -16,14 +16,14 @@
       Create or update a Zarr dataset TARGET from slice datasets SLICES.
     
     Options:
-      -c, --config CONFIG  Configuration JSON or YAML file. If multiple are passed,
-                           they will be deeply merged into one.
-      -t, --target TARGET  Target Zarr dataset path or URI. Overrides the
-                           'target_uri' configuration field.
-      --dry-run            Run the tool without creating, changing, or deleting any
-                           files.
-      --help-config        Show configuration help and exit.
-      --help               Show this message and exit.
+      -c, --config CONFIG    Configuration JSON or YAML file. If multiple are
+                             passed, they will be deeply merged into one.
+      -t, --target TARGET    Target Zarr dataset path or URI. Overrides the
+                             'target_uri' configuration field.
+      --dry-run              Run the tool without creating, changing, or deleting
+                             any files.
+      --help-config json|md  Show configuration help and exit.
+      --help                 Show this message and exit.
     """
 
 # remove indent
@@ -45,9 +45,13 @@ def test_help(self):
     def test_help_config(self):
         runner = CliRunner()
         # noinspection PyTypeChecker
-        result = runner.invoke(zappend, ['--help-config'])
+        result = runner.invoke(zappend, ['--help-config', 'json'])
         self.assertEqual(0, result.exit_code)
-        self.assertIn("Configuration JSON schema:", result.output)
+        self.assertIn('"target_uri": {', result.output)
+        # noinspection PyTypeChecker
+        result = runner.invoke(zappend, ['--help-config', 'md'])
+        self.assertEqual(0, result.exit_code)
+        self.assertIn('### `target_uri`', result.output)
 
     def test_no_slices(self):
         runner = CliRunner()
diff --git a/tests/test_config.py b/tests/test_config.py
@@ -10,6 +10,8 @@
 import yaml
 
 from zappend.config import CONFIG_V1_SCHEMA
+from zappend.config import schema_to_json
+from zappend.config import schema_to_md
 from zappend.config import merge_configs
 from zappend.config import normalize_config
 from zappend.config import validate_config
@@ -245,3 +247,15 @@ def test_merge_config(self):
         self.assertEqual({"a": {"b": 3, "c": 4}},
                          merge_configs({"a": {"b": 2, "c": 4}},
                                        {"a": {"b": 3}}))
+
+    def test_schema_to_json(self):
+        # Smoke test is sufficient here
+        text = schema_to_json()
+        self.assertIsInstance(text, str)
+        self.assertTrue(len(text) > 0)
+
+    def test_schema_to_md(self):
+        # Smoke test is sufficient here
+        text = schema_to_md()
+        self.assertIsInstance(text, str)
+        self.assertTrue(len(text) > 0)
diff --git a/zappend/cli.py b/zappend/cli.py
@@ -20,18 +20,20 @@
 @click.option("--dry-run", is_flag=True,
               help="Run the tool without creating, changing,"
                    " or deleting any files.")
-@click.option("--help-config", is_flag=True,
+@click.option("--help-config",
+              metavar="json|md",
+              type=click.Choice(["json", "md"]),
               help="Show configuration help and exit.")
 def zappend(slices: tuple[str, ...],
             config: tuple[str, ...],
             target: str | None,
             dry_run: bool,
-            help_config: bool):
+            help_config: str | None):
     """Create or update a Zarr dataset TARGET from slice datasets SLICES.
     """
 
     if help_config:
-        return _show_config_help()
+        return _show_config_help(help_config)
 
     if not slices:
         click.echo("No slice datasets given.")
@@ -45,12 +47,11 @@ def zappend(slices: tuple[str, ...],
         raise click.ClickException(f"{e}") from e
 
 
-def _show_config_help():
-    import json
-    from zappend.config import CONFIG_V1_SCHEMA
-    config_schema_json = json.dumps(CONFIG_V1_SCHEMA, indent=2)
-    print(f"Configuration JSON schema:\n")
-    print(config_schema_json)
+def _show_config_help(config_help_format: str):
+    from zappend.config import schema_to_json
+    from zappend.config import schema_to_md
+    to_text = schema_to_json if config_help_format == "json" else schema_to_md
+    print(to_text() + "\n")
 
 
 if __name__ == '__main__':
diff --git a/zappend/config.py b/zappend/config.py