Fix: mask and scale problems

heptaflar · heptaflar · commit 60e82cfc4b39 · 2025-05-06T16:55:09.000+02:00
diff --git a/README.md b/README.md
@@ -21,7 +21,7 @@ processor. The processor is invoked from the command line. Typing
 will print a detailed usage message to the screen
 
     usage: kaleidoscope [-h]
-                        --product-type {esa-cci-oc,esa-scope-exchange,ghrsst,glorys}
+                        --source-type {esa-cci-oc,esa-scope-exchange,ghrsst,glorys}
                         --selector SELECTOR
                         [--engine-reader {h5netcdf,netcdf4,zarr}]
                         [--engine-writer {h5netcdf,netcdf4,zarr}]
@@ -40,8 +40,8 @@ will print a detailed usage message to the screen
     
     options:
       -h, --help            show this help message and exit
-      --product-type {esa-cci-oc,esa-scope-exchange,ghrsst,glorys}
-                            the product type. (default: None)
+      --source-type {esa-cci-oc,esa-scope-exchange,ghrsst,glorys}
+                            the source type. (default: None)
       --selector SELECTOR   the Monte Carlo stream selector. An integral number
                             which must not be negative. (default: None)
       --engine-reader {h5netcdf,netcdf4,zarr}
@@ -75,7 +75,7 @@ will print a detailed usage message to the screen
 
 To invoke the processor from the terminal, for instance, type 
 
-    kaleidoscope --product-type ghrsst --selector 17 in.nc out.nc
+    kaleidoscope --source-type ghrsst --selector 17 in.nc out.nc
 
 which normally will log information to the terminal, e.g.,
 
@@ -86,7 +86,7 @@ which normally will log information to the terminal, e.g.,
     2025-04-30T09:42:11.928000Z <node> kaleidoscope 2025.1.0 [76069] [I] config: mode = multithreading
     2025-04-30T09:42:11.928000Z <node> kaleidoscope 2025.1.0 [76069] [I] config: processor_name = kaleidoscope
     2025-04-30T09:42:11.928000Z <node> kaleidoscope 2025.1.0 [76069] [I] config: processor_version = 2025.1.0
-    2025-04-30T09:42:11.928000Z <node> kaleidoscope 2025.1.0 [76069] [I] config: product_type = ghrsst
+    2025-04-30T09:42:11.928000Z <node> kaleidoscope 2025.1.0 [76069] [I] config: source_type = ghrsst
     2025-04-30T09:42:11.928000Z <node> kaleidoscope 2025.1.0 [76069] [I] config: progress = False
     2025-04-30T09:42:11.928000Z <node> kaleidoscope 2025.1.0 [76069] [I] config: selector = 17
     2025-04-30T09:42:11.928000Z <node> kaleidoscope 2025.1.0 [76069] [I] config: source_file = in.nc
diff --git a/bin/kaleidoscope-esa-cci-oc b/bin/kaleidoscope-esa-cci-oc
@@ -8,13 +8,18 @@ set -e
 #
 #  ./kaleidoscope-esa-cci-oc <file>
 #
-for selector in 001 002 003 004 005 006 007 008 009 010; do
+for selector in 000 001 002 003 004 005 006 007 008 009 010; do
   echo "$(tput setaf 2)$(date -u "+%Y-%m-%dT%H:%M:%S") [INFO] Selector ${selector} ...$(tput sgr0)"
+  source_file="${1}"
+  target_file="${1%.nc}"."${selector}".nc
+  if [ -f "${target_file}" ]; then
+    continue
+  fi
   kaleidoscope \
     --selector ${selector} \
-    --product-type esa-cci-oc \
+    --source-type esa-cci-oc \
     --log-level warning \
     --progress \
-    "${1}" \
-    "${1%.nc}"."${selector}".nc
+    "${source_file}" \
+    "${target_file}"
 done
diff --git a/bin/kaleidoscope-esa-scope-ex b/bin/kaleidoscope-esa-scope-ex
@@ -6,15 +6,20 @@
 set -e
 ## Produces Monte Carlo variants of the ESA SCOPE Exchange product.
 #
-#  ./kaleidoscope-esa-scope-ex
+#  ./kaleidoscope-esa-scope-ex <file>
 #
-for selector in 001 002 003 004 005 006 007 008 009 010; do
+for selector in 000 001 002 003 004 005 006 007 008 009 010; do
   echo "$(tput setaf 2)$(date -u "+%Y-%m-%dT%H:%M:%S") [INFO] Selector ${selector} ...$(tput sgr0)"
+  source_file="${1}"
+  target_file="${1%.nc}"."${selector}".nc
+  if [ -f "${target_file}" ]; then
+    continue
+  fi
   kaleidoscope \
     --selector ${selector} \
-    --product-type esa-scope-exchange \
+    --source-type esa-scope-exchange \
     --log-level warning \
     --progress \
-    Ford_et_al_UExP-FNN-U_physics_carbonatesystem_ESASCOPE_v5.nc \
-    Ford_et_al_UExP-FNN-U_physics_carbonatesystem_ESASCOPE_v5.${selector}.nc
+    "${source_file}" \
+    "${target_file}"
 done
diff --git a/bin/kaleidoscope-ghrsst b/bin/kaleidoscope-ghrsst
@@ -8,13 +8,18 @@ set -e
 #
 #  ./kaleidoscope-ghrsst <file>
 #
-for selector in 001 002 003 004 005 006 007 008 009 010; do
+for selector in 000 001 002 003 004 005 006 007 008 009 010; do
   echo "$(tput setaf 2)$(date -u "+%Y-%m-%dT%H:%M:%S") [INFO] Selector ${selector} ...$(tput sgr0)"
+  source_file="${1}"
+  target_file="${1%.nc}"."${selector}".nc
+  if [ -f "${target_file}" ]; then
+    continue
+  fi
   kaleidoscope \
     --selector ${selector} \
-    --product-type ghrsst \
+    --source-type ghrsst \
     --log-level warning \
     --progress \
-    "${1}" \
-    "${1%.nc}"."${selector}".nc
+    "${source_file}" \
+    "${target_file}"
 done
diff --git a/bin/kaleidoscope-glorys b/bin/kaleidoscope-glorys
@@ -8,13 +8,18 @@ set -e
 #
 #  ./kaleidoscope-glorys <file>
 #
-for selector in 001 002 003 004 005 006 007 008 009 010; do
+for selector in 000 001 002 003 004 005 006 007 008 009 010; do
   echo "$(tput setaf 2)$(date -u "+%Y-%m-%dT%H:%M:%S") [INFO] Selector ${selector} ...$(tput sgr0)"
+  source_file="${1}"
+  target_file="${1%.nc}"."${selector}".nc
+  if [ -f "${target_file}" ]; then
+    continue
+  fi
   kaleidoscope \
     --selector ${selector} \
-    --product-type esa-cci-oc \
+    --source-type glorys \
     --log-level warning \
     --progress \
-    "${1}" \
-    "${1%.nc}"."${selector}".nc
+    "${source_file}" \
+    "${target_file}"
 done
diff --git a/bin/kaleidoscope-run b/bin/kaleidoscope-run
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+#
+# Copyright (c) Brockmann Consult GmbH, 2025
+# License: MIT
+#
+set -e
+## Produces Monte Carlo variants of a product of the given source type.
+#
+#  ./kaleidoscope-run <source-type> <file>
+#
+for selector in 000 001 002 003 004 005 006 007 008 009 010; do
+  echo "$(tput setaf 2)$(date -u "+%Y-%m-%dT%H:%M:%S") [INFO] Selector ${selector} ...$(tput sgr0)"
+  source_type="${1}"
+  source_file="${2}"
+  target_file="${2%.nc}"."${selector}".nc
+  if [ -f "${target_file}" ]; then
+    continue
+  fi
+  kaleidoscope \
+    --selector ${selector} \
+    --source-type "${source_type}" \
+    --log-level warning \
+    --progress \
+    "${source_file}" \
+    "${target_file}"
+done
diff --git a/kaleidoscope/algorithms/codec.py b/kaleidoscope/algorithms/codec.py
@@ -14,91 +14,10 @@
 from ..interface.algorithm import BlockAlgorithm
 
 
-class Encode(BlockAlgorithm):
-    """
-    The algorithm to encode data according to CF conventions.
-    """
-
-    def __init__(self, dtype: np.dtype, m: int):
-        """
-        Creates a new algorithm instance.
-
-        :param dtype: The result data type.
-        :param m: The number of input data dimensions.
-        """
-        super().__init__(dtype, m, m)
-
-    @override
-    def chunks(self, *inputs: da.Array) -> tuple[int, ...] | None:
-        return None
-
-    @property
-    @override
-    def created_axes(self) -> list[int] | None:
-        return None
-
-    @property
-    @override
-    def dropped_axes(self) -> list[int]:
-        return []
-
-    # noinspection PyMethodMayBeStatic
-    def encode(
-        self,
-        x: np.ndarray,
-        *,
-        add_offset: Any = None,
-        scale_factor: Any = None,
-        fill_value: Any = None,
-        valid_min: Any = None,
-        valid_max: Any = None,
-    ) -> np.ndarray:
-        """
-        Encodes data.
-
-        :param x: The data.
-        :param add_offset: The add-offset.
-        :param scale_factor: The scale factor.
-        :param fill_value: The fill value.
-        :param valid_min: The valid minimum.
-        :param valid_max: The valid maximum.
-        :return: The encoded data.
-        """
-        if (
-            fill_value is None
-            and add_offset is None
-            and scale_factor is None
-            and valid_min is None
-            and valid_max is None
-        ):
-            y = x
-        else:
-            y = x.astype(np.double)
-            if add_offset is not None:
-                y = y - add_offset
-            if scale_factor is not None:
-                y = y / scale_factor
-            if valid_max is not None:
-                y[y > valid_max] = valid_max
-            if valid_min is not None:
-                y[y < valid_min] = valid_min
-            if fill_value is not None:
-                y[np.isnan(x)] = fill_value
-        return y
-
-    compute_block = encode
-
-    @property
-    @override
-    def name(self) -> str:
-        return "encode"
-
-
 class Decode(BlockAlgorithm):
     """
     The algorithm to decode data according to CF conventions.
     """
-
     def __init__(self, dtype: np.dtype, m: int):
         """
         Creates a new algorithm instance.
diff --git a/kaleidoscope/config/config.random.json b/kaleidoscope/config/config.random.json
@@ -160,7 +160,11 @@
   "glorys": {
     "so": {
       "uncertainty": 0.1,
-      "distribution": "normal"
+      "distribution": "normal",
+      "clip": [
+        0.0,
+        41.73711352050302
+      ]
     }
   }
 }
diff --git a/kaleidoscope/config/config.yml b/kaleidoscope/config/config.yml
@@ -1,8 +1,8 @@
 #  Copyright (c) Brockmann Consult GmbH, 2025
 #  License: MIT
 
-## No default product type.
-product_type:
+## No default source type.
+source_type:
 
 ## The default selector.
 selector: 0
diff --git a/kaleidoscope/operators/randomizeop.py b/kaleidoscope/operators/randomizeop.py
@@ -16,7 +16,6 @@
 from xarray import Dataset
 
 from ..algorithms.codec import Decode
-from ..algorithms.codec import Encode
 from ..algorithms.randomize import Randomize
 from ..generators import DefaultGenerator
 from ..interface.logging import Logging
@@ -37,26 +36,13 @@ def _hash(name: str) -> int:
 
 
 def _decode(
-    x: da.Array, a: dict[str:Any], dtype: np.dtype = np.single
+    x: da.Array, a: dict[str:Any], dtype: np.dtype = np.double
 ) -> da.Array:
     f = Decode(dtype, x.ndim)
     y = f.apply_to(
         x,
         add_offset=a.get("add_offset", None),
-        scale_factor=a.get("add_offset", None),
-        fill_value=a.get("_FillValue", None),
-        valid_min=a.get("valid_min", None),
-        valid_max=a.get("valid_max", None),
-    )
-    return y
-
-
-def _encode(x: da.Array, a: dict[str:Any], dtype: np.dtype) -> da.Array:
-    f = Encode(dtype, x.ndim)
-    y = f.apply_to(
-        x,
-        add_offset=a.get("add_offset", None),
-        scale_factor=a.get("add_offset", None),
+        scale_factor=a.get("scale_factor", None),
         fill_value=a.get("_FillValue", None),
         valid_min=a.get("valid_min", None),
         valid_max=a.get("valid_max", None),
@@ -95,20 +81,18 @@ def run(self, source: Dataset) -> Dataset:  # noqa: D102
             "tracking_id",
             source.attrs.get("uuid", self._args.source_file.stem),
         )
-        target = Dataset(
+        target: Dataset = Dataset(
             data_vars=source.data_vars,
             coords=source.coords,
             attrs=source.attrs,
         )
         config: dict[str : dict[str:Any]] = self.config.get(
-            self._args.product_type, {}
+            self._args.source_type, {}
         )
         for v, x in target.data_vars.items():
-            if v not in config:
+            if v not in config or self._args.selector == 0:
                 continue
-
             get_logger().info(f"starting graph for variable: {v}")
-
             a: dict[str:Any] = config[v]
             f = Randomize(
                 m=x.ndim,
@@ -144,31 +128,24 @@ def run(self, source: Dataset) -> Dataset:  # noqa: D102
                     _decode(b.data, b.attrs),
                     clip=a.get("clip", None),
                 )
-
+            if get_logger().is_enabled(Logging.DEBUG):
+                get_logger().debug(f"min:  {da.nanmin(z).compute() :.3f}")
+                get_logger().debug(f"max:  {da.nanmax(z).compute() :.3f}")
+                get_logger().debug(f"mean: {da.nanmean(z).compute() :.3f}")
+                get_logger().debug(f"std:  {da.nanstd(z).compute() :.3f}")
             target[v] = DataArray(
-                data=_encode(
-                    z,
-                    x.attrs,
-                    x.dtype,
-                ),
-                coords=x.coords,
-                dims=x.dims,
-                attrs=x.attrs,
+                data=z, coords=x.coords, dims=x.dims, attrs=x.attrs
+            )
+            # target[v].attrs.pop("valid_min", None)
+            # target[v].attrs.pop("valid_max", None)
+            target[v].attrs["dtype"] = x.dtype
+            target[v].attrs["actual_range"] = np.array(
+                [
+                    da.nanmin(z).compute(),
+                    da.nanmax(z).compute(),
+                ],
+                dtype=z.dtype,
             )
-            if "actual_range" in target[v].attrs:
-                target[v].attrs["actual_range"] = np.array(
-                    [
-                        da.nanmin(z).compute(),
-                        da.nanmax(z).compute(),
-                    ],
-                    dtype=x.dtype,
-                )
-
-            if get_logger().is_enabled(Logging.DEBUG):
-                get_logger().debug(f"min:  {da.nanmin(z).compute() :.6f}")
-                get_logger().debug(f"max:  {da.nanmax(z).compute() :.6f}")
-                get_logger().debug(f"mean: {da.nanmean(z).compute() :.6f}")
-                get_logger().debug(f"std:  {da.nanstd(z).compute() :.6f}")
             get_logger().info(f"finished graph for variable: {v}")
         return target
 
diff --git a/kaleidoscope/parser.py b/kaleidoscope/parser.py
@@ -65,11 +65,11 @@ def _add_arguments(parser):
     def _add_options(parser):
         """This method does not belong to public API."""
         parser.add_argument(
-            "--product-type",
-            help="the product type.",
+            "--source-type",
+            help="the source type.",
             choices=["esa-cci-oc", "esa-scope-exchange", "ghrsst", "glorys"],
             required=True,
-            dest="product_type",
+            dest="source_type",
         )
         parser.add_argument(
             "--selector",
diff --git a/kaleidoscope/processor.py b/kaleidoscope/processor.py
diff --git a/kaleidoscope/writer.py b/kaleidoscope/writer.py
diff --git a/test/kaleidoscope/test_reader.py b/test/kaleidoscope/test_reader.py
diff --git a/test/kaleidoscope/test_writer.py b/test/kaleidoscope/test_writer.py

Original file line number	Diff line number	Diff line change
`@@ -160,7 +160,11 @@`
`160`	`160`	`"glorys": {`
`161`	`161`	`"so": {`
`162`	`162`	`"uncertainty": 0.1,`
`163`		`- "distribution": "normal"`
	`163`	`+ "distribution": "normal",`
	`164`	`+ "clip": [`
	`165`	`+ 0.0,`
	`166`	`+ 41.73711352050302`
	`167`	`+ ]`
`164`	`168`	`}`
`165`	`169`	`}`
`166`	`170`	`}`