Update: total absorption coefficient

heptaflar · heptaflar · commit 52b007073a42 · 2025-05-09T18:16:33.000+02:00
diff --git a/kaleidoscope/config/config.random.json b/kaleidoscope/config/config.random.json
@@ -38,72 +38,146 @@
     "adg_412": {
       "bias": "adg_412_bias",
       "rmsd": "adg_412_rmsd",
-      "distribution": "lognormal"
+      "distribution": "lognormal",
+      "clip": [
+        0.0,
+        10.0
+      ]
     },
     "adg_443": {
       "bias": "adg_443_bias",
       "rmsd": "adg_443_rmsd",
-      "distribution": "lognormal"
+      "distribution": "lognormal",
+      "clip": [
+        0.0,
+        10.0
+      ]
     },
     "adg_490": {
       "bias": "adg_490_bias",
       "rmsd": "adg_490_rmsd",
-      "distribution": "lognormal"
+      "distribution": "lognormal",
+      "clip": [
+        0.0,
+        10.0
+      ]
     },
     "adg_510": {
       "bias": "adg_510_bias",
       "rmsd": "adg_510_rmsd",
-      "distribution": "lognormal"
+      "distribution": "lognormal",
+      "clip": [
+        0.0,
+        10.0
+      ]
     },
     "adg_560": {
       "bias": "adg_560_bias",
       "rmsd": "adg_560_rmsd",
-      "distribution": "lognormal"
+      "distribution": "lognormal",
+      "clip": [
+        0.0,
+        10.0
+      ]
     },
     "adg_665": {
       "bias": "adg_665_bias",
       "rmsd": "adg_665_rmsd",
-      "distribution": "lognormal"
+      "distribution": "lognormal",
+      "clip": [
+        0.0,
+        10.0
+      ]
     },
     "aph_412": {
       "bias": "aph_412_bias",
       "rmsd": "aph_412_rmsd",
-      "distribution": "lognormal"
+      "distribution": "lognormal",
+      "clip": [
+        0.0,
+        10.0
+      ]
     },
     "aph_443": {
       "bias": "aph_443_bias",
       "rmsd": "aph_443_rmsd",
-      "distribution": "lognormal"
+      "distribution": "lognormal",
+      "clip": [
+        0.0,
+        10.0
+      ]
     },
     "aph_490": {
       "bias": "aph_490_bias",
       "rmsd": "aph_490_rmsd",
-      "distribution": "lognormal"
+      "distribution": "lognormal",
+      "clip": [
+        0.0,
+        10.0
+      ]
     },
     "aph_510": {
       "bias": "aph_510_bias",
       "rmsd": "aph_510_rmsd",
-      "distribution": "lognormal"
+      "distribution": "lognormal",
+      "clip": [
+        0.0,
+        10.0
+      ]
     },
     "aph_560": {
       "bias": "aph_560_bias",
       "rmsd": "aph_560_rmsd",
-      "distribution": "lognormal"
+      "distribution": "lognormal",
+      "clip": [
+        0.0,
+        10.0
+      ]
     },
     "aph_665": {
       "bias": "aph_665_bias",
       "rmsd": "aph_665_rmsd",
-      "distribution": "lognormal"
+      "distribution": "lognormal",
+      "clip": [
+        0.0,
+        10.0
+      ]
+    },
+    "atot_412": {
+      "total": ["aph_412", "adg_412"]
+    },
+    "atot_443": {
+      "total": ["aph_443", "adg_443"]
+    },
+    "atot_490": {
+      "total": ["aph_490", "adg_490"]
+    },
+    "atot_510": {
+      "total": ["aph_510", "adg_510"]
+    },
+    "atot_560": {
+      "total": ["aph_560", "adg_560"]
+    },
+    "atot_665": {
+      "total": ["aph_665", "adg_665"]
     },
     "kd_490": {
       "bias": "kd_490_bias",
       "rmsd": "kd_490_rmsd",
-      "distribution": "lognormal"
+      "distribution": "lognormal",
+      "clip": [
+        0.0,
+        10.0
+      ]
     },
     "chlor_a": {
       "bias": "chlor_a_log10_bias",
       "rmsd": "chlor_a_log10_rmsd",
-      "distribution": "chlorophyll"
+      "distribution": "chlorophyll",
+      "clip": [
+        0.0,
+        100.0
+      ]
     }
   },
   "esa-scope-exchange": {
diff --git a/kaleidoscope/operators/randomizeop.py b/kaleidoscope/operators/randomizeop.py
@@ -89,77 +89,25 @@ def run(self, source: Dataset) -> Dataset:  # noqa: D102
         :param source: The source dataset.
         :return: The result dataset.
         """
+        config: dict[str : dict[str:Any]] = self.config.get(
+            self._args.source_type, {}
+        )
         target: Dataset = Dataset(
             data_vars=source.data_vars,
             coords=source.coords,
             attrs=source.attrs,
         )
-        config: dict[str : dict[str:Any]] = self.config.get(
-            self._args.source_type, {}
-        )
         for v, x in target.data_vars.items():
             if v not in config or self._args.selector == 0:
                 continue
             get_logger().info(f"starting graph for variable: {v}")
-            s: list[int] = self.entropy(v, self.uuid)
-            a: dict[str:Any] = config[v]
-            f = Randomize(m=x.ndim, dist=a["distribution"], entropy=s)
-            if "uncertainty" in a:
-                u = (
-                    target[a["uncertainty"]]
-                    if isinstance(a["uncertainty"], str)
-                    else DataArray(
-                        data=da.full(
-                            x.shape, a["uncertainty"], chunks=x.chunks
-                        ),
-                        coords=x.coords,
-                        dims=x.dims,
-                        attrs={},
-                    )
-                )
-                z = f.apply_to(
-                    _decode(x.data, x.attrs),
-                    _decode(u.data, u.attrs),
-                    coverage=a.get("coverage", 1.0),
-                    relative=a.get("relative", False),
-                    clip=a.get("clip", None),
-                )
-            else:
-                b = target[a["bias"]]
-                r = target[a["rmsd"]]
-                z = f.apply_to(
-                    _decode(x.data, x.attrs),
-                    _decode(r.data, r.attrs),
-                    _decode(b.data, b.attrs),
-                    clip=a.get("clip", None),
-                )
-            target[v] = DataArray(
-                data=_encode(z, x.attrs, x.dtype),
-                coords=x.coords,
-                dims=x.dims,
-                attrs=x.attrs,
-            )
-            if "actual_range" in target[v].attrs:
-                target[v].attrs["actual_range"] = np.array(
-                    [
-                        da.nanmin(z).compute(),
-                        da.nanmax(z).compute(),
-                    ],
-                    dtype=z.dtype,
-                )
-            target[v].attrs["entropy"] = np.array(s, dtype=np.int64)
-            if get_logger().is_enabled(Logging.DEBUG):
-                get_logger().debug(f"entropy: {s}")
-                get_logger().debug(f"min:  {da.nanmin(z).compute() :.3f}")
-                get_logger().debug(f"max:  {da.nanmax(z).compute() :.3f}")
-                get_logger().debug(f"mean: {da.nanmean(z).compute() :.3f}")
-                get_logger().debug(f"std:  {da.nanstd(z).compute() :.3f}")
+            self.randomize(source, target, v, x, config[v])
             get_logger().info(f"finished graph for variable: {v}")
         return target
 
     @property
     def config(self) -> dict[str : dict[str:Any]]:
-        """Returns the product type configuration."""
+        """Returns the randomization configuration."""
         package = "kaleidoscope.config"
         name = "config.random.json"
         with resources.path(package, name) as resource:
@@ -187,6 +135,86 @@ def entropy(self, name: str, uuid: str, n: int = 4) -> list[int]:
         g = DefaultGenerator(Philox(seed))
         return [g.next() for _ in range(n)]
 
+    def randomize(
+        self,
+        source: Dataset,
+        target: Dataset,
+        v: str,
+        x: DataArray,
+        config: dict[str:Any],
+    ):
+        """
+        Creates the graph to randomize a variable.
+
+        :param source: The source dataset.
+        :param target: The target dataset.
+        :param v: The name of the variable.
+        :param x: The data of the variable.
+        :param config: The randomization configuration.
+        """
+        if "total" in config:
+            s: list[int] = []
+            z = _decode(x.data, x.attrs)
+            for ref in config["total"]:
+                a = _decode(target[ref].data, target[ref].attrs)
+                b = _decode(source[ref].data, source[ref].attrs)
+                z = z + (a - b)
+        elif "uncertainty" in config:
+            s: list[int] = self.entropy(v, self.uuid)
+            f = Randomize(m=x.ndim, dist=config["distribution"], entropy=s)
+            u = (
+                target[config["uncertainty"]]
+                if isinstance(config["uncertainty"], str)
+                else DataArray(
+                    data=da.full(
+                        x.shape, config["uncertainty"], chunks=x.chunks
+                    ),
+                    coords=x.coords,
+                    dims=x.dims,
+                    attrs={},
+                )
+            )
+            z = f.apply_to(
+                _decode(x.data, x.attrs),
+                _decode(u.data, u.attrs),
+                coverage=config.get("coverage", 1.0),
+                relative=config.get("relative", False),
+                clip=config.get("clip", None),
+            )
+        else:
+            s: list[int] = self.entropy(v, self.uuid)
+            f = Randomize(m=x.ndim, dist=config["distribution"], entropy=s)
+            b = target[config["bias"]]
+            r = target[config["rmsd"]]
+            z = f.apply_to(
+                _decode(x.data, x.attrs),
+                _decode(r.data, r.attrs),
+                _decode(b.data, b.attrs),
+                clip=config.get("clip", None),
+            )
+        target[v] = DataArray(
+            data=_encode(z, x.attrs, x.dtype),
+            coords=x.coords,
+            dims=x.dims,
+            attrs=x.attrs,
+        )
+        if "actual_range" in target[v].attrs:
+            target[v].attrs["actual_range"] = np.array(
+                [
+                    da.nanmin(z).compute(),
+                    da.nanmax(z).compute(),
+                ],
+                dtype=z.dtype,
+            )
+        if s:
+            target[v].attrs["entropy"] = np.array(s, dtype=np.int64)
+        if get_logger().is_enabled(Logging.DEBUG):
+            get_logger().debug(f"entropy: {s}")
+            get_logger().debug(f"min:  {da.nanmin(z).compute() :.3f}")
+            get_logger().debug(f"max:  {da.nanmax(z).compute() :.3f}")
+            get_logger().debug(f"mean: {da.nanmean(z).compute() :.3f}")
+            get_logger().debug(f"std:  {da.nanstd(z).compute() :.3f}")
+
     @property
     def uuid(self) -> str:
         """