Skip to content

Commit 60e82cf

Browse files
committed
Fix: mask and scale problems
1 parent d72e27e commit 60e82cf

File tree

15 files changed

+115
-169
lines changed

15 files changed

+115
-169
lines changed

README.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ processor. The processor is invoked from the command line. Typing
2121
will print a detailed usage message to the screen
2222

2323
usage: kaleidoscope [-h]
24-
--product-type {esa-cci-oc,esa-scope-exchange,ghrsst,glorys}
24+
--source-type {esa-cci-oc,esa-scope-exchange,ghrsst,glorys}
2525
--selector SELECTOR
2626
[--engine-reader {h5netcdf,netcdf4,zarr}]
2727
[--engine-writer {h5netcdf,netcdf4,zarr}]
@@ -40,8 +40,8 @@ will print a detailed usage message to the screen
4040

4141
options:
4242
-h, --help show this help message and exit
43-
--product-type {esa-cci-oc,esa-scope-exchange,ghrsst,glorys}
44-
the product type. (default: None)
43+
--source-type {esa-cci-oc,esa-scope-exchange,ghrsst,glorys}
44+
the source type. (default: None)
4545
--selector SELECTOR the Monte Carlo stream selector. An integral number
4646
which must not be negative. (default: None)
4747
--engine-reader {h5netcdf,netcdf4,zarr}
@@ -75,7 +75,7 @@ will print a detailed usage message to the screen
7575

7676
To invoke the processor from the terminal, for instance, type
7777

78-
kaleidoscope --product-type ghrsst --selector 17 in.nc out.nc
78+
kaleidoscope --source-type ghrsst --selector 17 in.nc out.nc
7979

8080
which normally will log information to the terminal, e.g.,
8181

@@ -86,7 +86,7 @@ which normally will log information to the terminal, e.g.,
8686
2025-04-30T09:42:11.928000Z <node> kaleidoscope 2025.1.0 [76069] [I] config: mode = multithreading
8787
2025-04-30T09:42:11.928000Z <node> kaleidoscope 2025.1.0 [76069] [I] config: processor_name = kaleidoscope
8888
2025-04-30T09:42:11.928000Z <node> kaleidoscope 2025.1.0 [76069] [I] config: processor_version = 2025.1.0
89-
2025-04-30T09:42:11.928000Z <node> kaleidoscope 2025.1.0 [76069] [I] config: product_type = ghrsst
89+
2025-04-30T09:42:11.928000Z <node> kaleidoscope 2025.1.0 [76069] [I] config: source_type = ghrsst
9090
2025-04-30T09:42:11.928000Z <node> kaleidoscope 2025.1.0 [76069] [I] config: progress = False
9191
2025-04-30T09:42:11.928000Z <node> kaleidoscope 2025.1.0 [76069] [I] config: selector = 17
9292
2025-04-30T09:42:11.928000Z <node> kaleidoscope 2025.1.0 [76069] [I] config: source_file = in.nc

bin/kaleidoscope-esa-cci-oc

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,18 @@ set -e
88
#
99
# ./kaleidoscope-esa-cci-oc <file>
1010
#
11-
for selector in 001 002 003 004 005 006 007 008 009 010; do
11+
for selector in 000 001 002 003 004 005 006 007 008 009 010; do
1212
echo "$(tput setaf 2)$(date -u "+%Y-%m-%dT%H:%M:%S") [INFO] Selector ${selector} ...$(tput sgr0)"
13+
source_file="${1}"
14+
target_file="${1%.nc}"."${selector}".nc
15+
if [ -f "${target_file}" ]; then
16+
continue
17+
fi
1318
kaleidoscope \
1419
--selector ${selector} \
15-
--product-type esa-cci-oc \
20+
--source-type esa-cci-oc \
1621
--log-level warning \
1722
--progress \
18-
"${1}" \
19-
"${1%.nc}"."${selector}".nc
23+
"${source_file}" \
24+
"${target_file}"
2025
done

bin/kaleidoscope-esa-scope-ex

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,15 +6,20 @@
66
set -e
77
## Produces Monte Carlo variants of the ESA SCOPE Exchange product.
88
#
9-
# ./kaleidoscope-esa-scope-ex
9+
# ./kaleidoscope-esa-scope-ex <file>
1010
#
11-
for selector in 001 002 003 004 005 006 007 008 009 010; do
11+
for selector in 000 001 002 003 004 005 006 007 008 009 010; do
1212
echo "$(tput setaf 2)$(date -u "+%Y-%m-%dT%H:%M:%S") [INFO] Selector ${selector} ...$(tput sgr0)"
13+
source_file="${1}"
14+
target_file="${1%.nc}"."${selector}".nc
15+
if [ -f "${target_file}" ]; then
16+
continue
17+
fi
1318
kaleidoscope \
1419
--selector ${selector} \
15-
--product-type esa-scope-exchange \
20+
--source-type esa-scope-exchange \
1621
--log-level warning \
1722
--progress \
18-
Ford_et_al_UExP-FNN-U_physics_carbonatesystem_ESASCOPE_v5.nc \
19-
Ford_et_al_UExP-FNN-U_physics_carbonatesystem_ESASCOPE_v5.${selector}.nc
23+
"${source_file}" \
24+
"${target_file}"
2025
done

bin/kaleidoscope-ghrsst

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,18 @@ set -e
88
#
99
# ./kaleidoscope-ghrsst <file>
1010
#
11-
for selector in 001 002 003 004 005 006 007 008 009 010; do
11+
for selector in 000 001 002 003 004 005 006 007 008 009 010; do
1212
echo "$(tput setaf 2)$(date -u "+%Y-%m-%dT%H:%M:%S") [INFO] Selector ${selector} ...$(tput sgr0)"
13+
source_file="${1}"
14+
target_file="${1%.nc}"."${selector}".nc
15+
if [ -f "${target_file}" ]; then
16+
continue
17+
fi
1318
kaleidoscope \
1419
--selector ${selector} \
15-
--product-type ghrsst \
20+
--source-type ghrsst \
1621
--log-level warning \
1722
--progress \
18-
"${1}" \
19-
"${1%.nc}"."${selector}".nc
23+
"${source_file}" \
24+
"${target_file}"
2025
done

bin/kaleidoscope-glorys

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,18 @@ set -e
88
#
99
# ./kaleidoscope-glorys <file>
1010
#
11-
for selector in 001 002 003 004 005 006 007 008 009 010; do
11+
for selector in 000 001 002 003 004 005 006 007 008 009 010; do
1212
echo "$(tput setaf 2)$(date -u "+%Y-%m-%dT%H:%M:%S") [INFO] Selector ${selector} ...$(tput sgr0)"
13+
source_file="${1}"
14+
target_file="${1%.nc}"."${selector}".nc
15+
if [ -f "${target_file}" ]; then
16+
continue
17+
fi
1318
kaleidoscope \
1419
--selector ${selector} \
15-
--product-type esa-cci-oc \
20+
--source-type glorys \
1621
--log-level warning \
1722
--progress \
18-
"${1}" \
19-
"${1%.nc}"."${selector}".nc
23+
"${source_file}" \
24+
"${target_file}"
2025
done

bin/kaleidoscope-run

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
#!/usr/bin/env bash
2+
#
3+
# Copyright (c) Brockmann Consult GmbH, 2025
4+
# License: MIT
5+
#
6+
set -e
7+
## Produces Monte Carlo variants of a product of the given source type.
8+
#
9+
# ./kaleidoscope-run <source-type> <file>
10+
#
11+
for selector in 000 001 002 003 004 005 006 007 008 009 010; do
12+
echo "$(tput setaf 2)$(date -u "+%Y-%m-%dT%H:%M:%S") [INFO] Selector ${selector} ...$(tput sgr0)"
13+
source_type="${1}"
14+
source_file="${2}"
15+
target_file="${2%.nc}"."${selector}".nc
16+
if [ -f "${target_file}" ]; then
17+
continue
18+
fi
19+
kaleidoscope \
20+
--selector ${selector} \
21+
--source-type "${source_type}" \
22+
--log-level warning \
23+
--progress \
24+
"${source_file}" \
25+
"${target_file}"
26+
done

kaleidoscope/algorithms/codec.py

Lines changed: 0 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -14,91 +14,10 @@
1414
from ..interface.algorithm import BlockAlgorithm
1515

1616

17-
class Encode(BlockAlgorithm):
18-
"""
19-
The algorithm to encode data according to CF conventions.
20-
"""
21-
22-
def __init__(self, dtype: np.dtype, m: int):
23-
"""
24-
Creates a new algorithm instance.
25-
26-
:param dtype: The result data type.
27-
:param m: The number of input data dimensions.
28-
"""
29-
super().__init__(dtype, m, m)
30-
31-
@override
32-
def chunks(self, *inputs: da.Array) -> tuple[int, ...] | None:
33-
return None
34-
35-
@property
36-
@override
37-
def created_axes(self) -> list[int] | None:
38-
return None
39-
40-
@property
41-
@override
42-
def dropped_axes(self) -> list[int]:
43-
return []
44-
45-
# noinspection PyMethodMayBeStatic
46-
def encode(
47-
self,
48-
x: np.ndarray,
49-
*,
50-
add_offset: Any = None,
51-
scale_factor: Any = None,
52-
fill_value: Any = None,
53-
valid_min: Any = None,
54-
valid_max: Any = None,
55-
) -> np.ndarray:
56-
"""
57-
Encodes data.
58-
59-
:param x: The data.
60-
:param add_offset: The add-offset.
61-
:param scale_factor: The scale factor.
62-
:param fill_value: The fill value.
63-
:param valid_min: The valid minimum.
64-
:param valid_max: The valid maximum.
65-
:return: The encoded data.
66-
"""
67-
if (
68-
fill_value is None
69-
and add_offset is None
70-
and scale_factor is None
71-
and valid_min is None
72-
and valid_max is None
73-
):
74-
y = x
75-
else:
76-
y = x.astype(np.double)
77-
if add_offset is not None:
78-
y = y - add_offset
79-
if scale_factor is not None:
80-
y = y / scale_factor
81-
if valid_max is not None:
82-
y[y > valid_max] = valid_max
83-
if valid_min is not None:
84-
y[y < valid_min] = valid_min
85-
if fill_value is not None:
86-
y[np.isnan(x)] = fill_value
87-
return y
88-
89-
compute_block = encode
90-
91-
@property
92-
@override
93-
def name(self) -> str:
94-
return "encode"
95-
96-
9717
class Decode(BlockAlgorithm):
9818
"""
9919
The algorithm to decode data according to CF conventions.
10020
"""
101-
10221
def __init__(self, dtype: np.dtype, m: int):
10322
"""
10423
Creates a new algorithm instance.

kaleidoscope/config/config.random.json

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,11 @@
160160
"glorys": {
161161
"so": {
162162
"uncertainty": 0.1,
163-
"distribution": "normal"
163+
"distribution": "normal",
164+
"clip": [
165+
0.0,
166+
41.73711352050302
167+
]
164168
}
165169
}
166170
}

kaleidoscope/config/config.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
# Copyright (c) Brockmann Consult GmbH, 2025
22
# License: MIT
33

4-
## No default product type.
5-
product_type:
4+
## No default source type.
5+
source_type:
66

77
## The default selector.
88
selector: 0

kaleidoscope/operators/randomizeop.py

Lines changed: 21 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
from xarray import Dataset
1717

1818
from ..algorithms.codec import Decode
19-
from ..algorithms.codec import Encode
2019
from ..algorithms.randomize import Randomize
2120
from ..generators import DefaultGenerator
2221
from ..interface.logging import Logging
@@ -37,26 +36,13 @@ def _hash(name: str) -> int:
3736

3837

3938
def _decode(
40-
x: da.Array, a: dict[str:Any], dtype: np.dtype = np.single
39+
x: da.Array, a: dict[str:Any], dtype: np.dtype = np.double
4140
) -> da.Array:
4241
f = Decode(dtype, x.ndim)
4342
y = f.apply_to(
4443
x,
4544
add_offset=a.get("add_offset", None),
46-
scale_factor=a.get("add_offset", None),
47-
fill_value=a.get("_FillValue", None),
48-
valid_min=a.get("valid_min", None),
49-
valid_max=a.get("valid_max", None),
50-
)
51-
return y
52-
53-
54-
def _encode(x: da.Array, a: dict[str:Any], dtype: np.dtype) -> da.Array:
55-
f = Encode(dtype, x.ndim)
56-
y = f.apply_to(
57-
x,
58-
add_offset=a.get("add_offset", None),
59-
scale_factor=a.get("add_offset", None),
45+
scale_factor=a.get("scale_factor", None),
6046
fill_value=a.get("_FillValue", None),
6147
valid_min=a.get("valid_min", None),
6248
valid_max=a.get("valid_max", None),
@@ -95,20 +81,18 @@ def run(self, source: Dataset) -> Dataset: # noqa: D102
9581
"tracking_id",
9682
source.attrs.get("uuid", self._args.source_file.stem),
9783
)
98-
target = Dataset(
84+
target: Dataset = Dataset(
9985
data_vars=source.data_vars,
10086
coords=source.coords,
10187
attrs=source.attrs,
10288
)
10389
config: dict[str : dict[str:Any]] = self.config.get(
104-
self._args.product_type, {}
90+
self._args.source_type, {}
10591
)
10692
for v, x in target.data_vars.items():
107-
if v not in config:
93+
if v not in config or self._args.selector == 0:
10894
continue
109-
11095
get_logger().info(f"starting graph for variable: {v}")
111-
11296
a: dict[str:Any] = config[v]
11397
f = Randomize(
11498
m=x.ndim,
@@ -144,31 +128,24 @@ def run(self, source: Dataset) -> Dataset: # noqa: D102
144128
_decode(b.data, b.attrs),
145129
clip=a.get("clip", None),
146130
)
147-
131+
if get_logger().is_enabled(Logging.DEBUG):
132+
get_logger().debug(f"min: {da.nanmin(z).compute() :.3f}")
133+
get_logger().debug(f"max: {da.nanmax(z).compute() :.3f}")
134+
get_logger().debug(f"mean: {da.nanmean(z).compute() :.3f}")
135+
get_logger().debug(f"std: {da.nanstd(z).compute() :.3f}")
148136
target[v] = DataArray(
149-
data=_encode(
150-
z,
151-
x.attrs,
152-
x.dtype,
153-
),
154-
coords=x.coords,
155-
dims=x.dims,
156-
attrs=x.attrs,
137+
data=z, coords=x.coords, dims=x.dims, attrs=x.attrs
138+
)
139+
# target[v].attrs.pop("valid_min", None)
140+
# target[v].attrs.pop("valid_max", None)
141+
target[v].attrs["dtype"] = x.dtype
142+
target[v].attrs["actual_range"] = np.array(
143+
[
144+
da.nanmin(z).compute(),
145+
da.nanmax(z).compute(),
146+
],
147+
dtype=z.dtype,
157148
)
158-
if "actual_range" in target[v].attrs:
159-
target[v].attrs["actual_range"] = np.array(
160-
[
161-
da.nanmin(z).compute(),
162-
da.nanmax(z).compute(),
163-
],
164-
dtype=x.dtype,
165-
)
166-
167-
if get_logger().is_enabled(Logging.DEBUG):
168-
get_logger().debug(f"min: {da.nanmin(z).compute() :.6f}")
169-
get_logger().debug(f"max: {da.nanmax(z).compute() :.6f}")
170-
get_logger().debug(f"mean: {da.nanmean(z).compute() :.6f}")
171-
get_logger().debug(f"std: {da.nanstd(z).compute() :.6f}")
172149
get_logger().info(f"finished graph for variable: {v}")
173150
return target
174151

0 commit comments

Comments
 (0)