
Commit 50905d7

Update: randomization of ESA scope exchange
1 parent e2790f9 commit 50905d7

3 files changed: +56 -29 lines changed


README.md

Lines changed: 8 additions & 4 deletions
@@ -21,7 +21,9 @@ processor. The processor is invoked from the command line. Typing
 will print a detailed usage message to the screen
 
     usage: kaleidoscope [-h] [--chunk-size-lat CHUNK_SIZE_LAT]
-                        [--chunk-size-lon CHUNK_SIZE_LON] [--selector SELECTOR]
+                        [--chunk-size-lon CHUNK_SIZE_LON]
+                        [--product-type {esa-cci-oc,esa-scope-exchange,ghrsst,glorys}]
+                        [--selector SELECTOR]
                         [--engine-reader {h5netcdf,netcdf4,zarr}]
                         [--engine-writer {h5netcdf,netcdf4,zarr}]
                         [--log-level {debug,info,warning,error,off}]
@@ -51,7 +53,10 @@ will print a detailed usage message to the screen
                            value of `-1` refers to full longitudinal chunk size
                            and a value of `0` refers to the chunk size used in
                            the source file. (default: None)
-      --selector SELECTOR  the Monte Carlo stream selector. (default: None)
+      --product-type {esa-cci-oc,esa-scope-exchange,ghrsst,glorys}
+                           the product type. (default: None)
+      --selector SELECTOR  the Monte Carlo stream selector. An integral number
+                           which must not be negative. (default: None)
       --engine-reader {h5netcdf,netcdf4,zarr}
                            specify the engine used to read the source product
                            file. (default: None)
@@ -76,9 +81,8 @@ will print a detailed usage message to the screen
       --tmpdir TMPDIR      specify the path to the temporary directory.
                            (default: None)
       -v, --version        show program's version number and exit
-
-Copyright (c) Brockmann Consult GmbH, 2025. License: MIT
 
+Copyright (c) Brockmann Consult GmbH, 2025. License: MIT
 
 ### Normal operations
 
 TBD.
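
For orientation, a hypothetical invocation using the new option could look like the line below. The flags and their choices are taken from the usage message above; any remaining arguments are placeholders, since the rest of the usage message is not shown in this diff:

    kaleidoscope --product-type esa-scope-exchange --selector 0 --log-level info [further arguments]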

kaleidoscope/algorithms/randomize.py

Lines changed: 8 additions & 5 deletions
@@ -106,16 +106,18 @@ def dropped_axes(self) -> list[int]:
     def randomize(
         self,
         *data: np.ndarray,
-        coverage_factor: Any = 1.0,
+        coverage: Any = 1.0,
         relative: bool = False,
+        clip: tuple[Any, Any] | None = None,
         **kwargs,
     ) -> np.ndarray:
         """
         Randomizes data.
 
         :param data: The data.
-        :param coverage_factor: The uncertainty coverage factor.
+        :param coverage: The uncertainty coverage factor.
         :param relative: Uncertainty is given in relative terms.
+        :param clip: Where to clip measurement errors.
         :return: The measurement values randomized.
         """
         seed = _block_seed(kwargs["block_id"], self._root_seed)
@@ -126,11 +128,10 @@ def randomize(
             if len(data) == 2
             else np.sqrt(np.square(data[1]) - np.square(data[2]))
         )
-        if coverage_factor != 1.0:
-            u = u / coverage_factor
+        if coverage != 1.0:
+            u = u / coverage
         if relative:
             u = u * x
-
         match self._dist:
             case "normal":
                 y = _normal(seed, x, u)
@@ -140,6 +141,8 @@ def randomize(
                 y = _chlorophyll(seed, x, u)
             case _:
                 y = x
+        if clip is not None:
+            y = np.clip(y, a_min=clip[0], a_max=clip[1])
         return y
 
     compute_block = randomize
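
To see the renamed `coverage` parameter and the new `clip` argument in isolation, here is a minimal, self-contained sketch of the scaling and clipping steps from the hunks above. The function name `randomize_sketch` is hypothetical, only a plain normal distribution is used, and the block-seeded random stream of the real `Randomize` class is replaced by an ordinary NumPy generator:

import numpy as np


def randomize_sketch(
    x: np.ndarray,
    u: np.ndarray,
    coverage: float = 1.0,
    relative: bool = False,
    clip: tuple | None = None,
    seed: int = 0,
) -> np.ndarray:
    """Illustrative stand-in for the randomize method (normal case only)."""
    if coverage != 1.0:
        # expanded uncertainties (e.g. k = 2) are scaled back to standard uncertainty
        u = u / coverage
    if relative:
        # relative uncertainties are converted to absolute ones
        u = u * x
    rng = np.random.default_rng(seed)
    y = rng.normal(loc=x, scale=u)
    if clip is not None:
        # either bound may be None to leave that side unbounded
        y = np.clip(y, a_min=clip[0], a_max=clip[1])
    return y


# example: expanded (k = 2) uncertainty, upper bound at 3400.0
values = np.array([2300.0, 2350.0, 2400.0])
uncert = np.array([40.0, 40.0, 40.0])
print(randomize_sketch(values, uncert, coverage=2.0, clip=(None, 3400.0)))

With `coverage=2.0` the stated uncertainty is halved before sampling, and `clip=(None, 3400.0)` bounds the randomized values from above only, mirroring the `ta` entry in the operator configuration below.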

kaleidoscope/operators/randomizeop.py

Lines changed: 40 additions & 20 deletions
@@ -64,6 +64,7 @@ def run(self, source: Dataset) -> Dataset:  # noqa: D102
         for v, x in target.data_vars.items():
             if v not in config:
                 continue
+            get_logger().info(f"starting graph for variable: {v}")
             attrs: dict[str:Any] = config[v]
             f = Randomize(
                 x.dtype,
@@ -87,8 +88,9 @@ def run(self, source: Dataset) -> Dataset:  # noqa: D102
                     data=f.apply_to(
                         x.data,
                         u.data,
-                        coverage_factor=attrs.get("coverage_factor", 1.0),
+                        coverage=attrs.get("coverage", 1.0),
                         relative=attrs.get("relative", False),
+                        clip=attrs.get("clip", None),
                     ),
                     coords=x.coords,
                     dims=x.dims,
@@ -99,37 +101,51 @@ def run(self, source: Dataset) -> Dataset:  # noqa: D102
                 b = target[attrs["bias"]]
                 r = target[attrs["rmsd"]]
                 target[v] = DataArray(
-                    data=f.apply_to(x.data, r.data, b.data),
+                    data=f.apply_to(
+                        x.data,
+                        r.data,
+                        b.data,
+                        clip=attrs.get("clip", [None, None]),
+                    ),
                     coords=x.coords,
                     dims=x.dims,
                     name=x.name,
                     attrs=x.attrs,
                 )
             if get_logger().is_enabled(Logging.DEBUG):
                 get_logger().debug(
-                    f"source[{v}] min: {source[v].min().values.item() :.6f}"
+                    f"source[{v}] min: "
+                    f"{source[v].quantile(0.0001).values.item() :.6f}"
                 )
                 get_logger().debug(
-                    f"target[{v}] min: {target[v].min().values.item() :.6f}"
+                    f"target[{v}] min: "
+                    f"{target[v].quantile(0.0001).values.item() :.6f}"
                 )
                 get_logger().debug(
-                    f"source[{v}] max: {source[v].max().values.item() :.6f}"
+                    f"source[{v}] max: "
+                    f"{source[v].quantile(0.9999).values.item() :.6f}"
                )
                 get_logger().debug(
-                    f"target[{v}] max: {target[v].max().values.item() :.6f}"
+                    f"target[{v}] max: "
+                    f"{target[v].quantile(0.9999).values.item() :.6f}"
                 )
                 get_logger().debug(
-                    f"source[{v}] mean: {source[v].mean().values.item() :.6f}"
+                    f"source[{v}] mean: "
+                    f"{source[v].mean().values.item() :.6f}"
                 )
                 get_logger().debug(
-                    f"target[{v}] mean: {target[v].mean().values.item() :.6f}"
+                    f"target[{v}] mean: "
+                    f"{target[v].mean().values.item() :.6f}"
                 )
                 get_logger().debug(
-                    f"source[{v}] std: {source[v].std().values.item() :.6f}"
+                    f"source[{v}] std: "
+                    f"{source[v].std().values.item() :.6f}"
                 )
                 get_logger().debug(
-                    f"target[{v}] std: {target[v].std().values.item() :.6f}"
+                    f"target[{v}] std: "
+                    f"{target[v].std().values.item() :.6f}"
                 )
+            get_logger().info(f"finished graph for variable: {v}")
         return target
 
     @property
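
The debug summary above now reports the 0.01 % and 99.99 % quantiles instead of the absolute minimum and maximum. A small stand-alone sketch of that summary, with a made-up DataArray in place of `source[v]`, might look like this:

import numpy as np
import xarray as xr

# hypothetical variable, standing in for source[v] or target[v]
da = xr.DataArray(np.random.default_rng(0).normal(2300.0, 40.0, size=10_000))

# extreme quantiles rather than min/max, as in the changed debug output
print(f"min: {da.quantile(0.0001).values.item() :.6f}")
print(f"max: {da.quantile(0.9999).values.item() :.6f}")
print(f"mean: {da.mean().values.item() :.6f}")
print(f"std: {da.std().values.item() :.6f}")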
@@ -168,7 +184,7 @@ def config(self) -> dict[str : dict[str:Any]]:
                 "Rrs_665": {
                     "bias": "Rrs_665_bias",
                     "rmsd": "Rrs_665_rmsd",
-                    "distribution": "lognormal",
+                    "distribution": "normal",
                 },
                 "adg_412": {
                     "bias": "adg_412_bias",
@@ -245,35 +261,39 @@ def config(self) -> dict[str : dict[str:Any]]:
                 "fco2": {
                     "uncertainty": "fco2_tot_unc",
                     # the uncertainty interval coverage factor
-                    "coverage_factor": 2.0,
+                    "coverage": 2.0,
                     "distribution": "lognormal",
                 },
                 "flux": {
                     "uncertainty": "flux_unc",
                     # uncertainty is stated in relative terms
                     "relative": True,
-                    "coverage_factor": 2.0,
+                    "coverage": 2.0,
                     "distribution": "normal",
                 },
                 "ta": {
                     "uncertainty": "ta_tot_unc",
-                    "coverage_factor": 2.0,
+                    "coverage": 2.0,
                     "distribution": "lognormal",
+                    # clip to interval
+                    "clip": (None, 3400.0),
                 },
                 "dic": {
                     "uncertainty": "dic_tot_unc",
-                    "coverage_factor": 2.0,
+                    "coverage": 2.0,
                     "distribution": "lognormal",
+                    "clip": (None, 3200.0),
                 },
-                "ph": {
-                    "uncertainty": "ph_tot_unc",
-                    "coverage_factor": 2.0,
-                    "distribution": "lognormal",
+                "pH": {
+                    "uncertainty": "pH_tot_unc",
+                    "coverage": 2.0,
+                    "distribution": "normal",
                 },
                 "saturation_aragonite": {
                     "uncertainty": "saturation_aragonite_tot_unc",
-                    "coverage_factor": 2.0,
+                    "coverage": 2.0,
                     "distribution": "lognormal",
+                    "clip": (None, 6.0),
                 },
             },
             "ghrsst": {
