Skip to content

Commit e2790f9

Browse files
committed
Update: randomization
1 parent be725e6 commit e2790f9

File tree

12 files changed

+451
-158
lines changed

12 files changed

+451
-158
lines changed

README.md

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,17 +21,17 @@ processor. The processor is invoked from the command line. Typing
2121
will print a detailed usage message to the screen
2222

2323
usage: kaleidoscope [-h] [--chunk-size-lat CHUNK_SIZE_LAT]
24-
[--chunk-size-lon CHUNK_SIZE_LON]
24+
[--chunk-size-lon CHUNK_SIZE_LON] [--selector SELECTOR]
2525
[--engine-reader {h5netcdf,netcdf4,zarr}]
2626
[--engine-writer {h5netcdf,netcdf4,zarr}]
2727
[--log-level {debug,info,warning,error,off}]
2828
[--mode {multithreading,synchronous}]
2929
[--workers {1,2,3,4,5,6,7,8}] [--progress]
3030
[--no-progress] [--stack-traces] [--no-stack-traces]
31-
[--test] [--no-test] [--tmpdir TMPDIR] [-v]
31+
[--tmpdir TMPDIR] [-v]
3232
source_file target_file
3333

34-
This scientific processor simulates measurement errors.
34+
This scientific processor simulates measurementerrors.
3535

3636
positional arguments:
3737
source_file the file path of the source dataset.
@@ -51,6 +51,7 @@ will print a detailed usage message to the screen
5151
value of `-1` refers to full longitudinal chunk size
5252
and a value of `0` refers to the chunk size used in
5353
the source file. (default: None)
54+
--selector SELECTOR the Monte Carlo stream selector. (default: None)
5455
--engine-reader {h5netcdf,netcdf4,zarr}
5556
specify the engine used to read the source product
5657
file. (default: None)
@@ -72,8 +73,6 @@ will print a detailed usage message to the screen
7273
--no-progress disable progress bar display. (default: True)
7374
--stack-traces enable Python stack traces. (default: False)
7475
--no-stack-traces disable Python stack traces. (default: True)
75-
--test enable test mode. (default: False)
76-
--no-test disable test mode. (default: True)
7776
--tmpdir TMPDIR specify the path to the temporary directory.
7877
(default: None)
7978
-v, --version show program's version number and exit

kaleidoscope/algorithms/randomize.py

Lines changed: 104 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -4,19 +4,91 @@
44
"""
55
This module provides the algorithm to randomize data.
66
"""
7+
from typing import Any
8+
from typing import Literal
79

810
import dask.array as da
911
import numpy as np
12+
from numpy.random import SeedSequence
1013
from typing_extensions import override
1114

12-
from ..interface.algorithm import BlockAlgorithm
15+
from ..generators import DefaultNormal
16+
from ..interface.algorithm import InformedBlockAlgorithm
17+
from ..interface.generating import Normal
1318

1419

15-
class Randomize(BlockAlgorithm):
20+
def _block_seed(
21+
block_id: tuple[int, ...], root_seed: np.ndarray
22+
) -> np.ndarray:
23+
"""Returns a random seed array for a given block."""
24+
work_seed = SeedSequence(_hash(block_id)).generate_state(1)
25+
return np.array([i for i in work_seed] + [i for i in root_seed])
26+
27+
28+
def _hash(block_id: tuple[int, ...]) -> int:
29+
"""Returns a positive hash value."""
30+
h = 1
31+
for i in block_id:
32+
h = 31 * h + i
33+
return h
34+
35+
36+
def _chlorophyll(
37+
seed: np.ndarray, x: np.ndarray, u: np.ndarray
38+
) -> np.ndarray:
39+
"""
40+
Returns randomized values for ESA CCI ocean colour chlorophyll.
41+
42+
Uses ESA CCI OC PUG (Equation 2.10).
43+
"""
44+
return _lognormal(
45+
seed, x, x * np.sqrt(np.exp(np.square(np.log(10.0) * u)) - 1.0)
46+
)
47+
48+
49+
def _lognormal(seed: np.ndarray, x: np.ndarray, u: np.ndarray) -> np.ndarray:
50+
"""Returns randomized values for log-normally distributed errors."""
51+
v = np.log(1.0 + np.square(u / x))
52+
m = np.log(x) - 0.5 * v
53+
return np.exp(_normal(seed, m, np.sqrt(v)))
54+
55+
56+
def _normal(seed: np.ndarray, x: np.ndarray, u: np.ndarray) -> np.ndarray:
57+
"""Returns randomized values for normally distributed errors."""
58+
z: Normal = DefaultNormal(seed)
59+
return x + u * z.randoms(np.empty(x.shape, x.dtype))
60+
61+
62+
class Randomize(InformedBlockAlgorithm):
1663
"""
1764
The algorithm to randomize data.
1865
"""
1966

67+
_dist: Literal["normal", "lognormal", "chlorophyll"] | str
68+
"""The type of measurement error distribution."""
69+
70+
_root_seed: np.ndarray
71+
"""The root seed."""
72+
73+
def __init__(
74+
self,
75+
dtype: np.dtype,
76+
m: int,
77+
dist: Literal["normal", "lognormal", "chlorophyll"] | str = "normal",
78+
entropy: int | list[int] | None = None,
79+
):
80+
"""
81+
Creates a new algorithm instance.
82+
83+
:param dtype: The result data type.
84+
:param m: The number of input array dimensions.
85+
:param dist: The type of measurement error distribution.
86+
:param entropy: The entropy to create the seed sequence.
87+
"""
88+
super().__init__(dtype, m, m)
89+
self._dist = dist
90+
self._root_seed = SeedSequence(entropy).generate_state(8)
91+
2092
@override
2193
def chunks(self, *inputs: da.Array) -> tuple[int, ...] | None:
2294
return None
@@ -31,31 +103,46 @@ def created_axes(self) -> list[int] | None:
31103
def dropped_axes(self) -> list[int]:
32104
return []
33105

34-
# noinspection PyMethodMayBeStatic
35106
def randomize(
36-
self, data: np.ndarray, *, test: bool = False
107+
self,
108+
*data: np.ndarray,
109+
coverage_factor: Any = 1.0,
110+
relative: bool = False,
111+
**kwargs,
37112
) -> np.ndarray:
38113
"""
39114
Randomizes data.
40115
41116
:param data: The data.
42-
:param test: Run in test mode.
43-
:return: The randomized data.
117+
:param coverage_factor: The uncertainty coverage factor.
118+
:param relative: Uncertainty is given in relative terms.
119+
:return: The measurement values randomized.
44120
"""
45-
return data if test else self.simulate(data, data)
121+
seed = _block_seed(kwargs["block_id"], self._root_seed)
46122

47-
compute_block = randomize
123+
x = data[0]
124+
u = (
125+
data[1]
126+
if len(data) == 2
127+
else np.sqrt(np.square(data[1]) - np.square(data[2]))
128+
)
129+
if coverage_factor != 1.0:
130+
u = u / coverage_factor
131+
if relative:
132+
u = u * x
48133

49-
# noinspection PyMethodMayBeStatic
50-
def simulate(self, x: np.ndarray, u: np.ndarray):
51-
"""
52-
Simulates measurement errors.
134+
match self._dist:
135+
case "normal":
136+
y = _normal(seed, x, u)
137+
case "lognormal":
138+
y = _lognormal(seed, x, u)
139+
case "chlorophyll":
140+
y = _chlorophyll(seed, x, u)
141+
case _:
142+
y = x
143+
return y
53144

54-
:param x: The measurement.
55-
:param u: The measurement uncertainty.
56-
:return: The simulated measurements.
57-
"""
58-
return x
145+
compute_block = randomize
59146

60147
@property
61148
@override

kaleidoscope/config/config.reader.json

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,19 @@
88
"config.reader.chunks": {
99
"time": 1,
1010
"lat": 720,
11-
"lon": 720
11+
"lon": 720,
12+
13+
"esa-cci-oc": {
14+
"time": 0,
15+
"lat": 2160,
16+
"lon": 2160
17+
},
18+
19+
"_": {
20+
"time": 0,
21+
"lat": 0,
22+
"lon": 0
23+
}
1224
},
1325

1426
"config.reader.engine": "h5netcdf",

kaleidoscope/config/config.writer.json

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,19 @@
88
"config.writer.chunks": {
99
"time": 0,
1010
"lat": 0,
11-
"lon": 0
11+
"lon": 0,
12+
13+
"esa-cci-oc": {
14+
"time": 0,
15+
"lat": 270,
16+
"lon": 270
17+
},
18+
19+
"_": {
20+
"time": 0,
21+
"lat": 0,
22+
"lon": 0
23+
}
1224
},
1325

1426
"config.writer.engine": "h5netcdf",

kaleidoscope/config/config.yml

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,12 @@ chunk_size_lat:
77
## The chunk size is determined by the reader
88
chunk_size_lon:
99

10+
## No default product type.
11+
product_type:
12+
13+
## The default selector.
14+
selector: 0
15+
1016
## Detect the reader engine automatically
1117
engine_reader:
1218

@@ -28,8 +34,5 @@ progress: False
2834
## Disable stack traces.
2935
stack_traces: False
3036

31-
## Disable test mode.
32-
test: False
33-
3437
## Use the working directory for temporary files.
3538
tmpdir: "."

kaleidoscope/generators.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ def __init__(self, seed: int | np.ndarray | BitGenerator | None = None):
5858
self._g = default_generator(seed)
5959

6060
@overrides
61-
def next_int64(self) -> int:
61+
def next(self) -> int:
6262
return self._g.integers(0x8000000000000000)
6363

6464

@@ -80,13 +80,13 @@ def get(self, i: int) -> Univariate:
8080
return self
8181

8282
@overrides
83-
def next_double(self) -> float:
83+
def random(self) -> float:
8484
return self._g.random()
8585

8686
@overrides
87-
def next_doubles(self, doubles: np.ndarray) -> np.ndarray:
88-
self._g.random(out=doubles)
89-
return doubles
87+
def randoms(self, randoms: np.ndarray) -> np.ndarray:
88+
self._g.standard_normal(dtype=randoms.dtype, out=randoms)
89+
return randoms
9090

9191

9292
class DefaultNormal(Normal):
@@ -107,10 +107,10 @@ def get(self, i: int) -> Univariate:
107107
return self
108108

109109
@overrides
110-
def next_double(self) -> float:
110+
def random(self) -> float:
111111
return self._g.standard_normal()
112112

113113
@overrides
114-
def next_doubles(self, doubles: np.ndarray) -> np.ndarray:
115-
self._g.standard_normal(out=doubles)
116-
return doubles
114+
def randoms(self, randoms: np.ndarray) -> np.ndarray:
115+
self._g.standard_normal(dtype=randoms.dtype, out=randoms)
116+
return randoms

0 commit comments

Comments
 (0)