44"""
55This module provides the algorithm to randomize data.
66"""
7+ from typing import Any
8+ from typing import Literal
79
810import dask .array as da
911import numpy as np
12+ from numpy .random import SeedSequence
1013from typing_extensions import override
1114
12- from ..interface .algorithm import BlockAlgorithm
15+ from ..generators import DefaultNormal
16+ from ..interface .algorithm import InformedBlockAlgorithm
17+ from ..interface .generating import Normal
1318
1419
15- class Randomize (BlockAlgorithm ):
def _block_seed(
    block_id: tuple[int, ...], root_seed: np.ndarray
) -> np.ndarray:
    """
    Returns the seed array for a given block.

    The seed array consists of a single word of state, which is generated
    from the hash value of the block identifier, followed by the elements
    of the root seed.

    :param block_id: The block identifier.
    :param root_seed: The root seed.
    :return: The seed array.
    """
    block_state = SeedSequence(_hash(block_id)).generate_state(1)
    return np.array([*block_state, *root_seed])
26+
27+
28+ def _hash (block_id : tuple [int , ...]) -> int :
29+ """Returns a positive hash value."""
30+ h = 1
31+ for i in block_id :
32+ h = 31 * h + i
33+ return h
34+
35+
def _chlorophyll(
    seed: np.ndarray, x: np.ndarray, u: np.ndarray
) -> np.ndarray:
    """
    Returns randomized values for ESA CCI ocean colour chlorophyll.

    Uses ESA CCI OC PUG (Equation 2.10).

    :param seed: The seed array.
    :param x: The measurement values.
    :param u: The measurement uncertainty. NOTE(review): presumably
        given in terms of the decadic logarithm, because it is scaled
        by ln(10) here - confirm against the PUG.
    :return: The measurement values randomized.
    """
    v = np.square(np.log(10.0) * u)
    # expm1 retains precision for small arguments, where the explicit
    # difference exp(v) - 1 cancels to zero (in particular for single
    # precision data) and the uncertainty would silently vanish
    return _lognormal(seed, x, x * np.sqrt(np.expm1(v)))
47+
48+
def _lognormal(seed: np.ndarray, x: np.ndarray, u: np.ndarray) -> np.ndarray:
    """
    Returns randomized values for log-normally distributed errors.

    The parameters of the underlying normal distribution are computed
    from the measurement values and their uncertainty, the normal
    distribution is randomized, and the result is exponentiated.

    :param seed: The seed array.
    :param x: The measurement values.
    :param u: The measurement uncertainty.
    :return: The measurement values randomized.
    """
    # log1p retains precision for small relative uncertainties, where
    # the explicit sum 1 + (u / x)**2 rounds to one (in particular for
    # single precision data) and the variance would silently vanish
    v = np.log1p(np.square(u / x))
    m = np.log(x) - 0.5 * v
    return np.exp(_normal(seed, m, np.sqrt(v)))
54+
55+
def _normal(seed: np.ndarray, x: np.ndarray, u: np.ndarray) -> np.ndarray:
    """
    Returns randomized values for normally distributed errors.

    :param seed: The seed array.
    :param x: The measurement values.
    :param u: The measurement uncertainty.
    :return: The measurement values randomized.
    """
    generator: Normal = DefaultNormal(seed)
    # the generator is handed an uninitialized array, which has the
    # shape and the data type of the measurement values
    noise = generator.randoms(np.empty(x.shape, x.dtype))
    return x + u * noise
60+
61+
62+ class Randomize (InformedBlockAlgorithm ):
1663 """
1764 The algorithm to randomize data.
1865 """
1966
67+ _dist : Literal ["normal" , "lognormal" , "chlorophyll" ] | str
68+ """The type of measurement error distribution."""
69+
70+ _root_seed : np .ndarray
71+ """The root seed."""
72+
def __init__(
    self,
    dtype: np.dtype,
    m: int,
    dist: Literal["normal", "lognormal", "chlorophyll"] | str = "normal",
    entropy: int | list[int] | None = None,
):
    """
    Creates a new algorithm instance.

    The root seed consists of eight words of state, which are
    generated from a seed sequence created with the given entropy.

    :param dtype: The result data type.
    :param m: The number of input array dimensions.
    :param dist: The type of measurement error distribution.
    :param entropy: The entropy to create the seed sequence.
    """
    super().__init__(dtype, m, m)
    seed_sequence = SeedSequence(entropy)
    self._root_seed = seed_sequence.generate_state(8)
    self._dist = dist
91+
@override
def chunks(self, *inputs: da.Array) -> tuple[int, ...] | None:
    # Always yields `None`, regardless of the inputs.
    # NOTE(review): presumably `None` tells the framework to retain the
    # chunking of the inputs - confirm against the base class.
    return None
@@ -31,31 +103,46 @@ def created_axes(self) -> list[int] | None:
31103 def dropped_axes (self ) -> list [int ]:
32104 return []
33105
34- # noinspection PyMethodMayBeStatic
35106 def randomize (
36- self , data : np .ndarray , * , test : bool = False
107+ self ,
108+ * data : np .ndarray ,
109+ coverage_factor : Any = 1.0 ,
110+ relative : bool = False ,
111+ ** kwargs ,
37112 ) -> np .ndarray :
38113 """
39114 Randomizes data.
40115
41116 :param data: The data.
42- :param test: Run in test mode.
43- :return: The randomized data.
117+ :param coverage_factor: The uncertainty coverage factor.
118+ :param relative: Uncertainty is given in relative terms.
119+ :return: The measurement values randomized.
44120 """
45- return data if test else self . simulate ( data , data )
121+ seed = _block_seed ( kwargs [ "block_id" ], self . _root_seed )
46122
47- compute_block = randomize
123+ x = data [0 ]
124+ u = (
125+ data [1 ]
126+ if len (data ) == 2
127+ else np .sqrt (np .square (data [1 ]) - np .square (data [2 ]))
128+ )
129+ if coverage_factor != 1.0 :
130+ u = u / coverage_factor
131+ if relative :
132+ u = u * x
48133
49- # noinspection PyMethodMayBeStatic
50- def simulate (self , x : np .ndarray , u : np .ndarray ):
51- """
52- Simulates measurement errors.
134+ match self ._dist :
135+ case "normal" :
136+ y = _normal (seed , x , u )
137+ case "lognormal" :
138+ y = _lognormal (seed , x , u )
139+ case "chlorophyll" :
140+ y = _chlorophyll (seed , x , u )
141+ case _:
142+ y = x
143+ return y
53144
54- :param x: The measurement.
55- :param u: The measurement uncertainty.
56- :return: The simulated measurements.
57- """
58- return x
145+ compute_block = randomize
59146
60147 @property
61148 @override
0 commit comments