2424from ..logger import get_logger
2525
2626
27- def _hash (name : str ) -> int :
28- """
29- Daniel J. Bernstein hash function.
30-
31- Returns a positive hash value.
32- """
33- h = 5381
34- for c in name :
35- h = ((h << 5 ) + h ) + ord (c )
36- return h
37-
38-
3927def _decode (x : da .Array , a : dict [str :Any ]) -> da .Array :
4028 """Returns decoded data."""
4129 f = Decode (np .single if x .dtype == np .single else np .double , x .ndim )
@@ -101,7 +89,7 @@ def run(self, source: Dataset) -> Dataset: # noqa: D102
10189 if v not in config or self ._args .selector == 0 :
10290 continue
10391 get_logger ().info (f"starting graph for variable: { v } " )
104- self .randomize (source , target , v , x , config [v ])
92+ self .randomize (target , v , x , config [v ])
10593 get_logger ().info (f"finished graph for variable: { v } " )
10694 return target
10795
@@ -116,28 +104,8 @@ def config(self) -> dict[str : dict[str:Any]]:
116104 config = json .load (r )
117105 return config
118106
# noinspection PyShadowingNames
def entropy(self, name: str, uuid: str, n: int = 4) -> list[int]:
    """
    Returns the entropy of the seed sequence used for a given variable.

    The variable name and the dataset UUID are joined as ``name-uuid``,
    hashed, and offset by the configured selector. The resulting integer
    seeds a Philox bit generator, from which ``n`` values are drawn.

    :param name: The variable name.
    :param uuid: The dataset UUID.
    :param n: The length of the seed sequence.
    :return: The entropy.
    """
    from numpy.random import Philox

    # The selector offset yields a different seed per selector value.
    seed = _hash(f"{name}-{uuid}") + self._args.selector
    # NOTE(review): DefaultGenerator.next() presumably returns one integer
    # per call; confirm against its definition.
    g = DefaultGenerator(Philox(seed))
    return [g.next() for _ in range(n)]
137-
138107 def randomize (
139108 self ,
140- source : Dataset ,
141109 target : Dataset ,
142110 v : str ,
143111 x : DataArray ,
@@ -146,22 +114,14 @@ def randomize(
146114 """
147115 Creates the graph to randomize a variable.
148116
149- :param source: The source dataset.
150117 :param target: The target dataset.
151118 :param v: The name of the variable.
152119 :param x: The data of the variable.
153120 :param config: The randomization configuration.
154121 """
155- if "total" in config :
156- s : list [int ] = []
157- z = _decode (x .data , x .attrs )
158- for ref in config ["total" ]:
159- a = _decode (target [ref ].data , target [ref ].attrs )
160- b = _decode (source [ref ].data , source [ref ].attrs )
161- z = z + (a - b )
162- elif "uncertainty" in config :
163- s : list [int ] = self .entropy (v , self .uuid )
164- f = Randomize (m = x .ndim , dist = config ["distribution" ], entropy = s )
122+ if "uncertainty" in config :
123+ s = self .seed (self .uuid (v ))
124+ f = Randomize (m = x .ndim , dist = config ["distribution" ], seed = s )
165125 u = (
166126 target [config ["uncertainty" ]]
167127 if isinstance (config ["uncertainty" ], str )
@@ -182,8 +142,8 @@ def randomize(
182142 clip = config .get ("clip" , None ),
183143 )
184144 else :
185- s : list [ int ] = self .entropy ( v , self .uuid )
186- f = Randomize (m = x .ndim , dist = config ["distribution" ], entropy = s )
145+ s = self .seed ( self .uuid ( v ) )
146+ f = Randomize (m = x .ndim , dist = config ["distribution" ], seed = s )
187147 b = target [config ["bias" ]]
188148 r = target [config ["rmsd" ]]
189149 z = f .apply_to (
@@ -206,20 +166,38 @@ def randomize(
206166 ],
207167 dtype = z .dtype ,
208168 )
209- if s :
210- target [v ].attrs ["entropy" ] = np .array (s , dtype = np .int64 )
169+ target [v ].attrs ["seed" ] = s
211170 if get_logger ().is_enabled (Logging .DEBUG ):
212- get_logger ().debug (f"entropy : { s } " )
171+ get_logger ().debug (f"seed : { s } " )
213172 get_logger ().debug (f"min: { da .nanmin (z ).compute () :.3f} " )
214173 get_logger ().debug (f"max: { da .nanmax (z ).compute () :.3f} " )
215174 get_logger ().debug (f"mean: { da .nanmean (z ).compute () :.3f} " )
216175 get_logger ().debug (f"std: { da .nanstd (z ).compute () :.3f} " )
217176
218- @property
219- def uuid (self ) -> str :
# noinspection PyShadowingNames
def seed(self, uuid: uuid.UUID, n: int = 4) -> np.ndarray:
    """
    Returns the seed sequence used for a given variable.

    The integer value of the UUID, offset by the configured selector,
    seeds a Philox bit generator, from which ``n`` values are drawn.

    :param uuid: The variable and dataset UUID.
    :param n: The length of the seed sequence.
    :return: The seed sequence, as an array of 64-bit signed integers.
    """
    from numpy.random import Philox

    # The selector offset yields a different seed per selector value.
    seed = uuid.int + self._args.selector
    g = DefaultGenerator(Philox(seed))
    # NOTE(review): assumes every value returned by g.next() fits into
    # int64; confirm against the DefaultGenerator definition.
    return np.array([g.next() for _ in range(n)], dtype=np.int64)
195+
def uuid(self, v: str) -> uuid.UUID:
    """
    Returns a UUID constructed from the variable name and the
    basename of the source file.

    The name string is ``<variable>-<stem of the source file>`` and the
    UUID is a version 5 UUID in the URL namespace, so the same variable
    and source file always give the same UUID.

    :param v: The name of the variable.
    :return: The UUID.
    """
    # Imported locally so that the body does not depend on what the
    # global name ``uuid`` refers to; this method carries the same name.
    from uuid import NAMESPACE_URL, uuid5

    return uuid5(NAMESPACE_URL, f"{v}-{self._args.source_file.stem}")
0 commit comments