@@ -178,7 +178,7 @@ def sample_from_poisson(n_sampled_years, lam, seed=None):
178178 return np .round (np .random .poisson (lam = lam , size = n_sampled_years )).astype ("int" )
179179
180180
181- def sample_events (events_per_year , freqs_orig , seed = None ):
181+ def sample_events (events_per_year , freqs_orig , with_replacement = False , seed = None ):
182182 """Sample events uniformely from an array (indices_orig) without replacement
183183 (if sum(events_per_year) > n_input_events the input events are repeated
184184 (tot_n_events/n_input_events) times, by ensuring that the same events doens't
@@ -190,6 +190,10 @@ def sample_events(events_per_year, freqs_orig, seed=None):
190190 Number of events per sampled year
191191 freqs_orig : np.ndarray
192192 Frequency of each input event
193+ with_replacement : bool, optional
194+ If False and all frequencies of freqs_orig are constant, events are sampled
195+ without replacement. Otherwise, events are sampled with replacement.
196+ Defaults to False.
193197 seed : Any, optional
194198 seed for the default bit generator.
195199 Default: None
@@ -203,47 +207,64 @@ def sample_events(events_per_year, freqs_orig, seed=None):
203207 """
204208
205209 sampling_vect = []
206-
207210 indices_orig = np .arange (len (freqs_orig ))
208211
209- freqs = freqs_orig
210- indices = indices_orig
212+ # this is the previous way of sampling
213+ # (without replacement, works well if frequencies are constant)
214+ if np .unique (freqs_orig ).size == 1 and not with_replacement :
211215
212- # sample events for each sampled year
213- for amount_events in events_per_year :
214- # if there are not enough input events, choice with no replace will fail
215- if amount_events > len (freqs_orig ):
216- raise ValueError (
217- f"cannot sample { amount_events } distinct events for a single year"
218- f" when there are only { len (freqs_orig )} input events"
219- )
216+ indices = indices_orig
217+ rng = default_rng (seed )
220218
221- # add the original indices and frequencies to the pool if there are less events
222- # in the pool than needed to fill the year one is sampling for
223- # or if the pool is empty (not covered in case amount_events is 0)
224- if len (np .unique (indices )) < amount_events or len (indices ) == 0 :
225- indices = np .append (indices , indices_orig )
226- freqs = np .append (freqs , freqs_orig )
219+ # sample events for each sampled year
220+ for amount_events in events_per_year :
221+ # if there are not enough input events, choice with no replace will fail
222+ if amount_events > len (freqs_orig ):
223+ raise ValueError (
224+ f"cannot sample { amount_events } distinct events for a single year"
225+ f" when there are only { len (freqs_orig )} input events. Set "
226+ "with_replacement=True if you want to sample with replacmeent."
227+ )
228+
229+ # if not enough events remaining, use original events
230+ if len (indices ) < amount_events or len (indices ) == 0 :
231+ indices = indices_orig
232+
233+ # sample events
234+ selected_events = rng .choice (
235+ indices , size = amount_events , replace = False
236+ ).astype ("int" )
237+
238+ # determine used events to remove them from sampling pool
239+ idx_to_remove = [
240+ np .where (indices == event )[0 ][0 ] for event in selected_events
241+ ]
242+ indices = np .delete (indices , idx_to_remove )
243+
244+ # save sampled events in sampling vector
245+ sampling_vect .append (selected_events )
227246
228- # ensure that each event only occurs once per sampled year
229- unique_events = np .unique (indices , return_index = True )[0 ]
230- probab_dis = freqs [np .unique (indices , return_index = True )[1 ]] / (
231- np .sum (freqs [np .unique (indices , return_index = True )[1 ]])
232- )
247+ else :
248+ # easier method if we allow for replacement sample with replacement
249+ if with_replacement is False :
250+ LOGGER .warning (
251+ "Sampling without replacement not implemented for events with varying "
252+ "frequencies. Events are sampled with replacement."
253+ )
233254
234- # sample events
255+ probab_dis = freqs_orig / sum ( freqs_orig )
235256 rng = default_rng (seed )
257+
258+ # sample events for each sampled year
236259 selected_events = rng .choice (
237- unique_events , size = amount_events , replace = False , p = probab_dis
260+ indices_orig , size = sum ( events_per_year ) , replace = True , p = probab_dis
238261 ).astype ("int" )
239262
240- # determine used events to remove them from sampling pool
241- idx_to_remove = [np .where (indices == event )[0 ][0 ] for event in selected_events ]
242- indices = np .delete (indices , idx_to_remove )
243- freqs = np .delete (freqs , idx_to_remove )
244-
245- # save sampled events in sampling vector
246- sampling_vect .append (selected_events )
263+ # group to list of lists
264+ index = 0
265+ for amount_events in events_per_year :
266+ sampling_vect .append (selected_events [index : index + amount_events ])
267+ index += amount_events
247268
248269 return sampling_vect
249270
0 commit comments