Skip to content

Commit 948bfc2

Browse files
add optinal replacement parameter to sample year sets
1 parent c3b5376 commit 948bfc2

File tree

1 file changed

+53
-32
lines changed

1 file changed

+53
-32
lines changed

climada/util/yearsets.py

Lines changed: 53 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,7 @@ def sample_from_poisson(n_sampled_years, lam, seed=None):
178178
return np.round(np.random.poisson(lam=lam, size=n_sampled_years)).astype("int")
179179

180180

181-
def sample_events(events_per_year, freqs_orig, seed=None):
181+
def sample_events(events_per_year, freqs_orig, with_replacement=False, seed=None):
182182
"""Sample events uniformely from an array (indices_orig) without replacement
183183
(if sum(events_per_year) > n_input_events the input events are repeated
184184
(tot_n_events/n_input_events) times, by ensuring that the same events doens't
@@ -190,6 +190,10 @@ def sample_events(events_per_year, freqs_orig, seed=None):
190190
Number of events per sampled year
191191
freqs_orig : np.ndarray
192192
Frequency of each input event
193+
with_replacement : bool, optional
194+
If False and all frequencies of freqs_orig are constant, events are sampled
195+
without replacement. Otherwise, events are sampled with replacement.
196+
Defaults to False.
193197
seed : Any, optional
194198
seed for the default bit generator.
195199
Default: None
@@ -203,47 +207,64 @@ def sample_events(events_per_year, freqs_orig, seed=None):
203207
"""
204208

205209
sampling_vect = []
206-
207210
indices_orig = np.arange(len(freqs_orig))
208211

209-
freqs = freqs_orig
210-
indices = indices_orig
212+
# this is the previous way of sampling
213+
# (without replacement, works well if frequencies are constant)
214+
if np.unique(freqs_orig).size == 1 and not with_replacement:
211215

212-
# sample events for each sampled year
213-
for amount_events in events_per_year:
214-
# if there are not enough input events, choice with no replace will fail
215-
if amount_events > len(freqs_orig):
216-
raise ValueError(
217-
f"cannot sample {amount_events} distinct events for a single year"
218-
f" when there are only {len(freqs_orig)} input events"
219-
)
216+
indices = indices_orig
217+
rng = default_rng(seed)
220218

221-
# add the original indices and frequencies to the pool if there are less events
222-
# in the pool than needed to fill the year one is sampling for
223-
# or if the pool is empty (not covered in case amount_events is 0)
224-
if len(np.unique(indices)) < amount_events or len(indices) == 0:
225-
indices = np.append(indices, indices_orig)
226-
freqs = np.append(freqs, freqs_orig)
219+
# sample events for each sampled year
220+
for amount_events in events_per_year:
221+
# if there are not enough input events, choice with no replace will fail
222+
if amount_events > len(freqs_orig):
223+
raise ValueError(
224+
f"cannot sample {amount_events} distinct events for a single year"
225+
f" when there are only {len(freqs_orig)} input events. Set "
226+
"with_replacement=True if you want to sample with replacmeent."
227+
)
228+
229+
# if not enough events remaining, use original events
230+
if len(indices) < amount_events or len(indices) == 0:
231+
indices = indices_orig
232+
233+
# sample events
234+
selected_events = rng.choice(
235+
indices, size=amount_events, replace=False
236+
).astype("int")
237+
238+
# determine used events to remove them from sampling pool
239+
idx_to_remove = [
240+
np.where(indices == event)[0][0] for event in selected_events
241+
]
242+
indices = np.delete(indices, idx_to_remove)
243+
244+
# save sampled events in sampling vector
245+
sampling_vect.append(selected_events)
227246

228-
# ensure that each event only occurs once per sampled year
229-
unique_events = np.unique(indices, return_index=True)[0]
230-
probab_dis = freqs[np.unique(indices, return_index=True)[1]] / (
231-
np.sum(freqs[np.unique(indices, return_index=True)[1]])
232-
)
247+
else:
248+
# easier method if we allow for replacement sample with replacement
249+
if with_replacement is False:
250+
LOGGER.warning(
251+
"Sampling without replacement not implemented for events with varying "
252+
"frequencies. Events are sampled with replacement."
253+
)
233254

234-
# sample events
255+
probab_dis = freqs_orig / sum(freqs_orig)
235256
rng = default_rng(seed)
257+
258+
# sample events for each sampled year
236259
selected_events = rng.choice(
237-
unique_events, size=amount_events, replace=False, p=probab_dis
260+
indices_orig, size=sum(events_per_year), replace=True, p=probab_dis
238261
).astype("int")
239262

240-
# determine used events to remove them from sampling pool
241-
idx_to_remove = [np.where(indices == event)[0][0] for event in selected_events]
242-
indices = np.delete(indices, idx_to_remove)
243-
freqs = np.delete(freqs, idx_to_remove)
244-
245-
# save sampled events in sampling vector
246-
sampling_vect.append(selected_events)
263+
# group to list of lists
264+
index = 0
265+
for amount_events in events_per_year:
266+
sampling_vect.append(selected_events[index : index + amount_events])
267+
index += amount_events
247268

248269
return sampling_vect
249270

0 commit comments

Comments
 (0)