Skip to content

Commit d0c1819

Browse files
changed default to sample with replacement and adapt tutorial
1 parent 596111c commit d0c1819

File tree

2 files changed

+41
-38
lines changed

2 files changed

+41
-38
lines changed

climada/util/yearsets.py

Lines changed: 24 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525

2626

2727
def impact_yearset(
28-
imp, sampled_years, lam=None, correction_fac=True, with_replacement=False, seed=None
28+
imp, sampled_years, lam=None, correction_fac=False, with_replacement=True, seed=None
2929
):
3030
"""Create a yearset of impacts (yimp) containing a probabilistic impact for each year
3131
in the sampled_years list by sampling events from the impact received as input with a
@@ -41,9 +41,9 @@ def impact_yearset(
4141
sampled_years : list
4242
A list of years that shall be covered by the resulting yimp.
4343
with_replacement : bool, optional
44-
If False and all frequencies of freqs_orig are constant, events are sampled
45-
without replacement. Otherwise, events are sampled with replacement.
46-
Defaults to False.
44+
If True, impact events are sampled with replacement. If False, events are sampled
45+
without replacement. Sampling without replacement can yield distorted samples if
46+
frequencies of different events are unqual. Defaults to True.
4747
seed : Any, optional
4848
seed for the default bit generator
4949
default: None
@@ -87,7 +87,7 @@ def impact_yearset(
8787

8888

8989
def impact_yearset_from_sampling_vect(
90-
imp, sampled_years, sampling_vect, correction_fac=True
90+
imp, sampled_years, sampling_vect, correction_fac=False
9191
):
9292
"""Create a yearset of impacts (yimp) containing a probabilistic impact for each year
9393
in the sampled_years list by sampling events from the impact received as input following
@@ -171,7 +171,7 @@ def sample_from_poisson(n_sampled_years, lam, seed=None):
171171
return np.round(np.random.poisson(lam=lam, size=n_sampled_years)).astype("int")
172172

173173

174-
def sample_events(events_per_year, freqs_orig, with_replacement=False, seed=None):
174+
def sample_events(events_per_year, freqs_orig, with_replacement=True, seed=None):
175175
"""Sample events uniformely from an array (indices_orig) without replacement
176176
(if sum(events_per_year) > n_input_events the input events are repeated
177177
(tot_n_events/n_input_events) times, by ensuring that the same events doens't
@@ -184,9 +184,9 @@ def sample_events(events_per_year, freqs_orig, with_replacement=False, seed=None
184184
freqs_orig : np.ndarray
185185
Frequency of each input event
186186
with_replacement : bool, optional
187-
If False and all frequencies of freqs_orig are constant, events are sampled
188-
without replacement. Otherwise, events are sampled with replacement.
189-
Defaults to False.
187+
If True, impact events are sampled with replacement. If False, events are sampled
188+
without replacement. Sampling without replacement can yield distorted samples if
189+
frequencies of different events are unqual. Defaults to True.
190190
seed : Any, optional
191191
seed for the default bit generator.
192192
Default: None
@@ -201,13 +201,20 @@ def sample_events(events_per_year, freqs_orig, with_replacement=False, seed=None
201201

202202
sampling_vect = []
203203
indices_orig = np.arange(len(freqs_orig))
204+
rng = default_rng(seed)
204205

205-
# this is the previous way of sampling
206-
# (without replacement, works well if frequencies are constant)
207-
if np.unique(freqs_orig).size == 1 and not with_replacement:
206+
# sample without replacement, works well if event frequencies are equal
207+
if with_replacement is False:
208+
# warn if frequencies of different events are not equal
209+
if np.unique(freqs_orig).size != 1:
210+
LOGGER.warning(
211+
"The frequencies of the different events are not equal. This can lead to "
212+
"distorted sampling if the frequencies vary significantly. To avoid this, "
213+
"please set with_replacement=True to sample with replacement instead."
214+
)
208215

209216
indices = indices_orig
210-
rng = default_rng(seed)
217+
freqs = freqs_orig
211218

212219
# sample events for each sampled year
213220
for amount_events in events_per_year:
@@ -222,31 +229,27 @@ def sample_events(events_per_year, freqs_orig, with_replacement=False, seed=None
222229
# if not enough events remaining, use original events
223230
if len(indices) < amount_events or len(indices) == 0:
224231
indices = indices_orig
232+
freqs = freqs_orig
225233

226234
# sample events
235+
probab_dis = freqs / sum(freqs)
227236
selected_events = rng.choice(
228-
indices, size=amount_events, replace=False
237+
indices, size=amount_events, replace=False, p=probab_dis
229238
).astype("int")
230239

231240
# determine used events to remove them from sampling pool
232241
idx_to_remove = [
233242
np.where(indices == event)[0][0] for event in selected_events
234243
]
235244
indices = np.delete(indices, idx_to_remove)
245+
freqs = np.delete(freqs, idx_to_remove)
236246

237247
# save sampled events in sampling vector
238248
sampling_vect.append(selected_events)
239249

240250
else:
241251
# easier method if we allow for replacement sample with replacement
242-
if with_replacement is False:
243-
LOGGER.warning(
244-
"Sampling without replacement not implemented for events with varying "
245-
"frequencies. Events are sampled with replacement."
246-
)
247-
248252
probab_dis = freqs_orig / sum(freqs_orig)
249-
rng = default_rng(seed)
250253

251254
# sample events for each sampled year
252255
selected_events = rng.choice(

doc/user-guide/climada_util_yearsets.ipynb

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,9 @@
1111
"The function `impact_yearset` performs all these computational steps, taking an `imp` and the list of sampled_years (`sampled_years`) as input. The output of the function is the `yimp` object and the `sampling_vect`.\n",
1212
"Moreover, a `sampling_vect` (generated in a previous run) can be provided as optional input and the user can custom-define the Poisson parameter `lam`. Reapplying the same sampling_vect does not only allow to reproduce the generated `yimp`, but also for a physically consistent way of sampling impacts caused by different hazards. \n",
1313
"\n",
14-
"*Sampling options.* Per default, impact events are sampled without replacement (if the original impact object contains enough events), such that the different yearly impacts stem from different events. This is only implemented if the event frequencies of the original impact object (`imp.frequency`) are constant. If the events differ in their frequency, they are sampled with replacement. This sampling behaviour can also be achieved by setting `with_replacement=True`. \n",
14+
"*Sampling options.* Per default, impact events are sampled with replacement. When setting `with_replacement=False`, the impact events are sampled without replacement (given that the original impact object contains enough events). Note that sampling without replacement can lead to distorted sampling if the frequencies of the different impacts (`imp.frequency`) are not equal.\n",
1515
"\n",
16-
"*Correction factor.* Per default, a correction factor is applied uniformly to all yearly impacts, such that the final `yimp` object has the same average annual impact than `imp`, the original impact object. When setting `correction_fac=False`, the correction factor is not applied.\n",
16+
"*Correction factor.* By setting `correction_fac=False`, a correction factor is applied uniformly to all yearly impacts, such that the final `yimp` object has the same average annual impact than `imp`, the original impact object.\n",
1717
"\n"
1818
]
1919
},
@@ -64,7 +64,7 @@
6464
{
6565
"data": {
6666
"text/plain": [
67-
"array([1, 3, 0, 0, 2, 0, 1, 2, 1, 3])"
67+
"array([1, 2, 1, 0, 1, 0, 0, 2, 1, 2])"
6868
]
6969
},
7070
"execution_count": 2,
@@ -91,15 +91,15 @@
9191
"data": {
9292
"text/plain": [
9393
"[array([2]),\n",
94-
" array([0, 4, 3]),\n",
94+
" array([1, 0]),\n",
95+
" array([2]),\n",
9596
" array([], dtype=int64),\n",
97+
" array([2]),\n",
9698
" array([], dtype=int64),\n",
97-
" array([5, 1]),\n",
9899
" array([], dtype=int64),\n",
99-
" array([3]),\n",
100-
" array([5, 1]),\n",
101-
" array([0]),\n",
102-
" array([2, 1, 5])]"
100+
" array([1, 4]),\n",
101+
" array([2]),\n",
102+
" array([4, 4])]"
103103
]
104104
},
105105
"execution_count": 3,
@@ -121,7 +121,7 @@
121121
{
122122
"data": {
123123
"text/plain": [
124-
"array([ 4, 66, 0, 0, 64, 0, 6, 64, 0, 68])"
124+
"array([ 4, 2, 4, 0, 4, 0, 0, 62, 4, 120])"
125125
]
126126
},
127127
"execution_count": 4,
@@ -143,7 +143,7 @@
143143
{
144144
"data": {
145145
"text/plain": [
146-
"0.9852941176470589"
146+
"1.34"
147147
]
148148
},
149149
"execution_count": 5,
@@ -180,9 +180,9 @@
180180
"name": "stdout",
181181
"output_type": "stream",
182182
"text": [
183-
"yimp.at_event = [ 60 0 0 8 128 0 0 130 122 8]\n",
184-
"imp_per_year = [ 60 0 0 8 128 0 0 130 122 8]\n",
185-
"The expected annual impact 45.6 differs from the one of the original impact (26.8).\n"
183+
"yimp.at_event = [ 2 0 0 122 14 0 0 246 4 124]\n",
184+
"imp_per_year = [ 2 0 0 122 14 0 0 246 4 124]\n",
185+
"The expected annual impact 51.2 differs from the one of the original impact (26.8).\n"
186186
]
187187
}
188188
],
@@ -228,9 +228,9 @@
228228
"name": "stdout",
229229
"output_type": "stream",
230230
"text": [
231-
"2025-07-17 14:56:28,995 - climada.util.yearsets - INFO - The correction factor is 0.5877192982456141.\n",
232-
"yimp.at_event = [35.26 0. 0. 4.7 75.23 0. 0. 76.4 71.7 4.7 ]\n",
233-
"imp_per_year = [35.26 0. 0. 4.7 75.23 0. 0. 76.4 71.7 4.7 ]\n",
231+
"2025-07-22 17:10:20,134 - climada.util.yearsets - INFO - The correction factor is 0.5234375.\n",
232+
"yimp.at_event = [ 1.05 0. 0. 63.86 7.33 0. 0. 128.77 2.09 64.91]\n",
233+
"imp_per_year = [ 1.05 0. 0. 63.86 7.33 0. 0. 128.77 2.09 64.91]\n",
234234
"The expected annual impact 26.8 is equal to the one of the original impact (26.8).\n"
235235
]
236236
}

0 commit comments

Comments
 (0)