1313Define functions to handle impact_yearsets
1414"""
1515
16- import copy
1716import logging
1817
1918import numpy as np
2019from numpy .random import default_rng
2120
2221import climada .util .dates_times as u_dt
22+ from climada .engine import Impact
2323
2424LOGGER = logging .getLogger (__name__ )
2525
2626
27- def impact_yearset (imp , sampled_years , lam = None , correction_fac = True , seed = None ):
27+ def impact_yearset (
28+ imp , sampled_years , lam = None , correction_fac = True , with_replacement = False , seed = None
29+ ):
2830 """Create a yearset of impacts (yimp) containing a probabilistic impact for each year
2931 in the sampled_years list by sampling events from the impact received as input with a
3032 Poisson distribution centered around lam per year (lam = sum(imp.frequency)).
@@ -38,19 +40,22 @@ def impact_yearset(imp, sampled_years, lam=None, correction_fac=True, seed=None)
3840 impact object containing impacts per event
3941 sampled_years : list
4042 A list of years that shall be covered by the resulting yimp.
43+ lam: float, optional
44+ The parameter (i.e., the mean) of the Poisson distribution to sample the number of
45+ events per year. If no lambda value is given, the default lam = sum(imp.frequency)
46+ is used. Default is None.
47+ correction_fac : boolean, optional
48+ If True, a correction factor is applied to the resulting yimp. It is
49+ scaled in such a way that the expected annual impact (eai) of the yimp
50+ equals the eai of the input impact. Defaults to True.
51+ with_replacement : bool, optional
52+ If True, impact events are sampled with replacement. If False, events are sampled
53+ without replacement. Sampling without replacement can yield distorted samples if
54+ frequencies of different events are unqual. Defaults to False.
4155 seed : Any, optional
4256 seed for the default bit generator
4357 default: None
4458
45- Optional parameters
46- lam: int
47- The applied Poisson distribution is centered around lam events per year.
48- If no lambda value is given, the default lam = sum(imp.frequency) is used.
49- correction_fac : boolean
50- If True a correction factor is applied to the resulting yimp. It is
51- scaled in such a way that the expected annual impact (eai) of the yimp
52- equals the eai of the input impact
53-
5459 Returns
5560 -------
5661 yimp : climada.engine.Impact()
@@ -68,27 +73,13 @@ def impact_yearset(imp, sampled_years, lam=None, correction_fac=True, seed=None)
6873 if not lam :
6974 lam = np .sum (imp .frequency )
7075 events_per_year = sample_from_poisson (n_sampled_years , lam , seed = seed )
71- sampling_vect = sample_events (events_per_year , imp .frequency , seed = seed )
76+ sampling_vect = sample_events (
77+ events_per_year , imp .frequency , with_replacement = with_replacement , seed = seed
78+ )
7279
7380 # compute impact per sampled_year
74- imp_per_year = compute_imp_per_year (imp , sampling_vect )
75-
76- # copy imp object as basis for the yimp object
77- yimp = copy .deepcopy (imp )
78-
79- # save imp_per_year in yimp
80- if correction_fac : # adjust for sampling error
81- yimp .at_event = imp_per_year / calculate_correction_fac (imp_per_year , imp )
82- else :
83- yimp .at_event = imp_per_year
84-
85- # save calculations in yimp
86- yimp .event_id = np .arange (1 , n_sampled_years + 1 )
87- yimp .date = u_dt .str_to_date ([str (date ) + "-01-01" for date in sampled_years ])
88- yimp .frequency = (
89- np .ones (n_sampled_years )
90- * sum (len (row ) for row in sampling_vect )
91- / n_sampled_years
81+ yimp = impact_yearset_from_sampling_vect (
82+ imp , sampled_years , sampling_vect , correction_fac = correction_fac
9283 )
9384
9485 return yimp , sampling_vect
@@ -118,12 +109,10 @@ def impact_yearset_from_sampling_vect(
118109 i.e. [yimp, sampling_vect] = climada_yearsets.impact_yearset(...)
119110 and can then be provided in this function to obtain the exact same sampling
120111 (also for a different imp object)
121-
122- Optional parameter
123- correction_fac : boolean
112+ correction_fac : boolean, optional
124113 If True a correction factor is applied to the resulting yimp. It is
125114 scaled in such a way that the expected annual impact (eai) of the yimp
126- equals the eai of the input impact
115+ equals the eai of the input impact. Defaults to True.
127116
128117 Returns
129118 -------
@@ -134,22 +123,24 @@ def impact_yearset_from_sampling_vect(
134123
135124 # compute impact per sampled_year
136125 imp_per_year = compute_imp_per_year (imp , sampling_vect )
137-
138- # copy imp object as basis for the yimp object
139- yimp = copy .deepcopy (imp )
126+ frequency = np .ones (len (sampled_years )) / len (sampled_years )
140127
141128 if correction_fac : # adjust for sampling error
142- imp_per_year = imp_per_year / calculate_correction_fac (imp_per_year , imp )
143-
144- # save calculations in yimp
145- yimp .at_event = imp_per_year
146- n_sampled_years = len (sampled_years )
147- yimp .event_id = np .arange (1 , n_sampled_years + 1 )
148- yimp .date = u_dt .str_to_date ([str (date ) + "-01-01" for date in sampled_years ])
149- yimp .frequency = (
150- np .ones (n_sampled_years )
151- * sum (len (row ) for row in sampling_vect )
152- / n_sampled_years
129+ correction_factor = calculate_correction_fac (imp_per_year , imp )
130+ imp_per_year = imp_per_year .astype (float ) * correction_factor
131+ LOGGER .info ("The correction factor is %s." , correction_factor )
132+
133+ # create yearly impact object
134+ yimp = Impact (
135+ at_event = imp_per_year ,
136+ date = u_dt .str_to_date ([str (date ) + "-01-01" for date in sampled_years ]),
137+ event_name = np .arange (1 , len (sampled_years ) + 1 ),
138+ event_id = np .arange (1 , len (sampled_years ) + 1 ),
139+ unit = imp .unit ,
140+ haz_type = imp .haz_type ,
141+ frequency = frequency ,
142+ frequency_unit = "1/year" ,
143+ aai_agg = np .sum (frequency * imp_per_year ),
153144 )
154145
155146 return yimp
@@ -163,7 +154,8 @@ def sample_from_poisson(n_sampled_years, lam, seed=None):
163154 n_sampled_years : int
164155 The target number of years the impact yearset shall contain.
165156 lam: float
166- the applied Poisson distribution is centered around lambda events per year
157+ The parameter (i.e., the mean) of the Poisson distribution to sample the number of
158+ events per year.
167159 seed : int, optional
168160 seed for numpy.random, will be set if not None
169161 default: None
@@ -178,7 +170,7 @@ def sample_from_poisson(n_sampled_years, lam, seed=None):
178170 return np .round (np .random .poisson (lam = lam , size = n_sampled_years )).astype ("int" )
179171
180172
181- def sample_events (events_per_year , freqs_orig , seed = None ):
173+ def sample_events (events_per_year , freqs_orig , with_replacement = False , seed = None ):
182174 """Sample events uniformely from an array (indices_orig) without replacement
183175 (if sum(events_per_year) > n_input_events the input events are repeated
184176 (tot_n_events/n_input_events) times, by ensuring that the same events doens't
@@ -190,6 +182,10 @@ def sample_events(events_per_year, freqs_orig, seed=None):
190182 Number of events per sampled year
191183 freqs_orig : np.ndarray
192184 Frequency of each input event
185+ with_replacement : bool, optional
186+ If True, impact events are sampled with replacement. If False, events are sampled
187+ without replacement. Sampling without replacement can yield distorted samples if
188+ frequencies of different events are unqual. Defaults to False.
193189 seed : Any, optional
194190 seed for the default bit generator.
195191 Default: None
@@ -203,47 +199,72 @@ def sample_events(events_per_year, freqs_orig, seed=None):
203199 """
204200
205201 sampling_vect = []
206-
207202 indices_orig = np .arange (len (freqs_orig ))
203+ rng = default_rng (seed )
204+
205+ # sample without replacement, works well if event frequencies are equal
206+ if with_replacement is False :
207+ # warn if frequencies of different events are not equal
208+ if np .unique (freqs_orig ).size != 1 :
209+ LOGGER .warning (
210+ "The frequencies of the different events are not equal. This can lead to "
211+ "distorted sampling if the frequencies vary significantly. To avoid this, "
212+ "please set `with_replacement=True` to sample with replacement instead."
213+ )
208214
209- freqs = freqs_orig
210- indices = indices_orig
211-
212- # sample events for each sampled year
213- for amount_events in events_per_year :
214- # if there are not enough input events, choice with no replace will fail
215- if amount_events > len (freqs_orig ):
216- raise ValueError (
217- f"cannot sample { amount_events } distinct events for a single year"
218- f" when there are only { len (freqs_orig )} input events"
215+ indices = indices_orig
216+ freqs = freqs_orig
217+
218+ # sample events for each sampled year
219+ for amount_events in events_per_year :
220+ # if there are not enough input events, choice with no replace will fail
221+ if amount_events > len (freqs_orig ):
222+ raise ValueError (
223+ f"cannot sample { amount_events } distinct events for a single year"
224+ f" when there are only { len (freqs_orig )} input events. Set "
225+ "with_replacement=True if you want to sample with replacmeent."
226+ )
227+
228+ # if not enough events remaining, append original events
229+ if len (np .unique (indices )) < amount_events or len (indices ) == 0 :
230+ indices = np .append (indices , indices_orig )
231+ freqs = np .append (freqs , freqs_orig )
232+
233+ # ensure that each event only occurs once per sampled year
234+ unique_events = np .unique (indices , return_index = True )[0 ]
235+ probab_dis = freqs [np .unique (indices , return_index = True )[1 ]] / (
236+ np .sum (freqs [np .unique (indices , return_index = True )[1 ]])
219237 )
220238
221- # add the original indices and frequencies to the pool if there are less events
222- # in the pool than needed to fill the year one is sampling for
223- # or if the pool is empty (not covered in case amount_events is 0)
224- if len (np .unique (indices )) < amount_events or len (indices ) == 0 :
225- indices = np .append (indices , indices_orig )
226- freqs = np .append (freqs , freqs_orig )
227-
228- # ensure that each event only occurs once per sampled year
229- unique_events = np .unique (indices , return_index = True )[0 ]
230- probab_dis = freqs [np .unique (indices , return_index = True )[1 ]] / (
231- np .sum (freqs [np .unique (indices , return_index = True )[1 ]])
232- )
233-
234- # sample events
235- rng = default_rng (seed )
239+ # sample events
240+ selected_events = rng .choice (
241+ unique_events , size = amount_events , replace = False , p = probab_dis
242+ ).astype ("int" )
243+
244+ # determine used events to remove them from sampling pool
245+ idx_to_remove = [
246+ np .where (indices == event )[0 ][0 ] for event in selected_events
247+ ]
248+ indices = np .delete (indices , idx_to_remove )
249+ freqs = np .delete (freqs , idx_to_remove )
250+
251+ # save sampled events in sampling vector
252+ sampling_vect .append (selected_events )
253+
254+ else :
255+ # easier method if we allow for replacement sample with replacement
256+ probab_dis = freqs_orig / sum (freqs_orig )
257+
258+ # sample events for each sampled year
236259 selected_events = rng .choice (
237- unique_events , size = amount_events , replace = False , p = probab_dis
260+ indices_orig , size = sum ( events_per_year ) , replace = True , p = probab_dis
238261 ).astype ("int" )
239262
240- # determine used events to remove them from sampling pool
241- idx_to_remove = [np .where (indices == event )[0 ][0 ] for event in selected_events ]
242- indices = np .delete (indices , idx_to_remove )
243- freqs = np .delete (freqs , idx_to_remove )
244-
245- # save sampled events in sampling vector
246- sampling_vect .append (selected_events )
263+ # group to list of lists
264+ index = 0
265+ for amount_events in events_per_year :
266+ sampling_vect .append (selected_events [index : index + amount_events ])
267+ index += amount_events
247268
248269 return sampling_vect
249270
@@ -291,10 +312,9 @@ def calculate_correction_fac(imp_per_year, imp):
291312 The correction factor is calculated as imp_eai/yimp_eai
292313 """
293314
294- yimp_eai = np .sum ( imp_per_year ) / len (imp_per_year )
315+ yimp_eai = np .mean (imp_per_year )
295316 imp_eai = np .sum (imp .frequency * imp .at_event )
296317 correction_factor = imp_eai / yimp_eai
297- LOGGER .info ("The correction factor amounts to %s" , (correction_factor - 1 ) * 100 )
298318
299319 # if correction_factor > 0.1:
300320 # tex = raw_input("Do you want to exclude small events?")
0 commit comments