Skip to content

Commit 4ad866f

Browse files
author
Chahan Kropf
committed
Merge remote-tracking branch 'origin/develop' into feature/relative_matching_distance
2 parents d3ab604 + 16a5ca7 commit 4ad866f

File tree

7 files changed

+253
-158
lines changed

7 files changed

+253
-158
lines changed

.github/pull_request_template.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,10 @@ This PR fixes #
1919
### PR Reviewer Checklist
2020

2121
- [ ] Read the [Contribution Guide][contrib]
22-
- [ ] [CLIMADA Reviewer Checklist](https://climada-python.readthedocs.io/en/latest/guide/Guide_Reviewer_Checklist.html) passed
22+
- [ ] [CLIMADA Reviewer Checklist](https://climada-python.readthedocs.io/en/latest/development/Guide_Review.html) passed
2323
- [ ] [Tests][testing] passing
2424
- [ ] No new [linter issues][linter]
2525

2626
[contrib]: https://github.com/CLIMADA-project/climada_python/blob/main/CONTRIBUTING.md
27-
[testing]: https://climada-python.readthedocs.io/en/latest/guide/Guide_Continuous_Integration_and_Testing.html
28-
[linter]: https://climada-python.readthedocs.io/en/stable/guide/Guide_Continuous_Integration_and_Testing.html#3.C.--Static-Code-Analysis
27+
[testing]: https://climada-python.readthedocs.io/en/latest/development/Guide_continuous_integration_GitHub_actions.html
28+
[linter]: https://climada-python.readthedocs.io/en/latest/development/Guide_continuous_integration_GitHub_actions.html#static-code-analysis

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ geographic coordinates as input (e.g. `util.coordinates.dist_to_coast`, `util.co
4444
- `Exposures.write_hdf5` pickles geometry data in WKB format, which is faster and more sustainable. [#1051](https://github.com/CLIMADA-project/climada_python/pull/1051)
4545
- The online documentation has been completely overhauled, now uses PyData theme: [#977](https://github.com/CLIMADA-project/climada_python/pull/977)
4646
- Add `climada.hazard.xarray` module with helper structures for reading Hazard objects from `xarray` data [#1063](https://github.com/CLIMADA-project/climada_python/pull/1063)
47+
- The output of the `impact_yearset` was changed to only contain attributes corresponding to the yearly impact set. The application of the correction factor and the frequency of the resulting yearly impact object are corrected. [#1075](https://github.com/CLIMADA-project/climada_python/pull/1075)
4748

4849
### Fixed
4950

climada/util/test/test_yearsets.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,9 @@ def test_sample_events(self):
9090
)
9191
frequencies = np.array(np.ones(20) * 0.2)
9292

93-
sampling_vect = yearsets.sample_events(events_per_year, frequencies)
93+
sampling_vect = yearsets.sample_events(
94+
events_per_year, frequencies, with_replacement=False
95+
)
9496

9597
self.assertEqual(len(sampling_vect), len(events_per_year))
9698
self.assertEqual(

climada/util/yearsets.py

Lines changed: 106 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -13,18 +13,20 @@
1313
Define functions to handle impact_yearsets
1414
"""
1515

16-
import copy
1716
import logging
1817

1918
import numpy as np
2019
from numpy.random import default_rng
2120

2221
import climada.util.dates_times as u_dt
22+
from climada.engine import Impact
2323

2424
LOGGER = logging.getLogger(__name__)
2525

2626

27-
def impact_yearset(imp, sampled_years, lam=None, correction_fac=True, seed=None):
27+
def impact_yearset(
28+
imp, sampled_years, lam=None, correction_fac=True, with_replacement=False, seed=None
29+
):
2830
"""Create a yearset of impacts (yimp) containing a probabilistic impact for each year
2931
in the sampled_years list by sampling events from the impact received as input with a
3032
Poisson distribution centered around lam per year (lam = sum(imp.frequency)).
@@ -38,19 +40,22 @@ def impact_yearset(imp, sampled_years, lam=None, correction_fac=True, seed=None)
3840
impact object containing impacts per event
3941
sampled_years : list
4042
A list of years that shall be covered by the resulting yimp.
43+
lam: float, optional
44+
The parameter (i.e., the mean) of the Poisson distribution to sample the number of
45+
events per year. If no lambda value is given, the default lam = sum(imp.frequency)
46+
is used. Default is None.
47+
correction_fac : boolean, optional
48+
If True, a correction factor is applied to the resulting yimp. It is
49+
scaled in such a way that the expected annual impact (eai) of the yimp
50+
equals the eai of the input impact. Defaults to True.
51+
with_replacement : bool, optional
52+
If True, impact events are sampled with replacement. If False, events are sampled
53+
without replacement. Sampling without replacement can yield distorted samples if
54+
frequencies of different events are unequal. Defaults to False.
4155
seed : Any, optional
4256
seed for the default bit generator
4357
default: None
4458
45-
Optional parameters
46-
lam: int
47-
The applied Poisson distribution is centered around lam events per year.
48-
If no lambda value is given, the default lam = sum(imp.frequency) is used.
49-
correction_fac : boolean
50-
If True a correction factor is applied to the resulting yimp. It is
51-
scaled in such a way that the expected annual impact (eai) of the yimp
52-
equals the eai of the input impact
53-
5459
Returns
5560
-------
5661
yimp : climada.engine.Impact()
@@ -68,27 +73,13 @@ def impact_yearset(imp, sampled_years, lam=None, correction_fac=True, seed=None)
6873
if not lam:
6974
lam = np.sum(imp.frequency)
7075
events_per_year = sample_from_poisson(n_sampled_years, lam, seed=seed)
71-
sampling_vect = sample_events(events_per_year, imp.frequency, seed=seed)
76+
sampling_vect = sample_events(
77+
events_per_year, imp.frequency, with_replacement=with_replacement, seed=seed
78+
)
7279

7380
# compute impact per sampled_year
74-
imp_per_year = compute_imp_per_year(imp, sampling_vect)
75-
76-
# copy imp object as basis for the yimp object
77-
yimp = copy.deepcopy(imp)
78-
79-
# save imp_per_year in yimp
80-
if correction_fac: # adjust for sampling error
81-
yimp.at_event = imp_per_year / calculate_correction_fac(imp_per_year, imp)
82-
else:
83-
yimp.at_event = imp_per_year
84-
85-
# save calculations in yimp
86-
yimp.event_id = np.arange(1, n_sampled_years + 1)
87-
yimp.date = u_dt.str_to_date([str(date) + "-01-01" for date in sampled_years])
88-
yimp.frequency = (
89-
np.ones(n_sampled_years)
90-
* sum(len(row) for row in sampling_vect)
91-
/ n_sampled_years
81+
yimp = impact_yearset_from_sampling_vect(
82+
imp, sampled_years, sampling_vect, correction_fac=correction_fac
9283
)
9384

9485
return yimp, sampling_vect
@@ -118,12 +109,10 @@ def impact_yearset_from_sampling_vect(
118109
i.e. [yimp, sampling_vect] = climada_yearsets.impact_yearset(...)
119110
and can then be provided in this function to obtain the exact same sampling
120111
(also for a different imp object)
121-
122-
Optional parameter
123-
correction_fac : boolean
112+
correction_fac : boolean, optional
124113
If True a correction factor is applied to the resulting yimp. It is
125114
scaled in such a way that the expected annual impact (eai) of the yimp
126-
equals the eai of the input impact
115+
equals the eai of the input impact. Defaults to True.
127116
128117
Returns
129118
-------
@@ -134,22 +123,24 @@ def impact_yearset_from_sampling_vect(
134123

135124
# compute impact per sampled_year
136125
imp_per_year = compute_imp_per_year(imp, sampling_vect)
137-
138-
# copy imp object as basis for the yimp object
139-
yimp = copy.deepcopy(imp)
126+
frequency = np.ones(len(sampled_years)) / len(sampled_years)
140127

141128
if correction_fac: # adjust for sampling error
142-
imp_per_year = imp_per_year / calculate_correction_fac(imp_per_year, imp)
143-
144-
# save calculations in yimp
145-
yimp.at_event = imp_per_year
146-
n_sampled_years = len(sampled_years)
147-
yimp.event_id = np.arange(1, n_sampled_years + 1)
148-
yimp.date = u_dt.str_to_date([str(date) + "-01-01" for date in sampled_years])
149-
yimp.frequency = (
150-
np.ones(n_sampled_years)
151-
* sum(len(row) for row in sampling_vect)
152-
/ n_sampled_years
129+
correction_factor = calculate_correction_fac(imp_per_year, imp)
130+
imp_per_year = imp_per_year.astype(float) * correction_factor
131+
LOGGER.info("The correction factor is %s.", correction_factor)
132+
133+
# create yearly impact object
134+
yimp = Impact(
135+
at_event=imp_per_year,
136+
date=u_dt.str_to_date([str(date) + "-01-01" for date in sampled_years]),
137+
event_name=np.arange(1, len(sampled_years) + 1),
138+
event_id=np.arange(1, len(sampled_years) + 1),
139+
unit=imp.unit,
140+
haz_type=imp.haz_type,
141+
frequency=frequency,
142+
frequency_unit="1/year",
143+
aai_agg=np.sum(frequency * imp_per_year),
153144
)
154145

155146
return yimp
@@ -163,7 +154,8 @@ def sample_from_poisson(n_sampled_years, lam, seed=None):
163154
n_sampled_years : int
164155
The target number of years the impact yearset shall contain.
165156
lam: float
166-
the applied Poisson distribution is centered around lambda events per year
157+
The parameter (i.e., the mean) of the Poisson distribution to sample the number of
158+
events per year.
167159
seed : int, optional
168160
seed for numpy.random, will be set if not None
169161
default: None
@@ -178,7 +170,7 @@ def sample_from_poisson(n_sampled_years, lam, seed=None):
178170
return np.round(np.random.poisson(lam=lam, size=n_sampled_years)).astype("int")
179171

180172

181-
def sample_events(events_per_year, freqs_orig, seed=None):
173+
def sample_events(events_per_year, freqs_orig, with_replacement=False, seed=None):
182174
"""Sample events uniformely from an array (indices_orig) without replacement
183175
(if sum(events_per_year) > n_input_events the input events are repeated
184176
(tot_n_events/n_input_events) times, by ensuring that the same event doesn't
@@ -190,6 +182,10 @@ def sample_events(events_per_year, freqs_orig, seed=None):
190182
Number of events per sampled year
191183
freqs_orig : np.ndarray
192184
Frequency of each input event
185+
with_replacement : bool, optional
186+
If True, impact events are sampled with replacement. If False, events are sampled
187+
without replacement. Sampling without replacement can yield distorted samples if
188+
frequencies of different events are unequal. Defaults to False.
193189
seed : Any, optional
194190
seed for the default bit generator.
195191
Default: None
@@ -203,47 +199,72 @@ def sample_events(events_per_year, freqs_orig, seed=None):
203199
"""
204200

205201
sampling_vect = []
206-
207202
indices_orig = np.arange(len(freqs_orig))
203+
rng = default_rng(seed)
204+
205+
# sample without replacement, works well if event frequencies are equal
206+
if with_replacement is False:
207+
# warn if frequencies of different events are not equal
208+
if np.unique(freqs_orig).size != 1:
209+
LOGGER.warning(
210+
"The frequencies of the different events are not equal. This can lead to "
211+
"distorted sampling if the frequencies vary significantly. To avoid this, "
212+
"please set `with_replacement=True` to sample with replacement instead."
213+
)
208214

209-
freqs = freqs_orig
210-
indices = indices_orig
211-
212-
# sample events for each sampled year
213-
for amount_events in events_per_year:
214-
# if there are not enough input events, choice with no replace will fail
215-
if amount_events > len(freqs_orig):
216-
raise ValueError(
217-
f"cannot sample {amount_events} distinct events for a single year"
218-
f" when there are only {len(freqs_orig)} input events"
215+
indices = indices_orig
216+
freqs = freqs_orig
217+
218+
# sample events for each sampled year
219+
for amount_events in events_per_year:
220+
# if there are not enough input events, choice with no replace will fail
221+
if amount_events > len(freqs_orig):
222+
raise ValueError(
223+
f"cannot sample {amount_events} distinct events for a single year"
224+
f" when there are only {len(freqs_orig)} input events. Set "
225+
"with_replacement=True if you want to sample with replacmeent."
226+
)
227+
228+
# if not enough events remaining, append original events
229+
if len(np.unique(indices)) < amount_events or len(indices) == 0:
230+
indices = np.append(indices, indices_orig)
231+
freqs = np.append(freqs, freqs_orig)
232+
233+
# ensure that each event only occurs once per sampled year
234+
unique_events = np.unique(indices, return_index=True)[0]
235+
probab_dis = freqs[np.unique(indices, return_index=True)[1]] / (
236+
np.sum(freqs[np.unique(indices, return_index=True)[1]])
219237
)
220238

221-
# add the original indices and frequencies to the pool if there are less events
222-
# in the pool than needed to fill the year one is sampling for
223-
# or if the pool is empty (not covered in case amount_events is 0)
224-
if len(np.unique(indices)) < amount_events or len(indices) == 0:
225-
indices = np.append(indices, indices_orig)
226-
freqs = np.append(freqs, freqs_orig)
227-
228-
# ensure that each event only occurs once per sampled year
229-
unique_events = np.unique(indices, return_index=True)[0]
230-
probab_dis = freqs[np.unique(indices, return_index=True)[1]] / (
231-
np.sum(freqs[np.unique(indices, return_index=True)[1]])
232-
)
233-
234-
# sample events
235-
rng = default_rng(seed)
239+
# sample events
240+
selected_events = rng.choice(
241+
unique_events, size=amount_events, replace=False, p=probab_dis
242+
).astype("int")
243+
244+
# determine used events to remove them from sampling pool
245+
idx_to_remove = [
246+
np.where(indices == event)[0][0] for event in selected_events
247+
]
248+
indices = np.delete(indices, idx_to_remove)
249+
freqs = np.delete(freqs, idx_to_remove)
250+
251+
# save sampled events in sampling vector
252+
sampling_vect.append(selected_events)
253+
254+
else:
255+
# simpler method: when replacement is allowed, sample all events at once with replacement
256+
probab_dis = freqs_orig / sum(freqs_orig)
257+
258+
# sample events for each sampled year
236259
selected_events = rng.choice(
237-
unique_events, size=amount_events, replace=False, p=probab_dis
260+
indices_orig, size=sum(events_per_year), replace=True, p=probab_dis
238261
).astype("int")
239262

240-
# determine used events to remove them from sampling pool
241-
idx_to_remove = [np.where(indices == event)[0][0] for event in selected_events]
242-
indices = np.delete(indices, idx_to_remove)
243-
freqs = np.delete(freqs, idx_to_remove)
244-
245-
# save sampled events in sampling vector
246-
sampling_vect.append(selected_events)
263+
# group to list of lists
264+
index = 0
265+
for amount_events in events_per_year:
266+
sampling_vect.append(selected_events[index : index + amount_events])
267+
index += amount_events
247268

248269
return sampling_vect
249270

@@ -291,10 +312,9 @@ def calculate_correction_fac(imp_per_year, imp):
291312
The correction factor is calculated as imp_eai/yimp_eai
292313
"""
293314

294-
yimp_eai = np.sum(imp_per_year) / len(imp_per_year)
315+
yimp_eai = np.mean(imp_per_year)
295316
imp_eai = np.sum(imp.frequency * imp.at_event)
296317
correction_factor = imp_eai / yimp_eai
297-
LOGGER.info("The correction factor amounts to %s", (correction_factor - 1) * 100)
298318

299319
# if correction_factor > 0.1:
300320
# tex = raw_input("Do you want to exclude small events?")

0 commit comments

Comments
 (0)