Skip to content

Commit 0a04ab9

Browse files
committed
Use of PeriodIndex instead of DateTimeIndex
1 parent 21cc433 commit 0a04ab9

File tree

4 files changed

+69
-51
lines changed

4 files changed

+69
-51
lines changed

climada/trajectories/risk_trajectory.py

Lines changed: 42 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import matplotlib.pyplot as plt
3030
import matplotlib.ticker as ticker
3131
import pandas as pd
32+
from pandas.tseries.frequencies import to_offset
3233

3334
from climada.entity.disc_rates.base import DiscRates
3435
from climada.trajectories.interpolation import InterpolationStrategyBase
@@ -62,7 +63,7 @@ def __init__(
6263
self,
6364
snapshots_list: list[Snapshot],
6465
*,
65-
time_resolution: str = "YS",
66+
time_resolution: str = "Y",
6667
all_groups_name: str = "All",
6768
risk_disc: DiscRates | None = None,
6869
interpolation_strategy: InterpolationStrategyBase | None = None,
@@ -409,7 +410,10 @@ def risk_components_metrics(self, npv: bool = True, **kwargs) -> pd.DataFrame:
409410
if len(self._snapshots) > 2:
410411
tmp.set_index(["group", "date", "measure", "metric"], inplace=True)
411412
start_dates = [snap.date for snap in self._snapshots[:-1]]
412-
end_dates = [snap.date for snap in self._snapshots[1:]]
413+
end_dates = [
414+
snap.date - to_offset(self._time_resolution)
415+
for snap in self._snapshots[1:]
416+
]
413417
periods_dates = list(zip(start_dates, end_dates))
414418
tmp.loc[pd.IndexSlice[:, :, :, "base risk"]] = tmp.loc[
415419
pd.IndexSlice[:, str(self.start_date), :, "base risk"]
@@ -435,7 +439,7 @@ def risk_components_metrics(self, npv: bool = True, **kwargs) -> pd.DataFrame:
435439
].iloc[0]
436440

437441
tmp.reset_index(inplace=True)
438-
return tmp
442+
return tmp.drop("index", axis=1, errors="ignore")
439443

440444
def per_date_risk_metrics(
441445
self,
@@ -486,6 +490,7 @@ def _get_risk_periods(
486490
risk_periods: list[CalcRiskPeriod],
487491
start_date: datetime.date,
488492
end_date: datetime.date,
493+
strict: bool = True,
489494
):
490495
"""Returns risk periods from the given list that are within `start_date` and `end_date`.
491496
@@ -495,16 +500,28 @@ def _get_risk_periods(
495500
The list of risk periods to look through
496501
start_date : datetime.date
497502
end_date : datetime.date
498-
503+
strict: bool, default True
504+
If true, only returns periods strictly within start and end dates. Else,
505+
returns periods that have an overlap within start and end.
499506
"""
500-
return [
501-
period
502-
for period in risk_periods
503-
if (
504-
start_date <= period.snapshot_start.date
505-
or end_date >= period.snapshot_end.date
506-
)
507-
]
507+
if strict:
508+
return [
509+
period
510+
for period in risk_periods
511+
if (
512+
start_date <= period.snapshot_start.date
513+
and end_date >= period.snapshot_end.date
514+
)
515+
]
516+
else:
517+
return [
518+
period
519+
for period in risk_periods
520+
if not (
521+
start_date >= period.snapshot_end.date
522+
or end_date <= period.snapshot_start.date
523+
)
524+
]
508525

509526
@staticmethod
510527
def identify_continuous_periods(group, time_unit):
@@ -605,8 +622,8 @@ def _calc_waterfall_plot_data(
605622
end_date = self.end_date if end_date is None else end_date
606623
risk_components = self.risk_components_metrics(npv)
607624
risk_components = risk_components.loc[
608-
(risk_components["date"].dt.date >= start_date)
609-
& (risk_components["date"].dt.date <= end_date)
625+
(risk_components["date"] >= str(start_date))
626+
& (risk_components["date"] <= str(end_date))
610627
]
611628
risk_components = risk_components.set_index(["date", "metric"])[
612629
"risk"
@@ -664,7 +681,7 @@ def plot_per_date_waterfall(
664681
risk_component["base risk"] = risk_component.iloc[0]["base risk"]
665682
# risk_component.plot(x="date", ax=ax, kind="bar", stacked=True)
666683
ax.stackplot(
667-
risk_component.index,
684+
risk_component.index.to_timestamp(),
668685
[risk_component[col] for col in risk_component.columns],
669686
labels=risk_component.columns,
670687
)
@@ -717,23 +734,25 @@ def plot_waterfall(
717734
"""
718735
start_date = self.start_date if start_date is None else start_date
719736
end_date = self.end_date if end_date is None else end_date
737+
start_date_p = pd.to_datetime(start_date).to_period(self._time_resolution)
738+
end_date_p = pd.to_datetime(end_date).to_period(self._time_resolution)
720739
risk_component = self._calc_waterfall_plot_data(
721740
start_date=start_date, end_date=end_date, npv=npv
722741
)
723742
if ax is None:
724743
_, ax = plt.subplots(figsize=(8, 5))
725744

726745
risk_component = risk_component.loc[
727-
(risk_component.index.date == end_date)
746+
(risk_component.index == str(end_date))
728747
].squeeze()
729748

730749
labels = [
731-
f"Risk {start_date}",
732-
f"Exposure contribution {end_date}",
733-
f"Hazard contribution {end_date}",
734-
f"Vulnerability contribution {end_date}",
735-
f"Interaction contribution {end_date}",
736-
f"Total Risk {end_date}",
750+
f"Risk {start_date_p}",
751+
f"Exposure contribution {end_date_p}",
752+
f"Hazard contribution {end_date_p}",
753+
f"Vulnerability contribution {end_date_p}",
754+
f"Interaction contribution {end_date_p}",
755+
f"Total Risk {end_date_p}",
737756
]
738757
values = [
739758
risk_component["base risk"],
@@ -783,7 +802,7 @@ def plot_waterfall(
783802

784803
# Construct y-axis label and title based on parameters
785804
value_label = "USD"
786-
title_label = f"Evolution of the components of risk between {start_date} and {end_date} (Average impact)"
805+
title_label = f"Evolution of the components of risk between {start_date_p} and {end_date_p} (Average impact)"
787806
ax.yaxis.set_major_formatter(ticker.EngFormatter())
788807
ax.set_title(title_label)
789808
ax.set_ylabel(value_label)

climada/trajectories/riskperiod.py

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -79,15 +79,15 @@ class CalcRiskPeriod:
7979
This object handles the interpolations and computations of risk metrics in
8080
between two given snapshots, along a DateTimeIndex built from either a
8181
`time_resolution` (which must be a valid "freq" string to build a DateTimeIndex)
82-
and defaults to "AS-JAN" (start of the year) or `time_points` integer argument, in which case
82+
and defaults to "Y" (yearly) or `time_points` integer argument, in which case
8383
the DateTimeIndex will have that many periods.
8484
8585
Note that most attribute like members are properties with their own docstring.
8686
8787
Attributes
8888
----------
8989
90-
date_idx: pd.DatetimeIndex
90+
date_idx: pd.PeriodIndex
9191
The date index for the different interpolated points between the two snapshots
9292
interpolation_strategy: InterpolationStrategy, optional
9393
The approach used to interpolate impact matrices in between the two snapshots, linear by default.
@@ -107,7 +107,7 @@ def __init__(
107107
self,
108108
snapshot0: Snapshot,
109109
snapshot1: Snapshot,
110-
time_resolution: str | None = "AS-JAN",
110+
time_resolution: str | None = "Y",
111111
time_points: int | None = None,
112112
interpolation_strategy: InterpolationStrategyBase | None = None,
113113
impact_computation_strategy: ImpactComputationStrategy | None = None,
@@ -126,7 +126,7 @@ def __init__(
126126
time_resolution : str, optional
127127
One of pandas date offset strings or corresponding objects. See :func:`pandas.date_range`.
128128
time_points : int, optional
129-
Number of periods to generate for the DatetimeIndex.
129+
Number of periods to generate for the PeriodIndex.
130130
interpolation_strategy: InterpolationStrategy, optional
131131
The approach used to interpolate impact matrices in between the two snapshots, linear by default.
132132
impact_computation_strategy: ImpactComputationStrategy, optional
@@ -192,7 +192,7 @@ def _set_date_idx(
192192
periods: int | None = None,
193193
freq: str | None = None,
194194
name: str | None = None,
195-
) -> pd.DatetimeIndex:
195+
) -> pd.PeriodIndex:
196196
"""Generate a date range index based on the provided parameters.
197197
198198
Parameters
@@ -210,8 +210,8 @@ def _set_date_idx(
210210
211211
Returns
212212
-------
213-
pd.DatetimeIndex
214-
A DatetimeIndex representing the date range.
213+
pd.PeriodIndex
214+
A PeriodIndex representing the date range.
215215
216216
Raises
217217
------
@@ -223,22 +223,21 @@ def _set_date_idx(
223223
else:
224224
points = periods
225225

226-
ret = pd.date_range(
226+
ret = pd.period_range(
227227
date1,
228228
date2,
229229
periods=points,
230230
freq=freq, # type: ignore
231231
name=name,
232-
normalize=True,
233232
)
234233
if periods is not None and len(ret) != periods:
235234
raise ValueError(
236235
"Number of periods and frequency given to date_range are inconsistent."
237236
)
238237

239-
if pd.infer_freq(ret) != freq:
238+
if ret.freq != freq:
240239
LOGGER.debug(
241-
f"Given interval frequency ( {pd.infer_freq(ret)} ) and infered interval frequency differ ( {freq} )."
240+
f"Given interval frequency ( {ret.freq} ) and infered interval frequency differ ( {freq} )."
242241
)
243242

244243
return ret
@@ -254,18 +253,18 @@ def snapshot_end(self) -> Snapshot:
254253
return self._snapshot1
255254

256255
@property
257-
def date_idx(self) -> pd.DatetimeIndex:
258-
"""The pandas DatetimeIndex representing the time dimension of the risk period."""
256+
def date_idx(self) -> pd.PeriodIndex:
257+
"""The pandas PeriodIndex representing the time dimension of the risk period."""
259258
return self._date_idx
260259

261260
@date_idx.setter
262261
def date_idx(self, value, /):
263-
if not isinstance(value, pd.DatetimeIndex):
264-
raise ValueError("Not a DatetimeIndex")
262+
if not isinstance(value, pd.PeriodIndex):
263+
raise ValueError("Not a PeriodIndex")
265264

266-
self._date_idx = value.normalize() # Avoids weird hourly data
265+
self._date_idx = value # Avoids weird hourly data
267266
self._time_points = len(self.date_idx)
268-
self._time_resolution = pd.infer_freq(self.date_idx)
267+
self._time_resolution = self.date_idx.freq
269268
self._reset_impact_data()
270269

271270
@property

climada/trajectories/test/test_risk_trajectory.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -155,14 +155,14 @@ def test_risk_periods_lazy_computation(self, MockCalcRiskPeriod):
155155
call(
156156
self.mock_snapshot1,
157157
self.mock_snapshot2,
158-
time_resolution="YS",
158+
time_resolution="Y",
159159
interpolation_strategy=self.mock_interpolation_strategy,
160160
impact_computation_strategy=self.mock_impact_computation_strategy,
161161
),
162162
call(
163163
self.mock_snapshot2,
164164
self.mock_snapshot3,
165-
time_resolution="YS",
165+
time_resolution="Y",
166166
interpolation_strategy=self.mock_interpolation_strategy,
167167
impact_computation_strategy=self.mock_impact_computation_strategy,
168168
),

climada/trajectories/test/test_riskperiod.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ def setUp(self):
115115
self.calc_risk_period = CalcRiskPeriod(
116116
self.mock_snapshot_start,
117117
self.mock_snapshot_end,
118-
time_resolution="AS-JAN",
118+
time_resolution="Y",
119119
interpolation_strategy=AllLinearStrategy(),
120120
impact_computation_strategy=ImpactCalcComputation(),
121121
# These will have to be tested when implemented
@@ -127,7 +127,7 @@ def setUp(self):
127127
def test_init(self):
128128
self.assertEqual(self.calc_risk_period.snapshot_start, self.mock_snapshot_start)
129129
self.assertEqual(self.calc_risk_period.snapshot_end, self.mock_snapshot_end)
130-
self.assertEqual(self.calc_risk_period.time_resolution, "AS-JAN")
130+
self.assertEqual(self.calc_risk_period.time_resolution, "Y")
131131
self.assertEqual(
132132
self.calc_risk_period.time_points, self.future_date - self.present_date + 1
133133
)
@@ -145,7 +145,7 @@ def test_init(self):
145145
self.calc_risk_period._group_id_E1,
146146
self.mock_snapshot_end.exposure.gdf["group_id"].values,
147147
)
148-
self.assertIsInstance(self.calc_risk_period.date_idx, pd.DatetimeIndex)
148+
self.assertIsInstance(self.calc_risk_period.date_idx, pd.PeriodIndex)
149149
self.assertEqual(
150150
len(self.calc_risk_period.date_idx),
151151
self.future_date - self.present_date + 1,
@@ -175,8 +175,8 @@ def test_set_time_points(self):
175175
self.assertEqual(len(self.calc_risk_period.date_idx), 10)
176176
pd.testing.assert_index_equal(
177177
self.calc_risk_period.date_idx,
178-
pd.DatetimeIndex(
179-
pd.DatetimeIndex(
178+
pd.PeriodIndex(
179+
pd.PeriodIndex(
180180
[
181181
"2020-01-01",
182182
"2020-07-22",
@@ -203,8 +203,8 @@ def test_set_time_resolution(self):
203203
self.assertEqual(self.calc_risk_period.time_resolution, "MS")
204204
pd.testing.assert_index_equal(
205205
self.calc_risk_period.date_idx,
206-
pd.DatetimeIndex(
207-
pd.DatetimeIndex(
206+
pd.PeriodIndex(
207+
pd.PeriodIndex(
208208
[
209209
"2020-01-01",
210210
"2020-02-01",
@@ -599,7 +599,7 @@ def setUp(self):
599599
self.calc_risk_period.per_date_aai_H0V1 = np.array([2, 3, 9])
600600
self.calc_risk_period.per_date_aai_H1V1 = np.array([4, 6, 24])
601601

602-
self.calc_risk_period.date_idx = pd.DatetimeIndex(
602+
self.calc_risk_period.date_idx = pd.PeriodIndex(
603603
["2020-01-01", "2025-01-01", "2030-01-01"], name="date"
604604
)
605605
self.calc_risk_period.snapshot_start.exposure.gdf = gpd.GeoDataFrame(
@@ -700,7 +700,7 @@ def test_calc_aai_per_group_metric(self):
700700
self.calc_risk_period._groups_id = np.array([1, 2])
701701
self.calc_risk_period.eai_gdf = pd.DataFrame(
702702
{
703-
"date": pd.DatetimeIndex(
703+
"date": pd.PeriodIndex(
704704
["2020-01-01"] * 3 + ["2025-01-01"] * 3 + ["2030-01-01"] * 3,
705705
name="date",
706706
),

0 commit comments

Comments
 (0)