Skip to content

Commit 0a04ab9

Browse files
committed
Use of PeriodIndex instead of DateTimeIndex
1 parent 21cc433 commit 0a04ab9

File tree

4 files changed

+69
-51
lines changed

4 files changed

+69
-51
lines changed

climada/trajectories/risk_trajectory.py

Lines changed: 42 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import matplotlib.pyplot as plt
3030
import matplotlib.ticker as ticker
3131
import pandas as pd
32+
from pandas.tseries.frequencies import to_offset
3233

3334
from climada.entity.disc_rates.base import DiscRates
3435
from climada.trajectories.interpolation import InterpolationStrategyBase
@@ -62,7 +63,7 @@ def __init__(
6263
self,
6364
snapshots_list: list[Snapshot],
6465
*,
65-
time_resolution: str = "YS",
66+
time_resolution: str = "Y",
6667
all_groups_name: str = "All",
6768
risk_disc: DiscRates | None = None,
6869
interpolation_strategy: InterpolationStrategyBase | None = None,
@@ -409,7 +410,10 @@ def risk_components_metrics(self, npv: bool = True, **kwargs) -> pd.DataFrame:
409410
if len(self._snapshots) > 2:
410411
tmp.set_index(["group", "date", "measure", "metric"], inplace=True)
411412
start_dates = [snap.date for snap in self._snapshots[:-1]]
412-
end_dates = [snap.date for snap in self._snapshots[1:]]
413+
end_dates = [
414+
snap.date - to_offset(self._time_resolution)
415+
for snap in self._snapshots[1:]
416+
]
413417
periods_dates = list(zip(start_dates, end_dates))
414418
tmp.loc[pd.IndexSlice[:, :, :, "base risk"]] = tmp.loc[
415419
pd.IndexSlice[:, str(self.start_date), :, "base risk"]
@@ -435,7 +439,7 @@ def risk_components_metrics(self, npv: bool = True, **kwargs) -> pd.DataFrame:
435439
].iloc[0]
436440

437441
tmp.reset_index(inplace=True)
438-
return tmp
442+
return tmp.drop("index", axis=1, errors="ignore")
439443

440444
def per_date_risk_metrics(
441445
self,
@@ -486,6 +490,7 @@ def _get_risk_periods(
486490
risk_periods: list[CalcRiskPeriod],
487491
start_date: datetime.date,
488492
end_date: datetime.date,
493+
strict: bool = True,
489494
):
490495
"""Returns risk periods from the given list that are within `start_date` and `end_date`.
491496
@@ -495,16 +500,28 @@ def _get_risk_periods(
495500
The list of risk periods to look through
496501
start_date : datetime.date
497502
end_date : datetime.date
498-
503+
strict: bool, default True
504+
If true, only returns periods strictly within start and end dates. Else,
505+
returns periods that have an overlap within start and end.
499506
"""
500-
return [
501-
period
502-
for period in risk_periods
503-
if (
504-
start_date <= period.snapshot_start.date
505-
or end_date >= period.snapshot_end.date
506-
)
507-
]
507+
if strict:
508+
return [
509+
period
510+
for period in risk_periods
511+
if (
512+
start_date <= period.snapshot_start.date
513+
and end_date >= period.snapshot_end.date
514+
)
515+
]
516+
else:
517+
return [
518+
period
519+
for period in risk_periods
520+
if not (
521+
start_date >= period.snapshot_end.date
522+
or end_date <= period.snapshot_start.date
523+
)
524+
]
508525

509526
@staticmethod
510527
def identify_continuous_periods(group, time_unit):
@@ -605,8 +622,8 @@ def _calc_waterfall_plot_data(
605622
end_date = self.end_date if end_date is None else end_date
606623
risk_components = self.risk_components_metrics(npv)
607624
risk_components = risk_components.loc[
608-
(risk_components["date"].dt.date >= start_date)
609-
& (risk_components["date"].dt.date <= end_date)
625+
(risk_components["date"] >= str(start_date))
626+
& (risk_components["date"] <= str(end_date))
610627
]
611628
risk_components = risk_components.set_index(["date", "metric"])[
612629
"risk"
@@ -664,7 +681,7 @@ def plot_per_date_waterfall(
664681
risk_component["base risk"] = risk_component.iloc[0]["base risk"]
665682
# risk_component.plot(x="date", ax=ax, kind="bar", stacked=True)
666683
ax.stackplot(
667-
risk_component.index,
684+
risk_component.index.to_timestamp(),
668685
[risk_component[col] for col in risk_component.columns],
669686
labels=risk_component.columns,
670687
)
@@ -717,23 +734,25 @@ def plot_waterfall(
717734
"""
718735
start_date = self.start_date if start_date is None else start_date
719736
end_date = self.end_date if end_date is None else end_date
737+
start_date_p = pd.to_datetime(start_date).to_period(self._time_resolution)
738+
end_date_p = pd.to_datetime(end_date).to_period(self._time_resolution)
720739
risk_component = self._calc_waterfall_plot_data(
721740
start_date=start_date, end_date=end_date, npv=npv
722741
)
723742
if ax is None:
724743
_, ax = plt.subplots(figsize=(8, 5))
725744

726745
risk_component = risk_component.loc[
727-
(risk_component.index.date == end_date)
746+
(risk_component.index == str(end_date))
728747
].squeeze()
729748

730749
labels = [
731-
f"Risk {start_date}",
732-
f"Exposure contribution {end_date}",
733-
f"Hazard contribution {end_date}",
734-
f"Vulnerability contribution {end_date}",
735-
f"Interaction contribution {end_date}",
736-
f"Total Risk {end_date}",
750+
f"Risk {start_date_p}",
751+
f"Exposure contribution {end_date_p}",
752+
f"Hazard contribution {end_date_p}",
753+
f"Vulnerability contribution {end_date_p}",
754+
f"Interaction contribution {end_date_p}",
755+
f"Total Risk {end_date_p}",
737756
]
738757
values = [
739758
risk_component["base risk"],
@@ -783,7 +802,7 @@ def plot_waterfall(
783802

784803
# Construct y-axis label and title based on parameters
785804
value_label = "USD"
786-
title_label = f"Evolution of the components of risk between {start_date} and {end_date} (Average impact)"
805+
title_label = f"Evolution of the components of risk between {start_date_p} and {end_date_p} (Average impact)"
787806
ax.yaxis.set_major_formatter(ticker.EngFormatter())
788807
ax.set_title(title_label)
789808
ax.set_ylabel(value_label)

climada/trajectories/riskperiod.py

Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -79,15 +79,15 @@ class CalcRiskPeriod:
7979
This object handles the interpolations and computations of risk metrics in
8080
between two given snapshots, along a DateTimeIndex built from either a
8181
`time_resolution` (which must be a valid "freq" string to build a DateTimeIndex)
82-
and defaults to "AS-JAN" (start of the year) or `time_points` integer argument, in which case
82+
and defaults to "Y" (yearly) or `time_points` integer argument, in which case
8383
the DateTimeIndex will have that many periods.
8484
8585
Note that most attribute like members are properties with their own docstring.
8686
8787
Attributes
8888
----------
8989
90-
date_idx: pd.DatetimeIndex
90+
date_idx: pd.PeriodIndex
9191
The date index for the different interpolated points between the two snapshots
9292
interpolation_strategy: InterpolationStrategy, optional
9393
The approach used to interpolate impact matrices in between the two snapshots, linear by default.
@@ -107,7 +107,7 @@ def __init__(
107107
self,
108108
snapshot0: Snapshot,
109109
snapshot1: Snapshot,
110-
time_resolution: str | None = "AS-JAN",
110+
time_resolution: str | None = "Y",
111111
time_points: int | None = None,
112112
interpolation_strategy: InterpolationStrategyBase | None = None,
113113
impact_computation_strategy: ImpactComputationStrategy | None = None,
@@ -126,7 +126,7 @@ def __init__(
126126
time_resolution : str, optional
127127
One of pandas date offset strings or corresponding objects. See :func:`pandas.date_range`.
128128
time_points : int, optional
129-
Number of periods to generate for the DatetimeIndex.
129+
Number of periods to generate for the PeriodIndex.
130130
interpolation_strategy: InterpolationStrategy, optional
131131
The approach used to interpolate impact matrices in between the two snapshots, linear by default.
132132
impact_computation_strategy: ImpactComputationStrategy, optional
@@ -192,7 +192,7 @@ def _set_date_idx(
192192
periods: int | None = None,
193193
freq: str | None = None,
194194
name: str | None = None,
195-
) -> pd.DatetimeIndex:
195+
) -> pd.PeriodIndex:
196196
"""Generate a date range index based on the provided parameters.
197197
198198
Parameters
@@ -210,8 +210,8 @@ def _set_date_idx(
210210
211211
Returns
212212
-------
213-
pd.DatetimeIndex
214-
A DatetimeIndex representing the date range.
213+
pd.PeriodIndex
214+
A PeriodIndex representing the date range.
215215
216216
Raises
217217
------
@@ -223,22 +223,21 @@ def _set_date_idx(
223223
else:
224224
points = periods
225225

226-
ret = pd.date_range(
226+
ret = pd.period_range(
227227
date1,
228228
date2,
229229
periods=points,
230230
freq=freq, # type: ignore
231231
name=name,
232-
normalize=True,
233232
)
234233
if periods is not None and len(ret) != periods:
235234
raise ValueError(
236235
"Number of periods and frequency given to date_range are inconsistent."
237236
)
238237

239-
if pd.infer_freq(ret) != freq:
238+
if ret.freq != freq:
240239
LOGGER.debug(
241-
f"Given interval frequency ( {pd.infer_freq(ret)} ) and infered interval frequency differ ( {freq} )."
240+
f"Given interval frequency ( {ret.freq} ) and infered interval frequency differ ( {freq} )."
242241
)
243242

244243
return ret
@@ -254,18 +253,18 @@ def snapshot_end(self) -> Snapshot:
254253
return self._snapshot1
255254

256255
@property
257-
def date_idx(self) -> pd.DatetimeIndex:
258-
"""The pandas DatetimeIndex representing the time dimension of the risk period."""
256+
def date_idx(self) -> pd.PeriodIndex:
257+
"""The pandas PeriodIndex representing the time dimension of the risk period."""
259258
return self._date_idx
260259

261260
@date_idx.setter
262261
def date_idx(self, value, /):
263-
if not isinstance(value, pd.DatetimeIndex):
264-
raise ValueError("Not a DatetimeIndex")
262+
if not isinstance(value, pd.PeriodIndex):
263+
raise ValueError("Not a PeriodIndex")
265264

266-
self._date_idx = value.normalize() # Avoids weird hourly data
265+
self._date_idx = value # Avoids weird hourly data
267266
self._time_points = len(self.date_idx)
268-
self._time_resolution = pd.infer_freq(self.date_idx)
267+
self._time_resolution = self.date_idx.freq
269268
self._reset_impact_data()
270269

271270
@property

climada/trajectories/test/test_risk_trajectory.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -155,14 +155,14 @@ def test_risk_periods_lazy_computation(self, MockCalcRiskPeriod):
155155
call(
156156
self.mock_snapshot1,
157157
self.mock_snapshot2,
158-
time_resolution="YS",
158+
time_resolution="Y",
159159
interpolation_strategy=self.mock_interpolation_strategy,
160160
impact_computation_strategy=self.mock_impact_computation_strategy,
161161
),
162162
call(
163163
self.mock_snapshot2,
164164
self.mock_snapshot3,
165-
time_resolution="YS",
165+
time_resolution="Y",
166166
interpolation_strategy=self.mock_interpolation_strategy,
167167
impact_computation_strategy=self.mock_impact_computation_strategy,
168168
),

climada/trajectories/test/test_riskperiod.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ def setUp(self):
115115
self.calc_risk_period = CalcRiskPeriod(
116116
self.mock_snapshot_start,
117117
self.mock_snapshot_end,
118-
time_resolution="AS-JAN",
118+
time_resolution="Y",
119119
interpolation_strategy=AllLinearStrategy(),
120120
impact_computation_strategy=ImpactCalcComputation(),
121121
# These will have to be tested when implemented
@@ -127,7 +127,7 @@ def setUp(self):
127127
def test_init(self):
128128
self.assertEqual(self.calc_risk_period.snapshot_start, self.mock_snapshot_start)
129129
self.assertEqual(self.calc_risk_period.snapshot_end, self.mock_snapshot_end)
130-
self.assertEqual(self.calc_risk_period.time_resolution, "AS-JAN")
130+
self.assertEqual(self.calc_risk_period.time_resolution, "Y")
131131
self.assertEqual(
132132
self.calc_risk_period.time_points, self.future_date - self.present_date + 1
133133
)
@@ -145,7 +145,7 @@ def test_init(self):
145145
self.calc_risk_period._group_id_E1,
146146
self.mock_snapshot_end.exposure.gdf["group_id"].values,
147147
)
148-
self.assertIsInstance(self.calc_risk_period.date_idx, pd.DatetimeIndex)
148+
self.assertIsInstance(self.calc_risk_period.date_idx, pd.PeriodIndex)
149149
self.assertEqual(
150150
len(self.calc_risk_period.date_idx),
151151
self.future_date - self.present_date + 1,
@@ -175,8 +175,8 @@ def test_set_time_points(self):
175175
self.assertEqual(len(self.calc_risk_period.date_idx), 10)
176176
pd.testing.assert_index_equal(
177177
self.calc_risk_period.date_idx,
178-
pd.DatetimeIndex(
179-
pd.DatetimeIndex(
178+
pd.PeriodIndex(
179+
pd.PeriodIndex(
180180
[
181181
"2020-01-01",
182182
"2020-07-22",
@@ -203,8 +203,8 @@ def test_set_time_resolution(self):
203203
self.assertEqual(self.calc_risk_period.time_resolution, "MS")
204204
pd.testing.assert_index_equal(
205205
self.calc_risk_period.date_idx,
206-
pd.DatetimeIndex(
207-
pd.DatetimeIndex(
206+
pd.PeriodIndex(
207+
pd.PeriodIndex(
208208
[
209209
"2020-01-01",
210210
"2020-02-01",
@@ -599,7 +599,7 @@ def setUp(self):
599599
self.calc_risk_period.per_date_aai_H0V1 = np.array([2, 3, 9])
600600
self.calc_risk_period.per_date_aai_H1V1 = np.array([4, 6, 24])
601601

602-
self.calc_risk_period.date_idx = pd.DatetimeIndex(
602+
self.calc_risk_period.date_idx = pd.PeriodIndex(
603603
["2020-01-01", "2025-01-01", "2030-01-01"], name="date"
604604
)
605605
self.calc_risk_period.snapshot_start.exposure.gdf = gpd.GeoDataFrame(
@@ -700,7 +700,7 @@ def test_calc_aai_per_group_metric(self):
700700
self.calc_risk_period._groups_id = np.array([1, 2])
701701
self.calc_risk_period.eai_gdf = pd.DataFrame(
702702
{
703-
"date": pd.DatetimeIndex(
703+
"date": pd.PeriodIndex(
704704
["2020-01-01"] * 3 + ["2025-01-01"] * 3 + ["2030-01-01"] * 3,
705705
name="date",
706706
),

0 commit comments

Comments
 (0)