Commit 001f340

Author: luseverin
Commit message: Merge branch 'forecast-class' into implement_unique_selection_reduction
Parents: bc6f576, e66a558

File tree: 5 files changed, +324 -4 lines


climada/engine/impact.py (22 additions, 4 deletions)

@@ -1431,6 +1431,8 @@ def write_attribute(group, name, value):

         def write_dataset(group, name, value):
             """Write a dataset"""
+            if name == "lead_time":
+                value = value.astype("timedelta64[ns]").astype("int64")
             group.create_dataset(name, data=value, dtype=_str_type_helper(value))

         def write_dict(group, name, value):
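The int64 cast on write pairs with the timedelta64 cast added to from_hdf5 further down: lead_time values are stored as integer nanosecond counts and reinterpreted as timedeltas on read. A minimal standalone numpy sketch of that round trip (illustrative only, not part of the diff):

    import numpy as np

    # hypothetical lead times as they would sit on an ImpactForecast
    lead_time = np.array([6, 12, 24], dtype="timedelta64[h]")

    # write side: cast to nanoseconds, then to plain integers for the H5 dataset
    stored = lead_time.astype("timedelta64[ns]").astype("int64")

    # read side: reinterpret the integers as nanosecond timedeltas
    restored = stored.astype("timedelta64[ns]")

    assert np.array_equal(restored, lead_time.astype("timedelta64[ns]"))
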
@@ -1618,7 +1620,9 @@ def read_excel(self, *args, **kwargs):
         self.__dict__ = Impact.from_excel(*args, **kwargs).__dict__

     @classmethod
-    def from_hdf5(cls, file_path: Union[str, Path]):
+    def from_hdf5(
+        cls, file_path: Union[str, Path], *, add_scalar_attrs=None, add_array_attrs=None
+    ):
         """Create an impact object from an H5 file.

         This assumes a specific layout of the file. If values are not found in the
@@ -1663,6 +1667,10 @@ def from_hdf5(cls, file_path: Union[str, Path]):
         ----------
         file_path : str or Path
             The file path of the file to read.
+        add_scalar_attrs : Iterable of str, optional
+            Scalar attributes to read from file. Defaults to None.
+        add_array_attrs : Iterable of str, optional
+            Array attributes to read from file. Defaults to None.

         Returns
         -------
@@ -1691,17 +1699,27 @@ def from_hdf5(cls, file_path: Union[str, Path]):
             # Scalar attributes
             scalar_attrs = set(
                 ("crs", "tot_value", "unit", "aai_agg", "frequency_unit", "haz_type")
-            ).intersection(file.attrs.keys())
+            )
+            if add_scalar_attrs is not None:
+                scalar_attrs = scalar_attrs.union(add_scalar_attrs)
+            scalar_attrs = scalar_attrs.intersection(file.attrs.keys())
             kwargs.update({attr: file.attrs[attr] for attr in scalar_attrs})

             # Array attributes
             # NOTE: Need [:] to copy array data. Otherwise, it would be a view that is
             # invalidated once we close the file.
             array_attrs = set(
                 ("event_id", "date", "coord_exp", "eai_exp", "at_event", "frequency")
-            ).intersection(file.keys())
+            )
+            if add_array_attrs is not None:
+                array_attrs = array_attrs.union(add_array_attrs)
+            array_attrs = array_attrs.intersection(file.keys())
             kwargs.update({attr: file[attr][:] for attr in array_attrs})
-
+            # correct lead_time attribute to timedelta
+            if "lead_time" in kwargs:
+                kwargs["lead_time"] = np.array(file["lead_time"][:]).astype(
+                    "timedelta64[ns]"
+                )
             # Special handling for 'event_name' because it should be a list of strings
             if "event_name" in file:
                 # pylint: disable=no-member
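The two new keyword-only hooks let subclasses ask the base reader for extra file entries and have them forwarded to their own __init__; the ImpactForecast override in the next file is the concrete use in this commit. A hedged sketch of the pattern, with a made-up subclass name (the "member" dataset is the one introduced by this branch):

    from pathlib import Path
    from typing import Union

    import numpy as np

    from climada.engine import Impact


    class EnsembleImpact(Impact):
        """Hypothetical subclass carrying one extra per-event dataset, 'member'."""

        def __init__(self, *args, member=None, **kwargs):
            super().__init__(*args, **kwargs)
            self.member = np.array([] if member is None else member)

        @classmethod
        def from_hdf5(cls, file_path: Union[str, Path]):
            # ask the base reader to also load the 'member' dataset if the file has it;
            # it then reaches this class's __init__ as a keyword argument
            return super().from_hdf5(file_path, add_array_attrs={"member"})
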

climada/engine/impact_forecast.py (111 additions, 0 deletions)

@@ -20,6 +20,8 @@
 """

 import logging
+from pathlib import Path
+from typing import Union

 import numpy as np
 import scipy.sparse as sparse
@@ -173,6 +175,62 @@ def calc_freq_curve(self, return_per=None):
         LOGGER.error("calc_freq_curve is not defined for ImpactForecast")
         raise NotImplementedError("calc_freq_curve is not defined for ImpactForecast")

+    @classmethod
+    def from_hdf5(cls, file_path: Union[str, Path]):
+        """Create an ImpactForecast object from an H5 file.
+
+        This assumes a specific layout of the file. If values are not found in the
+        expected places, they will be set to the default values for an ``Impact`` object.
+
+        The following H5 file structure is assumed (H5 groups are terminated with ``/``,
+        attributes are denoted by ``.attrs/``)::
+
+            file.h5
+            ├─ at_event
+            ├─ coord_exp
+            ├─ eai_exp
+            ├─ event_id
+            ├─ event_name
+            ├─ frequency
+            ├─ imp_mat
+            ├─ lead_time
+            ├─ member
+            ├─ .attrs/
+            │  ├─ aai_agg
+            │  ├─ crs
+            │  ├─ frequency_unit
+            │  ├─ haz_type
+            │  ├─ tot_value
+            │  ├─ unit
+
+        As per :py:func:`climada.engine.impact.Impact.__init__`, any of these entries
+        is optional. If it is not found, the default value will be used when constructing
+        the Impact.
+
+        The impact matrix ``imp_mat`` can either be an H5 dataset, in which case it is
+        interpreted as a dense representation of the matrix, or an H5 group, in which case
+        the group is expected to contain the following data for instantiating a
+        `scipy.sparse.csr_matrix <https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.csr_matrix.html>`_::
+
+            imp_mat/
+            ├─ data
+            ├─ indices
+            ├─ indptr
+            ├─ .attrs/
+            │  ├─ shape
+
+        Parameters
+        ----------
+        file_path : str or Path
+            The file path of the file to read.
+
+        Returns
+        -------
+        imp : ImpactForecast
+            ImpactForecast with data from the given file
+        """
+        return super().from_hdf5(file_path, add_array_attrs={"member", "lead_time"})
+
     def _check_sizes(self):
         """Check sizes of forecast data vs. impact data.
@@ -354,3 +412,56 @@ def select(
             coord_exp=coord_exp,
             reset_frequency=reset_frequency,
         )
+
+    def _quantile(self, q: float, event_name: str | None = None):
+        """
+        Reduce the impact matrix and at_event of an ImpactForecast to the quantile value.
+        """
+        red_imp_mat = sparse.csr_matrix(np.quantile(self.imp_mat.toarray(), q, axis=0))
+        red_at_event = np.array([red_imp_mat.sum()])
+        if event_name is None:
+            event_name = f"quantile_{q}"
+        return ImpactForecast(
+            frequency_unit=self.frequency_unit,
+            coord_exp=self.coord_exp,
+            crs=self.crs,
+            eai_exp=self.eai_exp,
+            at_event=red_at_event,
+            tot_value=self.tot_value,
+            aai_agg=self.aai_agg,
+            unit=self.unit,
+            imp_mat=red_imp_mat,
+            haz_type=self.haz_type,
+            **self._reduce_attrs(event_name),
+        )
+
+    def quantile(self, q: float):
+        """
+        Reduce the impact matrix and at_event of an ImpactForecast to the quantile value.
+
+        Parameters
+        ----------
+        q : float
+            The quantile to compute, which must be between 0 and 1.
+
+        Returns
+        -------
+        ImpactForecast
+            An ImpactForecast object with the quantile impact matrix and at_event.
+        """
+        return self._quantile(q=q)
+
+    def median(self):
+        """
+        Reduce the impact matrix and at_event of an ImpactForecast to the median value.
+
+        Returns
+        -------
+        ImpactForecast
+            An ImpactForecast object with the median impact matrix and at_event.
+        """
+        return self._quantile(q=0.5, event_name="median")
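
A brief usage sketch of the methods added here; the import path follows this commit's file layout, and the H5 file name is hypothetical:

    from climada.engine.impact_forecast import ImpactForecast

    # read a forecast impact previously written with write_hdf5 (path is made up)
    imp_fcst = ImpactForecast.from_hdf5("impact_forecast.h5")

    # per-centroid 80th percentile across ensemble members (axis=0 of imp_mat)
    imp_q80 = imp_fcst.quantile(q=0.8)

    # median() is shorthand for quantile(q=0.5) with event_name "median"
    imp_med = imp_fcst.median()

    # the reduced object holds a single pseudo-event: event_name ["median"],
    # member [-1], lead_time [NaT], and at_event equal to the reduced imp_mat sum
    print(imp_med.event_name, imp_med.member, imp_med.lead_time, imp_med.at_event)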

climada/engine/test/test_impact_forecast.py (76 additions, 0 deletions)

@@ -233,6 +233,36 @@ def test_impact_forecast_blocked_methods(impact_forecast):
         impact_forecast.calc_freq_curve(np.array([10, 50, 100]))


+@pytest.mark.parametrize("dense", [True, False])
+def test_write_read_hdf5(impact_forecast, tmp_path, dense):
+
+    file_name = tmp_path / "test_hazard_forecast.h5"
+    # replace dummy_impact event_names with strings
+    impact_forecast.event_name = [str(name) for name in impact_forecast.event_name]
+    impact_forecast.write_hdf5(file_name, dense_imp_mat=dense)
+
+    def compare_attr(obj, attr):
+        actual = getattr(obj, attr)
+        expected = getattr(impact_forecast, attr)
+        if isinstance(actual, csr_matrix):
+            npt.assert_array_equal(actual.todense(), expected.todense())
+        else:
+            npt.assert_array_equal(actual, expected)
+
+    # Read ImpactForecast
+    impact_forecast_read = ImpactForecast.from_hdf5(file_name)
+    assert impact_forecast_read.lead_time.dtype.kind == np.dtype("timedelta64").kind
+    for attr in impact_forecast.__dict__.keys():
+        compare_attr(impact_forecast_read, attr)
+
+    # Read Impact
+    impact_read = Impact.from_hdf5(file_name)
+    for attr in impact_read.__dict__.keys():
+        compare_attr(impact_read, attr)
+    assert "member" not in impact_read.__dict__
+    assert "lead_time" not in impact_read.__dict__
+
+
 @pytest.fixture
 def impact_forecast_stats(impact_kwargs, lead_time, member):
     max_index = 4
@@ -268,3 +298,49 @@ def test_impact_forecast_min_mean_max(impact_forecast_stats, attr):
     npt.assert_array_equal(imp_fc_reduced.event_id, [0])
     npt.assert_array_equal(imp_fc_reduced.frequency, [1])
     npt.assert_array_equal(imp_fc_reduced.date, [0])
+
+
+@pytest.mark.parametrize("quantile", [0.3, 0.6, 0.8])
+def test_impact_forecast_quantile(impact_forecast, quantile):
+    """Check quantile method for ImpactForecast"""
+    imp_fcst_quantile = impact_forecast.quantile(q=quantile)
+
+    # assert imp_mat
+    npt.assert_array_equal(
+        imp_fcst_quantile.imp_mat.toarray().squeeze(),
+        np.quantile(impact_forecast.imp_mat.toarray(), quantile, axis=0),
+    )
+    # assert at_event
+    npt.assert_array_equal(
+        imp_fcst_quantile.at_event,
+        np.quantile(impact_forecast.at_event, quantile, axis=0).sum(),
+    )
+
+    # check that attributes were reduced correctly
+    npt.assert_array_equal(imp_fcst_quantile.member, np.array([-1]))
+    npt.assert_array_equal(
+        imp_fcst_quantile.lead_time, np.array([np.timedelta64("NaT")])
+    )
+    npt.assert_array_equal(imp_fcst_quantile.event_id, np.array([0]))
+    npt.assert_array_equal(
+        imp_fcst_quantile.event_name, np.array([f"quantile_{quantile}"])
+    )
+    npt.assert_array_equal(imp_fcst_quantile.frequency, np.array([1]))
+    npt.assert_array_equal(imp_fcst_quantile.date, np.array([0]))
+
+
+def test_median(impact_forecast):
+    imp_fcst_median = impact_forecast.median()
+    imp_fcst_quantile = impact_forecast.quantile(q=0.5)
+    npt.assert_array_equal(
+        imp_fcst_median.imp_mat.toarray(), imp_fcst_quantile.imp_mat.toarray()
+    )
+    npt.assert_array_equal(imp_fcst_median.imp_mat.toarray(), [[2.5, 2.5]])
+
+    # check that attributes were reduced correctly
+    npt.assert_array_equal(imp_fcst_median.member, np.array([-1]))
+    npt.assert_array_equal(imp_fcst_median.lead_time, np.array([np.timedelta64("NaT")]))
+    npt.assert_array_equal(imp_fcst_median.event_id, np.array([0]))
+    npt.assert_array_equal(imp_fcst_median.event_name, np.array(["median"]))
+    npt.assert_array_equal(imp_fcst_median.frequency, np.array([1]))
+    npt.assert_array_equal(imp_fcst_median.date, np.array([0]))

climada/hazard/forecast.py (57 additions, 0 deletions)

@@ -299,3 +299,60 @@ def select(
             extent=extent,
             reset_frequency=reset_frequency,
         )
+
+    def _quantile(self, q: float, event_name: str | None = None):
+        """
+        Reduce the intensity and fraction of a HazardForecast to the quantile value.
+        """
+        red_intensity = sparse.csr_matrix(
+            np.quantile(self.intensity.toarray(), q, axis=0)
+        )
+        red_fraction = sparse.csr_matrix(
+            np.quantile(self.fraction.toarray(), q, axis=0)
+        )
+        if event_name is None:
+            event_name = f"quantile_{q}"
+        return HazardForecast(
+            haz_type=self.haz_type,
+            pool=self.pool,
+            units=self.units,
+            centroids=self.centroids,
+            frequency_unit=self.frequency_unit,
+            intensity=red_intensity,
+            fraction=red_fraction,
+            **self._reduce_attrs(event_name),
+        )
+
+    def quantile(self, q: float):
+        """
+        Reduce the intensity and fraction of a HazardForecast to the quantile value.
+
+        The quantile is computed by taking the column-wise quantile of the intensity
+        and fraction matrices along the event dimension (axis=0), yielding a single
+        reduced event.
+
+        Parameters
+        ----------
+        q : float
+            The quantile to compute, between 0 and 1.
+
+        Returns
+        -------
+        HazardForecast
+            A HazardForecast object with the quantile intensity and fraction.
+        """
+        return self._quantile(q=q)
+
+    def median(self):
+        """
+        Reduce the intensity and fraction of a HazardForecast to the median value.
+
+        The median is computed by taking the column-wise median of the intensity and
+        fraction matrices along the event dimension (axis=0), yielding a single event.
+
+        Returns
+        -------
+        HazardForecast
+            A HazardForecast object with the median intensity and fraction.
+        """
+        return self._quantile(q=0.5, event_name="median")
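
The reduction itself is a column-wise quantile over the dense matrices, re-wrapped as single-row sparse matrices. A standalone numpy/scipy sketch of that step with toy values (not CLIMADA code):

    import numpy as np
    from scipy import sparse

    # toy intensity matrix: 3 ensemble members (rows) x 2 centroids (columns)
    intensity = sparse.csr_matrix(np.array([[1.0, 10.0], [2.0, 20.0], [3.0, 30.0]]))

    # what HazardForecast._quantile does per matrix: quantile along the event axis
    red_intensity = sparse.csr_matrix(np.quantile(intensity.toarray(), 0.5, axis=0))

    print(red_intensity.toarray())  # [[ 2. 20.]] -> one row, one value per centroid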
