Merge pull request #478 from The-Strategy-Unit/add_delivery_episode_in_spell_aggregation

tomjemmett · web-flow · commit 4688f3ee08f7 · 2025-08-22T10:10:55.000+01:00
diff --git a/src/nhp/model/aae.py b/src/nhp/model/aae.py
@@ -158,28 +158,18 @@ def process_results(data: pd.DataFrame) -> pd.DataFrame:
         )
         return data
 
-    def aggregate(self, model_iteration: ModelIteration) -> tuple[pd.DataFrame, list[list[str]]]:
-        """Aggregate the model results.
+    def specific_aggregations(self, model_results: pd.DataFrame) -> dict[str, pd.Series]:
+        """Create other aggregations specific to the model type.
 
-        Can also be used to aggregate the baseline data by passing in a `ModelIteration` with
-        the `model_run` argument set `-1`.
-
-        :param model_iteration: an instance of the `ModelIteration` class
-        :type model_iteration: model.model_iteration.ModelIteration
-
-        :returns: a tuple containing the model results, and a list of lists which contain the
-            aggregations to perform
-        :rtype: tuple[pd.DataFrame, list[list[str]]]
+        :param model_results: the results of a model run
+        :type model_results: pd.DataFrame
+        :return: dictionary containing the specific aggregations
+        :rtype: dict[str, pd.Series]
         """
-        model_results = self.process_results(model_iteration.get_model_results())
-
-        return (
-            model_results,
-            [
-                ["acuity"],
-                ["attendance_category"],
-            ],
-        )
+        return {
+            "acuity": self.get_agg(model_results, "acuity"),
+            "attendance_category": self.get_agg(model_results, "attendance_category"),
+        }
 
     def calculate_avoided_activity(
         self, data: pd.DataFrame, data_resampled: pd.DataFrame
diff --git a/src/nhp/model/inpatients.py b/src/nhp/model/inpatients.py
@@ -207,6 +207,7 @@ def process_results(data: pd.DataFrame) -> pd.DataFrame:
                     "tretspef",
                     "tretspef_grouped",
                     "los_group",
+                    "maternity_delivery_in_spell",
                 ],
                 dropna=False,
             )[["admissions", "beddays", "procedures"]]
@@ -241,29 +242,22 @@ def process_results(data: pd.DataFrame) -> pd.DataFrame:
 
         return data
 
-    def aggregate(self, model_iteration: ModelIteration) -> tuple[pd.DataFrame, list[list[str]]]:
-        """Aggregate the model results.
+    def specific_aggregations(self, model_results: pd.DataFrame) -> dict[str, pd.Series]:
+        """Create other aggregations specific to the model type.
 
-        Can also be used to aggregate the baseline data by passing in a `ModelIteration` with
-        the `model_run` argument set `-1`.
-
-        :param model_iteration: an instance of the `ModelIteration` class
-        :type model_iteration: model.model_iteration.ModelIteration
-
-        :returns: a tuple containing the model results, and a list of lists which contain the
-            aggregations to perform
-        :rtype: tuple[pd.DataFrame, list[list[str]]]
+        :param model_results: the results of a model run
+        :type model_results: pd.DataFrame
+        :return: dictionary containing the specific aggregations
+        :rtype: dict[str, pd.Series]
         """
-        model_results = self.process_results(model_iteration.get_model_results())
-
-        return (
-            model_results,
-            [
-                ["sex", "tretspef_grouped"],
-                ["tretspef"],
-                ["tretspef", "los_group"],
-            ],
-        )
+        return {
+            "sex+tretspef_grouped": self.get_agg(model_results, "sex", "tretspef_grouped"),
+            "tretspef": self.get_agg(model_results, "tretspef"),
+            "tretspef+los_group": self.get_agg(model_results, "tretspef", "los_group"),
+            "delivery_episode_in_spell": self.get_agg(
+                model_results[model_results["maternity_delivery_in_spell"]]
+            ),
+        }
 
     def calculate_avoided_activity(
         self, data: pd.DataFrame, data_resampled: pd.DataFrame
diff --git a/src/nhp/model/model.py b/src/nhp/model/model.py
@@ -427,3 +427,46 @@ def apply_resampling(self, row_samples: np.ndarray, data: pd.DataFrame) -> pd.Da
         :rtype: pd.DataFrame
         """
         raise NotImplementedError()
+
+    def aggregate(self, model_iteration: ModelIteration) -> dict[str, pd.Series]:
+        """Aggregate the model results.
+
+        Can also be used to aggregate the baseline data by passing in a `ModelIteration` with
+        the `model_run` argument set `-1`.
+
+        :param model_iteration: an instance of the `ModelIteration` class
+        :type model_iteration: model.model_iteration.ModelIteration
+
+        :returns: a dictionary containing the default aggregations, plus the aggregations
+            specific to the model type, keyed by aggregation name
+        :rtype: dict[str, pd.Series]
+        """
+        model_results = self.process_results(model_iteration.get_model_results())
+
+        base_aggregations = {
+            "default": self.get_agg(model_results),
+            "sex+age_group": self.get_agg(model_results, "sex", "age_group"),
+            "age": self.get_agg(model_results, "age"),
+        }
+
+        return {**base_aggregations, **self.specific_aggregations(model_results)}
+
+    def process_results(self, data: pd.DataFrame) -> pd.DataFrame:
+        """Processes the data into a format suitable for aggregation in results files.
+
+        :param data: Data to be processed. Format should be similar to Model.data
+        :type data: pd.DataFrame
+        :return: Processed results
+        :rtype: pd.DataFrame
+        """
+        raise NotImplementedError()
+
+    def specific_aggregations(self, model_results: pd.DataFrame) -> dict[str, pd.Series]:
+        """Create other aggregations specific to the model type.
+
+        :param model_results: the results of a model run
+        :type model_results: pd.DataFrame
+        :return: dictionary containing the specific aggregations
+        :rtype: dict[str, pd.Series]
+        """
+        raise NotImplementedError()
diff --git a/src/nhp/model/model_iteration.py b/src/nhp/model/model_iteration.py
@@ -165,18 +165,15 @@ def get_aggregate_results(self) -> ModelRunResult:
         :returns: a tuple containing a dictionary of results, and the step counts
         :rtype: tuple[dict[str, pd.Series], pd.Series | None]:
         """
-        model_results, aggregations = self.model.aggregate(self)
-
-        aggs = {
-            "default" if not v else "+".join(v): self.model.get_agg(model_results, *v)
-            for v in [[], ["sex", "age_group"], ["age"], *aggregations]
-        }
+        aggregations = self.model.aggregate(self)
 
         if not self.avoided_activity.empty:
             avoided_activity_agg = self.model.process_results(self.avoided_activity)
-            aggs["avoided_activity"] = self.model.get_agg(avoided_activity_agg, "sex", "age_group")
+            aggregations["avoided_activity"] = self.model.get_agg(
+                avoided_activity_agg, "sex", "age_group"
+            )
 
-        return aggs, self.get_step_counts()
+        return aggregations, self.get_step_counts()
 
     def get_step_counts(self) -> pd.Series | None:
         """Get the step counts of a model run."""
diff --git a/src/nhp/model/outpatients.py b/src/nhp/model/outpatients.py
@@ -228,28 +228,18 @@ def process_results(data: pd.DataFrame) -> pd.DataFrame:
         )
         return data
 
-    def aggregate(self, model_iteration: ModelIteration) -> tuple[pd.DataFrame, list[list[str]]]:
-        """Aggregate the model results.
+    def specific_aggregations(self, model_results: pd.DataFrame) -> dict[str, pd.Series]:
+        """Create other aggregations specific to the model type.
 
-        Can also be used to aggregate the baseline data by passing in a `ModelIteration` with
-        the `model_run` argument set `-1`.
-
-        :param model_iteration: an instance of the `ModelIteration` class
-        :type model_iteration: model.model_iteration.ModelIteration
-
-        :returns: a tuple containing the model results, and a list of lists which contain the
-            aggregations to perform
-        :rtype: tuple[pd.DataFrame, list[list[str]]]
+        :param model_results: the results of a model run
+        :type model_results: pd.DataFrame
+        :return: dictionary containing the specific aggregations
+        :rtype: dict[str, pd.Series]
         """
-        model_results = self.process_results(model_iteration.get_model_results())
-
-        return (
-            model_results,
-            [
-                ["sex", "tretspef_grouped"],
-                ["tretspef"],
-            ],
-        )
+        return {
+            "sex+tretspef_grouped": self.get_agg(model_results, "sex", "tretspef_grouped"),
+            "tretspef": self.get_agg(model_results, "tretspef"),
+        }
 
     def save_results(self, model_iteration: ModelIteration, path_fn: Callable[[str], str]) -> None:
         """Save the results of running the model.
diff --git a/tests/integration/nhp/model/test_run_model.py b/tests/integration/nhp/model/test_run_model.py
@@ -13,11 +13,7 @@
     [
         (
             InpatientsModel,
-            {
-                "sex+tretspef_grouped",
-                "tretspef",
-                "tretspef+los_group",
-            },
+            {"sex+tretspef_grouped", "tretspef", "tretspef+los_group", "delivery_episode_in_spell"},
         ),
         (
             OutpatientsModel,
@@ -79,6 +75,7 @@ def test_all_model_runs(params_path, data_dir):
                 "attendance_category",
                 "avoided_activity",
                 "default",
+                "delivery_episode_in_spell",
                 "sex+age_group",
                 "sex+tretspef_grouped",
                 "tretspef",
diff --git a/tests/unit/nhp/model/test_aae.py b/tests/unit/nhp/model/test_aae.py
@@ -157,30 +157,25 @@ def test_efficiencies(mock_model):
     assert actual == ("data", None)
 
 
-def test_aggregate(mock_model):
+def test_specific_aggregations(mocker, mock_model):
     """Test that it aggregates the results correctly."""
-
     # arrange
-    def create_agg_stub(model_results, cols=None):
-        name = "+".join(cols) if cols else "default"
-        return {name: model_results.to_dict(orient="list")}
+    m = mocker.patch("nhp.model.AaEModel.get_agg", return_value="agg_data")
 
     mdl = mock_model
-    mdl._create_agg = Mock(wraps=create_agg_stub)
-    mdl.process_results = Mock(return_value="processed_data")
-
-    mr_mock = Mock()
-    mr_mock.get_model_results.return_value = "model_results"
 
     # act
-    actual_mr, actual_aggs = mdl.aggregate(mr_mock)
+    actual = mdl.specific_aggregations("results")  # type: ignore
 
     # assert
-    mdl.process_results.assert_called_once_with("model_results")
-    assert actual_mr == "processed_data"
-    assert actual_aggs == [
-        ["acuity"],
-        ["attendance_category"],
+    assert actual == {
+        "acuity": "agg_data",
+        "attendance_category": "agg_data",
+    }
+
+    assert m.call_args_list == [
+        call("results", "acuity"),
+        call("results", "attendance_category"),
     ]
 
 
diff --git a/tests/unit/nhp/model/test_inpatients.py b/tests/unit/nhp/model/test_inpatients.py
@@ -5,6 +5,7 @@
 import numpy as np
 import pandas as pd
 import pytest
+from pandas.testing import assert_frame_equal
 
 from nhp.model.inpatients import InpatientsModel
 
@@ -239,6 +240,7 @@ def test_process_results(mock_model):
             "rn": [1] * 12,
             "has_procedure": [0, 1] * 6,
             "speldur": list(range(12)),
+            "maternity_delivery_in_spell": [True, False] * 6,
         }
     )
     df["pod"] = "ip_" + df["group"] + "_admission"
@@ -431,6 +433,21 @@ def test_process_results(mock_model):
                 "8-14 days",
                 np.nan,
             ],
+            "maternity_delivery_in_spell": [
+                True,
+                True,
+                False,
+                False,
+                False,
+                True,
+                False,
+                False,
+                False,
+                True,
+                True,
+                False,
+            ]
+            * 2,
             "measure": [
                 "admissions",
                 "beddays",
@@ -491,32 +508,31 @@ def test_process_results(mock_model):
     pd.testing.assert_frame_equal(actual, expected)
 
 
-def test_aggregate(mock_model):
+def test_specific_aggregations(mocker, mock_model):
     """Test that it aggregates the results correctly."""
-
     # arrange
-    def create_agg_stub(model_results, cols=None):
-        name = "+".join(cols) if cols else "default"
-        return {name: model_results.to_dict(orient="list")}
+    m = mocker.patch("nhp.model.InpatientsModel.get_agg", return_value="agg_data")
 
     mdl = mock_model
-    mdl._create_agg = Mock(wraps=create_agg_stub)
-    mdl.process_results = Mock(return_value="processed_data")
 
-    mr_mock = Mock()
-    mr_mock.get_model_results.return_value = "nhp.model_data"
+    mock_data = pd.DataFrame({"maternity_delivery_in_spell": [True, False], "value": [1, 2]})
 
     # act
-    actual_mr, actual_aggs = mdl.aggregate(mr_mock)
+    actual = mdl.specific_aggregations(mock_data)
 
     # assert
+    assert actual == {
+        "sex+tretspef_grouped": "agg_data",
+        "tretspef": "agg_data",
+        "tretspef+los_group": "agg_data",
+        "delivery_episode_in_spell": "agg_data",
+    }
 
-    mdl.process_results.assert_called_once_with("nhp.model_data")
-    assert actual_mr == "processed_data"
-    assert actual_aggs == [
-        ["sex", "tretspef_grouped"],
-        ["tretspef"],
-        ["tretspef", "los_group"],
+    assert [(len(i[0][0]), *i[0][1:]) for i in m.call_args_list] == [
+        (2, "sex", "tretspef_grouped"),
+        (2, "tretspef"),
+        (2, "tretspef", "los_group"),
+        (1,),
     ]
 
 
diff --git a/tests/unit/nhp/model/test_model.py b/tests/unit/nhp/model/test_model.py
@@ -775,3 +775,49 @@ def test_apply_resampling(mock_model):
     # act & assert
     with pytest.raises(NotImplementedError):
         mock_model.apply_resampling(None, None)
+
+
+def test_aggregate(mock_model):
+    # arrange
+    mdl = mock_model
+    mdl.process_results = Mock(return_value="processed_results")
+    mdl.get_agg = Mock(return_value="agg")
+    mdl.specific_aggregations = Mock(return_value={"1": "agg", "2": "agg"})
+
+    mi_mock = Mock()
+    mi_mock.get_model_results.return_value = "results"
+
+    # act
+    actual = mdl.aggregate(mi_mock)
+
+    # assert
+    mi_mock.get_model_results.assert_called()
+    mdl.process_results.assert_called_once_with("results")
+    assert mdl.get_agg.call_args_list == [
+        call("processed_results"),
+        call("processed_results", "sex", "age_group"),
+        call("processed_results", "age"),
+    ]
+    mdl.specific_aggregations.assert_called_once_with("processed_results")
+
+    assert actual == {
+        "default": "agg",
+        "sex+age_group": "agg",
+        "age": "agg",
+        "1": "agg",
+        "2": "agg",
+    }
+
+
+def test_process_results(mock_model):
+    # arrange
+    # act & assert
+    with pytest.raises(NotImplementedError):
+        mock_model.process_results(None)
+
+
+def test_specific_aggregations(mock_model):
+    # arrange
+    # act & assert
+    with pytest.raises(NotImplementedError):
+        mock_model.specific_aggregations(None)
diff --git a/tests/unit/nhp/model/test_model_iteration.py b/tests/unit/nhp/model/test_model_iteration.py
diff --git a/tests/unit/nhp/model/test_outpatients.py b/tests/unit/nhp/model/test_outpatients.py