Merge pull request #57 from The-Strategy-Unit/add_results_tracking

morganle-48 · web-flow · commit 908162560e39 · 2025-10-01T13:41:51.000+01:00
Edit trial results to align with target trial output: add periodic snapshots of results_df and per-cause death flags. Closes #50.
diff --git a/renal_capacity_model/config.py b/renal_capacity_model/config.py
@@ -12,9 +12,10 @@ class Config:
     def __init__(self, config_dict={}):
         self.trace = config_dict.get("trace", False)
         self.number_of_runs = config_dict.get("number_of_runs", 10)
-        self.sim_duration = config_dict.get("sim_duration", 1000)
+        self.sim_duration = config_dict.get("sim_duration", int(2*365))     # in days; should be a whole number of years (i.e. a multiple of 365)
         self.random_seed = config_dict.get("random_seed", 0)
         self.arrival_rate = config_dict.get("arrival_rate", 1)
+        self.snapshot_interval = config_dict.get("snapshot_interval", int(365))  # interval between snapshots of results_df, in the same time units as sim_duration (days)
 
         # distributions for calculating interarrival times
         self.age_dist = config_dict.get(
diff --git a/renal_capacity_model/model.py b/renal_capacity_model/model.py
@@ -31,8 +31,11 @@ def __init__(self, run_number, rng, config):
         self.inter_arrival_times = get_interarrival_times(self.config)
         self.patients_in_system = {k: 0 for k in self.inter_arrival_times.keys()}
         self.results_df = self._setup_results_df()
+        self.snapshot_results_df = self._setup_snapshot_df()
+        self.snapshot_interval = self.config.snapshot_interval  # how often to take a snapshot of the results_df
 
     def _setup_results_df(self):
+
         """Sets up DataFrame for recording model results
 
         Returns:
@@ -45,21 +48,63 @@ def _setup_results_df(self):
                 "entry_time",
                 "diverted_to_con_care",
                 "suitable_for_transplant",
-                "live_transplant_count",  ## should this instead count number of live transplants per patient?
-                "cadaver_transplant_count",  ## should this instead count number of cadaver transplants per patient?
+                "live_transplant_count",  
+                "cadaver_transplant_count", 
                 "pre_emptive_transplant",
                 "transplant_count",
-                "ichd_dialysis_count",  ## this is what we'll use to track the number in ichd over time
+                "ichd_dialysis_count",  
                 "hhd_dialysis_count",
                 "pd_dialysis_count",
                 "time_of_death",
+                "death_from_con_care",
+                "death_from_ichd",
+                "death_from_hhd",  
+                "death_from_pd",
+                "death_post_live_transplant",
+                "death_post_cadaver_transplant",
             ]
         )
         results_df["patient ID"] = [1]
         results_df.set_index("patient ID", inplace=True)
 
         return results_df
 
+    def _setup_snapshot_df(self):
+
+        """Sets up DataFrame for recording snapshot model results
+
+        Returns:
+            pd.DataFrame: Single-row placeholder DataFrame (indexed by "patient ID") for recording snapshots of model results
+        """
+        snapshot_results_df = pd.DataFrame(
+            columns=[
+                "snapshot_time",
+                "age_group",
+                "referral_type",
+                "entry_time",
+                "diverted_to_con_care",
+                "suitable_for_transplant",
+                "live_transplant_count",  
+                "cadaver_transplant_count", 
+                "pre_emptive_transplant",
+                "transplant_count",
+                "ichd_dialysis_count",  
+                "hhd_dialysis_count",
+                "pd_dialysis_count",
+                "time_of_death",
+                "death_from_con_care",
+                "death_from_ichd",
+                "death_from_hhd",  
+                "death_from_pd",
+                "death_post_live_transplant",
+                "death_post_cadaver_transplant",
+            ]
+        )
+        snapshot_results_df["patient ID"] = [1]
+        snapshot_results_df.set_index("patient ID", inplace=True)
+
+        return snapshot_results_df
+
     def generator_patient_arrivals(self, patient_type):
         """Generator function for arriving patients
 
@@ -73,11 +118,21 @@ def generator_patient_arrivals(self, patient_type):
             self.patient_counter += 1
 
             p = Patient(self.patient_counter, patient_type)
-            start_time_in_system_patient = self.rng.exponential(
-                1 / self.inter_arrival_times[patient_type]
-            )  # self.env.now
-            p.last_dialysis_modality = "none"
-            p.transplant_count = 0
+
+
+            if self.patient_counter <= 12:
+                start_time_in_system_patient = self.rng.exponential(
+                    1 / self.inter_arrival_times[patient_type]
+                )  
+                yield self.env.timeout(start_time_in_system_patient)
+            else:
+                start_time_in_system_patient = self.env.now
+            
+            self.patients_in_system[patient_type] += 1
+            
+            p.last_dialysis_modality = "none" 
+            p.transplant_count = 0 
+
             self.results_df.loc[p.id, "entry_time"] = start_time_in_system_patient
             self.results_df.loc[p.id, "age_group"] = int(p.age_group)
             self.results_df.loc[p.id, "referral_type"] = p.referral_type
@@ -90,19 +145,19 @@ def generator_patient_arrivals(self, patient_type):
 
             if self.rng.uniform(0, 1) > self.config.con_care_dist[p.age_group]:
                 # If the patient is not diverted to conservative care they start KRT
-                self.patients_in_system[patient_type] += 1
                 self.env.process(self.start_krt(p))
             else:
                 # these patients are diverted to conservative care. We don't need a process here as all these patients do is wait a while before leaving the system
                 self.results_df.loc[p.id, "diverted_to_con_care"] = True
-                yield self.env.timeout(start_time_in_system_patient)
                 sampled_con_care_time = (
                     self.config.ttd_con_care_scale
                     * self.rng.weibull(a=self.config.ttd_con_care_shape, size=1)
                 )
                 yield self.env.timeout(sampled_con_care_time)
                 self.results_df.loc[p.id, "time_of_death"] = self.env.now
                 self.patients_in_system[patient_type] -= 1
+                self.results_df.loc[p.id, "diverted_to_con_care"] = False # reset on death so that summing this column counts only patients currently in conservative care
+                self.results_df.loc[p.id, "death_from_con_care"] = True
                 if self.config.trace:
                     print(
                         f"Patient {p.id} of age group {p.age_group} diverted to conservative care and left the system after {sampled_con_care_time} time units."
@@ -302,6 +357,7 @@ def start_transplant(self, patient):
                 self.results_df.loc[patient.id, "live_transplant_count"] -= 1
                 self.patients_in_system[patient.patient_type] -= 1
                 self.results_df.loc[patient.id, "time_of_death"] = self.env.now
+                self.results_df.loc[patient.id, "death_post_live_transplant"] = True
                 if self.config.trace:
                     print(
                         f"Patient {patient.id} of age group {patient.age_group} died after live transplant at time {self.env.now}."
@@ -337,6 +393,7 @@ def start_transplant(self, patient):
                 self.results_df.loc[patient.id, "cadaver_transplant_count"] -= 1
                 self.patients_in_system[patient.patient_type] -= 1
                 self.results_df.loc[patient.id, "time_of_death"] = self.env.now
+                self.results_df.loc[patient.id, "death_post_cadaver_transplant"] = True
                 if self.config.trace:
                     print(
                         f"Patient {patient.id} of age group {patient.age_group} died after cadaver transplant at time {self.env.now}."
@@ -455,6 +512,7 @@ def start_ichd(self, patient):
                     self.patients_in_system[patient.patient_type] -= 1
                     self.results_df.loc[patient.id, "ichd_dialysis_count"] -= 1
                     self.results_df.loc[patient.id, "time_of_death"] = self.env.now
+                    self.results_df.loc[patient.id, "death_from_ichd"] = True
                     if self.config.trace:
                         print(
                             f"Patient {patient.id} of age group {patient.age_group} died and left the system at time {self.env.now}."
@@ -466,6 +524,7 @@ def start_ichd(self, patient):
                 patient.time_on_ichd_dialysis = sampled_ichd_time
                 self.patients_in_system[patient.patient_type] -= 1
                 self.results_df.loc[patient.id, "ichd_dialysis_count"] -= 1
+                self.results_df.loc[patient.id, "death_from_ichd"] = True
                 self.results_df.loc[patient.id, "time_of_death"] = self.env.now
                 if self.config.trace:
                     print(
@@ -546,26 +605,31 @@ def start_pd(self, patient):
         yield self.env.timeout(5)
         patient.last_dialysis_modality = "pd"
 
-    def calculate_run_results(self):
-        # TODO: what do we want to count?
-        pass
+    def snapshot_results(self):
+        while True:
+            self.snapshot_results_df = pd.concat([self.snapshot_results_df, self.results_df.assign(snapshot_time=self.env.now)])
+            if self.config.trace:
+                print(f"Taking results snapshot of the results_df at time {self.env.now}")
+            yield self.env.timeout(self.snapshot_interval) 
 
     def run(self):
         """Runs the model"""
         # We set up a generator for each of the patient types we have an IAT for
         for patient_type in self.inter_arrival_times.keys():
             self.env.process(self.generator_patient_arrivals(patient_type))
+        
+        self.env.process(self.snapshot_results())
 
         self.env.run(until=self.config.sim_duration)
 
-        self.calculate_run_results()
+        #self.calculate_run_results()
 
         # Show results (optional - set in config)
         if self.config.trace:
             print(f"Run Number {self.run_number}")
             print(self.patients_in_system)
             print(self.results_df)
-            # print(test_arrival_processes(self.results_df,self.config))
+            print(self.snapshot_results_df)
 
 
 if __name__ == "__main__":
diff --git a/renal_capacity_model/trial.py b/renal_capacity_model/trial.py
@@ -21,90 +21,84 @@ def __init__(self, config):
 
     def print_trial_results(self):
         print("Trial Results")
-        print(self.df_trial_results.mean())
+        output_means = self.df_trial_results.mean().to_frame()
+        output_means['Time']=output_means.index.str.split('_').str[-1]
+        output_means.index = output_means.index.str.rsplit('_', n=1).str[0]
+        reshaped_trial_results = output_means.pivot(columns='Time', values=0)
+        print(reshaped_trial_results)
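+        print(reshaped_trial_results.diff(axis=1))   ### change between consecutive snapshot columns; could be used to plot mortality per interval rather than cumulative mortality. NOTE(review): 'Time' labels are strings, so column order may be lexicographic - confirm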
 
     def setup_trial_results(self):
         df_trial_results = pd.DataFrame()
         df_trial_results["Run Number"] = [0]
         df_trial_results.set_index("Run Number", inplace=True)
         return df_trial_results
+    
+    def process_model_results(self,model,run):
+ 
+        self.df_trial_results.loc[run, "total_entries"] = model.results_df["entry_time"].count()   
+        self.df_trial_results.loc[run, "prevalence_con_care"] = model.results_df["diverted_to_con_care"].sum()
+        self.df_trial_results.loc[run, "prevalence_ichd"] = model.results_df["ichd_dialysis_count"].sum()
+        self.df_trial_results.loc[run, "prevalence_hhd"] = model.results_df["hhd_dialysis_count"].sum()
+        self.df_trial_results.loc[run, "prevalence_pd"] = model.results_df["pd_dialysis_count"].sum()
+        self.df_trial_results.loc[run, "prevalence_live_Tx"] = model.results_df["live_transplant_count"].sum()
+        self.df_trial_results.loc[run, "prevalence_cadaver_Tx"] = model.results_df["cadaver_transplant_count"].sum()
+
+        self.df_trial_results.loc[run, "total_deaths"] = model.results_df["time_of_death"].count()   
+        self.df_trial_results.loc[run, "mortality_con_care"] = model.results_df["death_from_con_care"].sum()
+        self.df_trial_results.loc[run, "mortality_ichd"] = model.results_df["death_from_ichd"].sum()
+        self.df_trial_results.loc[run, "mortality_hhd"] = model.results_df["death_from_hhd"].sum()
+        self.df_trial_results.loc[run, "mortality_pd"] = model.results_df["death_from_pd"].sum()
+        self.df_trial_results.loc[run, "mortality_live_Tx"] = model.results_df["death_post_live_transplant"].sum()
+        self.df_trial_results.loc[run, "mortality_cadaver_Tx"] = model.results_df["death_post_cadaver_transplant"].sum()
+
+    def process_snapshot_results(self,model,run):
+        ## this groups the results by the time the snapshot was taken, so we can see how prevalence and mortality change over time
+        results_grouped_by_time = (
+            model.snapshot_results_df.groupby("snapshot_time")
+            .agg(
+                {
+                    "entry_time": "count",
+                    "diverted_to_con_care": "sum",
+                    "ichd_dialysis_count": "sum",
+                    "hhd_dialysis_count": "sum",
+                    "pd_dialysis_count": "sum",
+                    "live_transplant_count": "sum",
+                    "cadaver_transplant_count": "sum",
+                    "time_of_death": "count",
+                    "death_from_con_care": "sum",
+                    "death_from_ichd": "sum",
+                    "death_from_hhd": "sum",
+                    "death_from_pd": "sum",
+                    "death_post_live_transplant": "count",
+                    "death_post_cadaver_transplant": "count",
+                }
+            )
+            .rename(columns={"entry_time": "total_entries","time_of_death": "total_deaths"})
+        )
+
+        for snapshot_time in results_grouped_by_time.index:
+            self.df_trial_results.loc[run, f"total_entries_{snapshot_time}"] = results_grouped_by_time.loc[snapshot_time, "total_entries"]
+            self.df_trial_results.loc[run, f"prevalence_con_care_{snapshot_time}"] = results_grouped_by_time.loc[snapshot_time, "diverted_to_con_care"]
+            self.df_trial_results.loc[run, f"prevalence_ichd_{snapshot_time}"] = results_grouped_by_time.loc[snapshot_time, "ichd_dialysis_count"]
+            self.df_trial_results.loc[run, f"prevalence_hhd_{snapshot_time}"] = results_grouped_by_time.loc[snapshot_time, "hhd_dialysis_count"]
+            self.df_trial_results.loc[run, f"prevalence_pd_{snapshot_time}"] = results_grouped_by_time.loc[snapshot_time, "pd_dialysis_count"]
+            self.df_trial_results.loc[run, f"prevalence_live_Tx_{snapshot_time}"] = results_grouped_by_time.loc[snapshot_time, "live_transplant_count"]
+            self.df_trial_results.loc[run, f"prevalence_cadaver_Tx_{snapshot_time}"] = results_grouped_by_time.loc[snapshot_time, "cadaver_transplant_count"]
+            self.df_trial_results.loc[run, f"total_deaths_{snapshot_time}"] = results_grouped_by_time.loc[snapshot_time, "total_deaths"]
+            self.df_trial_results.loc[run, f"mortality_con_care_{snapshot_time}"] = results_grouped_by_time.loc[snapshot_time, "death_from_con_care"]
+            self.df_trial_results.loc[run, f"mortality_ichd_{snapshot_time}"] = results_grouped_by_time.loc[snapshot_time, "death_from_ichd"]
+            self.df_trial_results.loc[run, f"mortality_hhd_{snapshot_time}"] = results_grouped_by_time.loc[snapshot_time, "death_from_hhd"]
+            self.df_trial_results.loc[run, f"mortality_pd_{snapshot_time}"] = results_grouped_by_time.loc[snapshot_time, "death_from_pd"]
+            self.df_trial_results.loc[run, f"mortality_live_Tx_{snapshot_time}"] = results_grouped_by_time.loc[snapshot_time, "death_post_live_transplant"]
+            self.df_trial_results.loc[run, f"mortality_cadaver_Tx_{snapshot_time}"] = results_grouped_by_time.loc[snapshot_time, "death_post_cadaver_transplant"]
 
     def run_trial(self):
         for run in range(self.config.number_of_runs):
             model = Model(run, self.rng, self.config)
             model.run()
-            # Process results. Consider moving to separate function if it gets too complex
-            results_grouped_by_age = (
-                model.results_df.groupby("age_group")
-                .agg(
-                    {
-                        "diverted_to_con_care": "sum",
-                        "entry_time": "count",
-                        "suitable_for_transplant": "sum",
-                        "live_transplant_count": "sum",
-                        "cadaver_transplant_count": "sum",
-                        "pre_emptive_transplant": "sum",
-                    }
-                )
-                .rename(columns={"entry_time": "total_entries"})
-            )
-            self.df_trial_results.loc[run, "total_entries"] = results_grouped_by_age[
-                "total_entries"
-            ].sum()
-            self.df_trial_results.loc[run, "diverted_to_con_care"] = (
-                results_grouped_by_age["diverted_to_con_care"].sum()
-            )
-            for age_group in results_grouped_by_age.index:
-                self.df_trial_results.loc[
-                    run, f"diverted_to_con_care_{int(age_group)}"
-                ] = (
-                    results_grouped_by_age.loc[age_group, "diverted_to_con_care"]
-                    / results_grouped_by_age.loc[age_group, "total_entries"]
-                )
-
-            self.df_trial_results.loc[run, "suitable_for_transplant"] = (
-                results_grouped_by_age["suitable_for_transplant"].sum()
-            )
-            self.df_trial_results.loc[run, "proportion_suitable_for_transplant"] = (
-                results_grouped_by_age["suitable_for_transplant"].sum()
-                / results_grouped_by_age["total_entries"].sum()
-            )
-            for age_group in results_grouped_by_age.index:
-                self.df_trial_results.loc[
-                    run, f"suitable_for_transplant_{int(age_group)}"
-                ] = (
-                    results_grouped_by_age.loc[age_group, "suitable_for_transplant"]
-                    / results_grouped_by_age.loc[age_group, "total_entries"]
-                )
-            self.df_trial_results.loc[run, "pre_emptive_transplant"] = (
-                results_grouped_by_age["pre_emptive_transplant"].sum()
-            )
-            self.df_trial_results.loc[run, "proportion_pre_emptive_transplant"] = (
-                results_grouped_by_age["pre_emptive_transplant"].sum()
-                / results_grouped_by_age["total_entries"].sum()
-            )
-            for age_group in results_grouped_by_age.index:
-                self.df_trial_results.loc[
-                    run, f"pre_emptive_transplant_{int(age_group)}"
-                ] = (
-                    results_grouped_by_age.loc[age_group, "pre_emptive_transplant"]
-                    / results_grouped_by_age.loc[age_group, "total_entries"]
-                )
-            self.df_trial_results.loc[
-                run, "proportion_live_transplant"
-            ] = results_grouped_by_age["live_transplant_count"].sum() / (
-                results_grouped_by_age["live_transplant_count"].sum()
-                + results_grouped_by_age["cadaver_transplant_count"].sum()
-            )
-            for age_group in results_grouped_by_age.index:
-                self.df_trial_results.loc[run, f"live_transplants_{int(age_group)}"] = (
-                    results_grouped_by_age.loc[age_group, "live_transplant_count"]
-                )
-            for age_group in results_grouped_by_age.index:
-                self.df_trial_results.loc[
-                    run, f"cadaver_transplants_{int(age_group)}"
-                ] = results_grouped_by_age.loc[age_group, "cadaver_transplant_count"]
-            for k, v in model.patients_in_system.items():
-                self.df_trial_results.loc[run, k] = v
+            
+            model.snapshot_results_df = pd.concat([model.snapshot_results_df, model.results_df.assign(snapshot_time=model.config.sim_duration)])
+            self.process_snapshot_results(model,run)
 
         self.print_trial_results()