@@ -57,21 +57,22 @@ def return_sum_95_CI_across_runs(df: pd.DataFrame) -> pd.DataFrame:
5757def extract_birth_data_frames_and_outcomes (
5858 folder ,
5959 years_of_interest ,
60- intervention_years ,
60+ intervention_datayears ,
6161 interv
6262) -> Dict [str , pd .DataFrame ]:
6363 """
6464 Extracts and summarizes birth data.
6565
6666 :param folder: Path to the folder containing outcome data.
6767 :param years_of_interest: List of years to extract data for.
68- :param intervention_years: List of years during which the intervention was implemented (if any).
68+ :param intervention_datayears: List of years for which data are needed to plot means over the intervention years, i.e.
69+ from the year before interventions are implemented until the last year of interventions.
6970 :param interv: Name or identifier of the intervention.
7071 :return: Dictionary with DataFrames:
7172 (1) 'births_df': Birth counts for years of interest (by draw and run),
7273 (2) 'births_mean_ci_df': Mean and 95% CI for total births per year and draw,
7374 (3) 'interv_births_df': Birth counts for intervention years,
74- (4) 'interv_births_mean_ci_df': Mean and 95% CI for births per year and draw for intervention years .
75+ (4) 'interv_births_mean_ci_df': Mean and 95% CI for births per year and draw for intervention_datayears .
7576 """
7677
7778 print (f" -{ interv = } " )
@@ -89,14 +90,17 @@ def extract_birth_data_frames_and_outcomes(
8990
9091 births_mean_ci_per_year_per_draw_df = return_mean_95_CI_across_runs (births_df )
9192
92- interv_births_df = births_df .loc [intervention_years ]
93+ interv_births_df = births_df .loc [intervention_datayears ]
9394 interv_births_per_year_per_draw_df = return_mean_95_CI_across_runs (interv_births_df )
9495
96+ # report during which years interventions were implemented (if any)
97+ interv_years = [year + 1 for year in intervention_datayears [:- 1 ]]
98+
9599 return {'births_df' : births_df ,
96100 'births_mean_ci_df' : births_mean_ci_per_year_per_draw_df ,
97101 'interv_births_df' : interv_births_df ,
98102 'interv_births_mean_ci_df' : interv_births_per_year_per_draw_df ,
99- 'interv_years' : intervention_years }
103+ 'interv_years' : interv_years }
100104
101105def extract_death_data_frames_and_outcomes (
102106 folder ,
@@ -169,19 +173,22 @@ def extract_death_data_frames_and_outcomes(
169173 neo_Diarrhoea_deaths_with_SAM_mean_ci_per_year_per_draw_df = \
170174 return_mean_95_CI_across_runs (neonatal_Diarrhoea_deaths_with_SAM_df )
171175
176+ # neo deaths for each year within intervention period
172177 interv_neo_deaths_df = neonatal_deaths_df .loc [intervention_years ]
173- interv_neo_deaths_sum_per_draw_CI_across_runs_df = return_sum_95_CI_across_runs (interv_neo_deaths_df )
174178 interv_neo_SAM_deaths_df = neonatal_SAM_deaths_df .loc [intervention_years ]
175- interv_neo_SAM_deaths_sum_per_draw_CI_across_runs_df = return_sum_95_CI_across_runs (interv_neo_SAM_deaths_df )
176179 interv_neo_ALRI_deaths_df = neonatal_ALRI_deaths_df .loc [intervention_years ]
177- interv_neo_ALRI_deaths_sum_per_draw_CI_across_runs_df = return_sum_95_CI_across_runs (interv_neo_ALRI_deaths_df )
178180 interv_neo_Diarrhoea_deaths_df = neonatal_Diarrhoea_deaths_df .loc [intervention_years ]
181+ interv_neo_ALRI_deaths_with_SAM_df = neonatal_ALRI_deaths_with_SAM_df .loc [intervention_years ]
182+ interv_neo_Diarrhoea_deaths_with_SAM_df = neonatal_Diarrhoea_deaths_with_SAM_df .loc [intervention_years ]
183+
184+ # sum of neo deaths over intervention period
185+ interv_neo_deaths_sum_per_draw_CI_across_runs_df = return_sum_95_CI_across_runs (interv_neo_deaths_df )
186+ interv_neo_SAM_deaths_sum_per_draw_CI_across_runs_df = return_sum_95_CI_across_runs (interv_neo_SAM_deaths_df )
187+ interv_neo_ALRI_deaths_sum_per_draw_CI_across_runs_df = return_sum_95_CI_across_runs (interv_neo_ALRI_deaths_df )
179188 interv_neo_Diarrhoea_deaths_sum_per_draw_CI_across_runs_df = \
180189 return_sum_95_CI_across_runs (interv_neo_Diarrhoea_deaths_df )
181- interv_neo_ALRI_deaths_with_SAM_df = neonatal_ALRI_deaths_with_SAM_df .loc [intervention_years ]
182190 interv_neo_ALRI_deaths_with_SAM_sum_per_draw_CI_across_runs_df = \
183191 return_sum_95_CI_across_runs (interv_neo_ALRI_deaths_with_SAM_df )
184- interv_neo_Diarrhoea_deaths_with_SAM_df = neonatal_Diarrhoea_deaths_with_SAM_df .loc [intervention_years ]
185192 interv_neo_Diarrhoea_deaths_with_SAM_sum_per_draw_CI_across_runs_df = \
186193 return_sum_95_CI_across_runs (interv_neo_Diarrhoea_deaths_with_SAM_df )
187194
@@ -240,20 +247,23 @@ def extract_death_data_frames_and_outcomes(
240247 under5_Diarrhoea_deaths_with_SAM_mean_ci_per_year_per_draw_df = \
241248 return_mean_95_CI_across_runs (under5_Diarrhoea_deaths_with_SAM_df )
242249
250+ # under 5 deaths for each year within intervention period
243251 interv_under5_deaths_df = under5_deaths_df .loc [intervention_years ]
244- interv_under5_deaths_sum_per_draw_CI_across_runs_df = return_sum_95_CI_across_runs (interv_under5_deaths_df )
245252 interv_under5_SAM_deaths_df = under5_SAM_deaths_df .loc [intervention_years ]
246- interv_under5_SAM_deaths_sum_per_draw_CI_across_runs_df = return_sum_95_CI_across_runs (interv_under5_SAM_deaths_df )
247253 interv_under5_ALRI_deaths_df = under5_ALRI_deaths_df .loc [intervention_years ]
254+ interv_under5_Diarrhoea_deaths_df = under5_Diarrhoea_deaths_df .loc [intervention_years ]
255+ interv_under5_ALRI_deaths_with_SAM_df = under5_ALRI_deaths_with_SAM_df .loc [intervention_years ]
256+ interv_under5_Diarrhoea_deaths_with_SAM_df = under5_Diarrhoea_deaths_with_SAM_df .loc [intervention_years ]
257+
258+ # sum of under 5 deaths over intervention period
259+ interv_under5_deaths_sum_per_draw_CI_across_runs_df = return_sum_95_CI_across_runs (interv_under5_deaths_df )
260+ interv_under5_SAM_deaths_sum_per_draw_CI_across_runs_df = return_sum_95_CI_across_runs (interv_under5_SAM_deaths_df )
248261 interv_under5_ALRI_deaths_sum_per_draw_CI_across_runs_df = \
249262 return_sum_95_CI_across_runs (interv_under5_ALRI_deaths_df )
250- interv_under5_Diarrhoea_deaths_df = under5_Diarrhoea_deaths_df .loc [intervention_years ]
251263 interv_under5_Diarrhoea_deaths_sum_per_draw_CI_across_runs_df = \
252264 return_sum_95_CI_across_runs (interv_under5_Diarrhoea_deaths_df )
253- interv_under5_ALRI_deaths_with_SAM_df = under5_ALRI_deaths_with_SAM_df .loc [intervention_years ]
254265 interv_under5_ALRI_deaths_with_SAM_sum_per_draw_CI_across_runs_df = \
255266 return_sum_95_CI_across_runs (interv_under5_ALRI_deaths_with_SAM_df )
256- interv_under5_Diarrhoea_deaths_with_SAM_df = under5_Diarrhoea_deaths_with_SAM_df .loc [intervention_years ]
257267 interv_under5_Diarrhoea_deaths_with_SAM_sum_per_draw_CI_across_runs_df = \
258268 return_sum_95_CI_across_runs (interv_under5_Diarrhoea_deaths_with_SAM_df )
259269
@@ -320,14 +330,7 @@ def extract_death_data_frames_and_outcomes(
320330 'under5_mort_rate_mean_ci_df' : under5mr_per_year_per_draw_df ,
321331 'interv_years' : intervention_years }
322332
323- # # TODO: rm prints when no longer needed
324- # print("\nYears, and (Draws, Runs) with no under 5 death:")
325- # no_under5_deaths = [(under5_deaths.index[row], under5_deaths.columns[col]) for row, col in
326- # zip(*np.where(under5_deaths == 0.0))]
327- # print(f"{no_under5_deaths}")
328- # #
329-
330- def extract_interv_daly_data_frames_and_outcomes (
333+ def extract_daly_data_frames_and_outcomes (
331334 folder ,
332335 years_of_interest ,
333336 intervention_years ,
@@ -347,7 +350,7 @@ def extract_interv_daly_data_frames_and_outcomes(
347350 # Extract all DALYs assigned to children under 5 --- dalys_stacked_by_age_and_time, i.e. all the year of life lost
348351 # are ascribed to the age of the death and the year of the death differentiated by cause of death / disability
349352
350- def extrapolate_interv_dalys_data_from_logs (df : pd .DataFrame ) -> pd .Series :
353+ def extrapolate_dalys_data_from_logs (df : pd .DataFrame ) -> pd .Series :
351354 # Melt the DataFrame to have 'cause_of_dalys' as a variable
352355 df_with_cause_of_dalys = df .melt (
353356 id_vars = ['age_range' , 'sex' , 'year' ],
@@ -363,25 +366,25 @@ def extrapolate_interv_dalys_data_from_logs(df: pd.DataFrame) -> pd.Series:
363366 )
364367
365368 # Keep only dalys for children under-5 by year and cause_of_dalys
366- under5_interv_dalys_by_year_cause = \
369+ under5_dalys_by_year_cause = \
367370 df_with_cause_of_dalys [
368- (df_with_cause_of_dalys ['year' ].isin (intervention_years )) &
371+ (df_with_cause_of_dalys ['year' ].isin (years_of_interest )) &
369372 (df_with_cause_of_dalys ['age_range' ] == '0-4' )
370373 ].groupby (['year' , 'cause_of_dalys' ],as_index = True )['dalys' ].sum ()
371374
372- return under5_interv_dalys_by_year_cause
375+ return under5_dalys_by_year_cause
373376
374- under5_interv_dalys_by_cause_df = extract_results (
377+ under5_dalys_by_cause_df = extract_results (
375378 folder ,
376379 module = "tlo.methods.healthburden" ,
377380 key = "dalys_stacked_by_age_and_time" ,
378- custom_generate_series = lambda df : extrapolate_interv_dalys_data_from_logs (df ),
381+ custom_generate_series = lambda df : extrapolate_dalys_data_from_logs (df ),
379382 do_scaling = True
380383 ).fillna (0 )
381384
382385 # Apply 3% discount rate to DALYs. Re-indexing is required to use the discounting function,
383386 # so the MultiIndexes must be restored afterward.
384- under5_dalys_by_cause_df__reset_index = under5_interv_dalys_by_cause_df .reset_index ()
387+ under5_dalys_by_cause_df__reset_index = under5_dalys_by_cause_df .reset_index ()
385388 under5_dalys_by_cause_df__reset_index .columns = [
386389 f"{ col [0 ]} _{ col [1 ]} " if col [1 ] != "" else f"{ col [0 ]} "
387390 for col in under5_dalys_by_cause_df__reset_index .columns .values
@@ -392,41 +395,46 @@ def extrapolate_interv_dalys_data_from_logs(df: pd.DataFrame) -> pd.Series:
392395 _df = under5_dalys_by_cause_df__reset_index , _discount_rate = 0.03 , _column_for_discounting = col
393396 )[col ]
394397 # set MultiIndex for rows
395- under5_interv_dalys_by_cause_df = under5_dalys_by_cause_df__reset_index .set_index (['year' , 'cause_of_dalys' ])
398+ under5_dalys_by_cause_df = under5_dalys_by_cause_df__reset_index .set_index (['year' , 'cause_of_dalys' ])
396399 # create MultiIndex for columns
397- new_col_tuples = [tuple (map (int , col .split ('_' ))) for col in under5_interv_dalys_by_cause_df .columns if '_' in col ]
400+ new_col_tuples = [tuple (map (int , col .split ('_' ))) for col in under5_dalys_by_cause_df .columns if '_' in col ]
398401 new_col_index = pd .MultiIndex .from_tuples (new_col_tuples , names = ['draw' , 'run' ])
399- under5_interv_dalys_by_cause_df = under5_interv_dalys_by_cause_df [[f"{ d } _{ r } " for d , r in new_col_tuples ]]
400- under5_interv_dalys_by_cause_df .columns = new_col_index
402+ under5_dalys_by_cause_df = under5_dalys_by_cause_df [[f"{ d } _{ r } " for d , r in new_col_tuples ]]
403+ under5_dalys_by_cause_df .columns = new_col_index
401404
402405 # number of dalys by any cause
403- interv_under5_dalys_df = under5_interv_dalys_by_cause_df .groupby (['year' ]).sum ()
406+ under5_dalys_df = under5_dalys_by_cause_df .groupby (['year' ]).sum ()
404407 # number of dalys by specific causes
405- interv_under5_SAM_dalys_df = under5_interv_dalys_by_cause_df .xs ("Childhood Undernutrition" , level = 1 )
406- interv_under5_ALRI_dalys_df = under5_interv_dalys_by_cause_df .xs ("Lower respiratory infections" , level = 1 )
407- interv_under5_Diarrhoea_dalys_df = under5_interv_dalys_by_cause_df .xs ("Childhood Diarrhoea" , level = 1 )
408-
409- interv_under5_dalys_mean_ci_per_year_per_draw_df = return_mean_95_CI_across_runs (interv_under5_dalys_df )
410- interv_under5_SAM_dalys_mean_ci_per_year_per_draw_df = return_mean_95_CI_across_runs (interv_under5_SAM_dalys_df )
411- interv_under5_ALRI_dalys_mean_ci_per_year_per_draw_df = return_mean_95_CI_across_runs (interv_under5_ALRI_dalys_df )
412- interv_under5_Diarrhoea_dalys_mean_ci_per_year_per_draw_df = \
413- return_mean_95_CI_across_runs (interv_under5_Diarrhoea_dalys_df )
414-
408+ under5_SAM_dalys_df = under5_dalys_by_cause_df .xs ("Childhood Undernutrition" , level = 1 )
409+ under5_ALRI_dalys_df = under5_dalys_by_cause_df .xs ("Lower respiratory infections" , level = 1 )
410+ under5_Diarrhoea_dalys_df = under5_dalys_by_cause_df .xs ("Childhood Diarrhoea" , level = 1 )
411+
412+ under5_dalys_mean_ci_per_year_per_draw_df = return_mean_95_CI_across_runs (under5_dalys_df )
413+ under5_SAM_dalys_mean_ci_per_year_per_draw_df = return_mean_95_CI_across_runs (under5_SAM_dalys_df )
414+ under5_ALRI_dalys_mean_ci_per_year_per_draw_df = return_mean_95_CI_across_runs (under5_ALRI_dalys_df )
415+ under5_Diarrhoea_dalys_mean_ci_per_year_per_draw_df = return_mean_95_CI_across_runs (under5_Diarrhoea_dalys_df )
416+
417+ # under 5 DALYs for each year within intervention period
418+ interv_under5_dalys_df = under5_dalys_df .loc [intervention_years ]
419+ interv_under5_SAM_dalys_df = under5_SAM_dalys_df .loc [intervention_years ]
420+ interv_under5_ALRI_dalys_df = under5_ALRI_dalys_df .loc [intervention_years ]
421+ interv_under5_Diarrhoea_dalys_df = under5_Diarrhoea_dalys_df .loc [intervention_years ]
422+
423+ # sum of under 5 DALYs over intervention period
415424 interv_under5_dalys_sum_per_draw_CI_across_runs_df = return_sum_95_CI_across_runs (interv_under5_dalys_df )
416425 interv_under5_SAM_dalys_sum_per_draw_CI_across_runs_df = return_sum_95_CI_across_runs (interv_under5_SAM_dalys_df )
417- interv_under5_ALRI_dalys_sum_per_draw_CI_across_runs_df = \
418- return_sum_95_CI_across_runs (interv_under5_ALRI_dalys_df )
426+ interv_under5_ALRI_dalys_sum_per_draw_CI_across_runs_df = return_sum_95_CI_across_runs (interv_under5_ALRI_dalys_df )
419427 interv_under5_Diarrhoea_dalys_sum_per_draw_CI_across_runs_df = \
420428 return_sum_95_CI_across_runs (interv_under5_Diarrhoea_dalys_df )
421429
422- return {'interv_under5_dalys_df ' : interv_under5_dalys_df ,
423- 'interv_under5_SAM_dalys_df ' : interv_under5_SAM_dalys_df ,
424- 'interv_under5_ALRI_dalys_df ' : interv_under5_ALRI_dalys_df ,
425- 'interv_under5_Diarrhoea_dalys_df ' : interv_under5_Diarrhoea_dalys_df ,
426- 'interv_under5_dalys_mean_ci_df ' : interv_under5_dalys_mean_ci_per_year_per_draw_df ,
427- 'interv_under5_SAM_dalys_mean_ci_df ' : interv_under5_SAM_dalys_mean_ci_per_year_per_draw_df ,
428- 'interv_under5_ALRI_dalys_mean_ci_df ' : interv_under5_ALRI_dalys_mean_ci_per_year_per_draw_df ,
429- 'interv_under5_Diarrhoea_dalys_mean_ci_df ' : interv_under5_Diarrhoea_dalys_mean_ci_per_year_per_draw_df ,
430+ return {'under5_dalys_df ' : under5_dalys_df ,
431+ 'under5_SAM_dalys_df ' : under5_SAM_dalys_df ,
432+ 'under5_ALRI_dalys_df ' : under5_ALRI_dalys_df ,
433+ 'under5_Diarrhoea_dalys_df ' : under5_Diarrhoea_dalys_df ,
434+ 'under5_dalys_mean_ci_df ' : under5_dalys_mean_ci_per_year_per_draw_df ,
435+ 'under5_SAM_dalys_mean_ci_df ' : under5_SAM_dalys_mean_ci_per_year_per_draw_df ,
436+ 'under5_ALRI_dalys_mean_ci_df ' : under5_ALRI_dalys_mean_ci_per_year_per_draw_df ,
437+ 'under5_Diarrhoea_dalys_mean_ci_df ' : under5_Diarrhoea_dalys_mean_ci_per_year_per_draw_df ,
430438 'interv_under5_dalys_sum_ci_df' : interv_under5_dalys_sum_per_draw_CI_across_runs_df ,
431439 'interv_under5_SAM_dalys_sum_ci_df' : interv_under5_SAM_dalys_sum_per_draw_CI_across_runs_df ,
432440 'interv_under5_ALRI_dalys_sum_ci_df' : interv_under5_ALRI_dalys_sum_per_draw_CI_across_runs_df ,
@@ -442,15 +450,15 @@ def regenerate_pickles_with_debug_logs(iterv_folders_dict) -> None:
442450def extract_tx_data_frames (
443451 folder ,
444452 years_of_interest ,
445- intervention_years ,
453+ intervention_datayears ,
446454 interv
447455) -> Dict [str , pd .DataFrame ]:
448456 """
449457 Extracts and summarizes treatment data by age group and year.
450458
451459 :param folder: Path to the folder containing outcome data.
452460 :param years_of_interest: List of years to extract data for.
453- :param intervention_years : List of years during which the intervention was implemented ( if any) .
461+ :param intervention_datayears : List of years for which the data include the interventions, if any are implemented.
454462 :param interv: Name or identifier of the intervention.
455463 :return: Dictionary with DataFrames:
456464 (1) 'tx_by_age_group_df': Counts by year, treatment, age_group (by draw and run),
@@ -493,19 +501,22 @@ def extract_tx_data_frames(
493501 tx_mean_ci_df = return_mean_95_CI_across_runs (tx_mean_df )
494502
495503 # For intervention years
496- interv_tx_by_age_group_df = tx_by_age_group_df .loc [intervention_years ]
504+ interv_tx_by_age_group_df = tx_by_age_group_df .loc [intervention_datayears ]
497505 interv_tx_by_age_group_mean_ci_df = return_mean_95_CI_across_runs (interv_tx_by_age_group_df )
498506 interv_tx_mean_df = interv_tx_by_age_group_df .groupby (['year' , 'treatment' ]).sum ()
499507 interv_tx_mean_ci_df = return_mean_95_CI_across_runs (interv_tx_mean_df )
500508
509+ # report during which years interventions were implemented (if any)
510+ interv_years = [year + 1 for year in intervention_datayears [:- 1 ]]
511+
501512 return {
502513 'tx_by_age_group_df' : tx_by_age_group_df ,
503514 'tx_by_age_group_mean_ci_df' : tx_by_age_group_mean_ci_df ,
504515 'tx_mean_ci_df' : tx_mean_ci_df ,
505516 'interv_tx_by_age_group_df' : interv_tx_by_age_group_df ,
506517 'interv_tx_by_age_group_mean_ci_df' : interv_tx_by_age_group_mean_ci_df ,
507518 'interv_tx_mean_ci_df' : interv_tx_mean_ci_df ,
508- 'interv_years' : intervention_years
519+ 'interv_years' : interv_years
509520 }
510521
511522def get_scen_colour (scen_name : str ) -> str :
@@ -729,8 +740,8 @@ def plot_mean_outcome_and_CIs__scenarios_comparison(
729740 'under5_ALRI_deaths_with_SAM_mean_ci_df' , 'under5_Diarrhoea_deaths_with_SAM_mean_ci_df' ]
730741 else : # outcome_type == "DALYs":
731742 neonatal_outcomes = [None , None , None , None ] # No data on DALYs for neonatal
732- under5_outcomes = ['interv_under5_dalys_mean_ci_df ' , 'interv_under5_SAM_dalys_mean_ci_df ' ,
733- 'interv_under5_ALRI_dalys_mean_ci_df ' , 'interv_under5_Diarrhoea_dalys_mean_ci_df ' ]
743+ under5_outcomes = ['under5_dalys_mean_ci_df ' , 'under5_SAM_dalys_mean_ci_df ' ,
744+ 'under5_ALRI_dalys_mean_ci_df ' , 'under5_Diarrhoea_dalys_mean_ci_df ' ]
734745 outcome = neonatal_outcomes [i ] if cohort == 'Neonatal' else under5_outcomes [i ]
735746
736747 if outcome :
@@ -755,7 +766,7 @@ def plot_mean_outcome_and_CIs__scenarios_comparison(
755766 means , ci_lower , ci_upper = zip (* scen_data .values .flatten ())
756767
757768 # Plot the data
758- years_to_plot = [year for year in plot_years if year in scen_data .index ]
769+ years_to_plot = [year for year in plot_years if year - 1 in scen_data .index ]
759770 ax .plot (years_to_plot , means , label = scenario , color = get_scen_colour (scenario ))
760771 ax .fill_between (years_to_plot , ci_lower , ci_upper , color = get_scen_colour (scenario ), alpha = 0.2 )
761772
0 commit comments