Skip to content

Commit 6b1fee0

Browse files
committed
analysis_utility_fncs_wast & run_interv_analysis_wast: use intervention_datayears instead of intervention_years to extract data; the data are from the end of each year, hence for continuous plots they need to be shown as if for the next year
1 parent 8c02dd5 commit 6b1fee0

File tree

2 files changed

+100
-72
lines changed

2 files changed

+100
-72
lines changed

src/scripts/wasting_analyses/analysis_utility_functions_wast.py

Lines changed: 73 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -57,21 +57,22 @@ def return_sum_95_CI_across_runs(df: pd.DataFrame) -> pd.DataFrame:
5757
def extract_birth_data_frames_and_outcomes(
5858
folder,
5959
years_of_interest,
60-
intervention_years,
60+
intervention_datayears,
6161
interv
6262
) -> Dict[str, pd.DataFrame]:
6363
"""
6464
Extracts and summarizes birth data.
6565
6666
:param folder: Path to the folder containing outcome data.
6767
:param years_of_interest: List of years to extract data for.
68-
:param intervention_years: List of years during which the intervention was implemented (if any).
68+
:param intervention_datayears: List of years for which we need data to plot means over the interventions years, ie
69+
from the year before interventions are implemented until the last year of interventions.
6970
:param interv: Name or identifier of the intervention.
7071
:return: Dictionary with DataFrames:
7172
(1) 'births_df': Birth counts for years of interest (by draw and run),
7273
(2) 'births_mean_ci_df': Mean and 95% CI for total births per year and draw,
7374
(3) 'interv_births_df': Birth counts for intervention years,
74-
(4) 'interv_births_mean_ci_df': Mean and 95% CI for births per year and draw for intervention years.
75+
(4) 'interv_births_mean_ci_df': Mean and 95% CI for births per year and draw for intervention_datayears.
7576
"""
7677

7778
print(f" -{interv=}")
@@ -89,14 +90,17 @@ def extract_birth_data_frames_and_outcomes(
8990

9091
births_mean_ci_per_year_per_draw_df = return_mean_95_CI_across_runs(births_df)
9192

92-
interv_births_df = births_df.loc[intervention_years]
93+
interv_births_df = births_df.loc[intervention_datayears]
9394
interv_births_per_year_per_draw_df = return_mean_95_CI_across_runs(interv_births_df)
9495

96+
# report during which years interventions were implemented (if any)
97+
interv_years = [year+1 for year in intervention_datayears[:-1]]
98+
9599
return {'births_df': births_df,
96100
'births_mean_ci_df': births_mean_ci_per_year_per_draw_df,
97101
'interv_births_df': interv_births_df,
98102
'interv_births_mean_ci_df': interv_births_per_year_per_draw_df,
99-
'interv_years': intervention_years}
103+
'interv_years': interv_years}
100104

101105
def extract_death_data_frames_and_outcomes(
102106
folder,
@@ -169,19 +173,22 @@ def extract_death_data_frames_and_outcomes(
169173
neo_Diarrhoea_deaths_with_SAM_mean_ci_per_year_per_draw_df = \
170174
return_mean_95_CI_across_runs(neonatal_Diarrhoea_deaths_with_SAM_df)
171175

176+
# neo deaths for each year within intervention period
172177
interv_neo_deaths_df = neonatal_deaths_df.loc[intervention_years]
173-
interv_neo_deaths_sum_per_draw_CI_across_runs_df = return_sum_95_CI_across_runs(interv_neo_deaths_df)
174178
interv_neo_SAM_deaths_df = neonatal_SAM_deaths_df.loc[intervention_years]
175-
interv_neo_SAM_deaths_sum_per_draw_CI_across_runs_df = return_sum_95_CI_across_runs(interv_neo_SAM_deaths_df)
176179
interv_neo_ALRI_deaths_df = neonatal_ALRI_deaths_df.loc[intervention_years]
177-
interv_neo_ALRI_deaths_sum_per_draw_CI_across_runs_df = return_sum_95_CI_across_runs(interv_neo_ALRI_deaths_df)
178180
interv_neo_Diarrhoea_deaths_df = neonatal_Diarrhoea_deaths_df.loc[intervention_years]
181+
interv_neo_ALRI_deaths_with_SAM_df = neonatal_ALRI_deaths_with_SAM_df.loc[intervention_years]
182+
interv_neo_Diarrhoea_deaths_with_SAM_df = neonatal_Diarrhoea_deaths_with_SAM_df.loc[intervention_years]
183+
184+
# sum of neo deaths over intervention period
185+
interv_neo_deaths_sum_per_draw_CI_across_runs_df = return_sum_95_CI_across_runs(interv_neo_deaths_df)
186+
interv_neo_SAM_deaths_sum_per_draw_CI_across_runs_df = return_sum_95_CI_across_runs(interv_neo_SAM_deaths_df)
187+
interv_neo_ALRI_deaths_sum_per_draw_CI_across_runs_df = return_sum_95_CI_across_runs(interv_neo_ALRI_deaths_df)
179188
interv_neo_Diarrhoea_deaths_sum_per_draw_CI_across_runs_df = \
180189
return_sum_95_CI_across_runs(interv_neo_Diarrhoea_deaths_df)
181-
interv_neo_ALRI_deaths_with_SAM_df = neonatal_ALRI_deaths_with_SAM_df.loc[intervention_years]
182190
interv_neo_ALRI_deaths_with_SAM_sum_per_draw_CI_across_runs_df = \
183191
return_sum_95_CI_across_runs(interv_neo_ALRI_deaths_with_SAM_df)
184-
interv_neo_Diarrhoea_deaths_with_SAM_df = neonatal_Diarrhoea_deaths_with_SAM_df.loc[intervention_years]
185192
interv_neo_Diarrhoea_deaths_with_SAM_sum_per_draw_CI_across_runs_df = \
186193
return_sum_95_CI_across_runs(interv_neo_Diarrhoea_deaths_with_SAM_df)
187194

@@ -240,20 +247,23 @@ def extract_death_data_frames_and_outcomes(
240247
under5_Diarrhoea_deaths_with_SAM_mean_ci_per_year_per_draw_df = \
241248
return_mean_95_CI_across_runs(under5_Diarrhoea_deaths_with_SAM_df)
242249

250+
# under 5 deaths for each year within intervention period
243251
interv_under5_deaths_df = under5_deaths_df.loc[intervention_years]
244-
interv_under5_deaths_sum_per_draw_CI_across_runs_df = return_sum_95_CI_across_runs(interv_under5_deaths_df)
245252
interv_under5_SAM_deaths_df = under5_SAM_deaths_df.loc[intervention_years]
246-
interv_under5_SAM_deaths_sum_per_draw_CI_across_runs_df = return_sum_95_CI_across_runs(interv_under5_SAM_deaths_df)
247253
interv_under5_ALRI_deaths_df = under5_ALRI_deaths_df.loc[intervention_years]
254+
interv_under5_Diarrhoea_deaths_df = under5_Diarrhoea_deaths_df.loc[intervention_years]
255+
interv_under5_ALRI_deaths_with_SAM_df = under5_ALRI_deaths_with_SAM_df.loc[intervention_years]
256+
interv_under5_Diarrhoea_deaths_with_SAM_df = under5_Diarrhoea_deaths_with_SAM_df.loc[intervention_years]
257+
258+
# sum of under 5 deaths over intervention period
259+
interv_under5_deaths_sum_per_draw_CI_across_runs_df = return_sum_95_CI_across_runs(interv_under5_deaths_df)
260+
interv_under5_SAM_deaths_sum_per_draw_CI_across_runs_df = return_sum_95_CI_across_runs(interv_under5_SAM_deaths_df)
248261
interv_under5_ALRI_deaths_sum_per_draw_CI_across_runs_df = \
249262
return_sum_95_CI_across_runs(interv_under5_ALRI_deaths_df)
250-
interv_under5_Diarrhoea_deaths_df = under5_Diarrhoea_deaths_df.loc[intervention_years]
251263
interv_under5_Diarrhoea_deaths_sum_per_draw_CI_across_runs_df = \
252264
return_sum_95_CI_across_runs(interv_under5_Diarrhoea_deaths_df)
253-
interv_under5_ALRI_deaths_with_SAM_df = under5_ALRI_deaths_with_SAM_df.loc[intervention_years]
254265
interv_under5_ALRI_deaths_with_SAM_sum_per_draw_CI_across_runs_df = \
255266
return_sum_95_CI_across_runs(interv_under5_ALRI_deaths_with_SAM_df)
256-
interv_under5_Diarrhoea_deaths_with_SAM_df = under5_Diarrhoea_deaths_with_SAM_df.loc[intervention_years]
257267
interv_under5_Diarrhoea_deaths_with_SAM_sum_per_draw_CI_across_runs_df = \
258268
return_sum_95_CI_across_runs(interv_under5_Diarrhoea_deaths_with_SAM_df)
259269

@@ -320,14 +330,7 @@ def extract_death_data_frames_and_outcomes(
320330
'under5_mort_rate_mean_ci_df': under5mr_per_year_per_draw_df,
321331
'interv_years': intervention_years}
322332

323-
# # TODO: rm prints when no longer needed
324-
# print("\nYears, and (Draws, Runs) with no under 5 death:")
325-
# no_under5_deaths = [(under5_deaths.index[row], under5_deaths.columns[col]) for row, col in
326-
# zip(*np.where(under5_deaths == 0.0))]
327-
# print(f"{no_under5_deaths}")
328-
# #
329-
330-
def extract_interv_daly_data_frames_and_outcomes(
333+
def extract_daly_data_frames_and_outcomes(
331334
folder,
332335
years_of_interest,
333336
intervention_years,
@@ -347,7 +350,7 @@ def extract_interv_daly_data_frames_and_outcomes(
347350
# Extract all DALYs assigned to children under 5 --- dalys_stacked_by_age_and_time, i.e. all the year of life lost
348351
# are ascribed to the age of the death and the year of the death differentiated by cause of death / disability
349352

350-
def extrapolate_interv_dalys_data_from_logs(df: pd.DataFrame) -> pd.Series:
353+
def extrapolate_dalys_data_from_logs(df: pd.DataFrame) -> pd.Series:
351354
# Melt the DataFrame to have 'cause_of_dalys' as a variable
352355
df_with_cause_of_dalys = df.melt(
353356
id_vars=['age_range', 'sex', 'year'],
@@ -363,25 +366,25 @@ def extrapolate_interv_dalys_data_from_logs(df: pd.DataFrame) -> pd.Series:
363366
)
364367

365368
# Keep only dalys for children under-5 by year and cause_of_dalys
366-
under5_interv_dalys_by_year_cause = \
369+
under5_dalys_by_year_cause = \
367370
df_with_cause_of_dalys[
368-
(df_with_cause_of_dalys['year'].isin(intervention_years)) &
371+
(df_with_cause_of_dalys['year'].isin(years_of_interest)) &
369372
(df_with_cause_of_dalys['age_range'] == '0-4')
370373
].groupby(['year', 'cause_of_dalys'],as_index=True)['dalys'].sum()
371374

372-
return under5_interv_dalys_by_year_cause
375+
return under5_dalys_by_year_cause
373376

374-
under5_interv_dalys_by_cause_df = extract_results(
377+
under5_dalys_by_cause_df = extract_results(
375378
folder,
376379
module="tlo.methods.healthburden",
377380
key="dalys_stacked_by_age_and_time",
378-
custom_generate_series=lambda df: extrapolate_interv_dalys_data_from_logs(df),
381+
custom_generate_series=lambda df: extrapolate_dalys_data_from_logs(df),
379382
do_scaling=True
380383
).fillna(0)
381384

382385
# Apply 3% discount rate to DALYs. Re-indexing is required to use the discounting function,
383386
# so the MultiIndexes must be restored afterward.
384-
under5_dalys_by_cause_df__reset_index = under5_interv_dalys_by_cause_df.reset_index()
387+
under5_dalys_by_cause_df__reset_index = under5_dalys_by_cause_df.reset_index()
385388
under5_dalys_by_cause_df__reset_index.columns = [
386389
f"{col[0]}_{col[1]}" if col[1] != "" else f"{col[0]}"
387390
for col in under5_dalys_by_cause_df__reset_index.columns.values
@@ -392,41 +395,46 @@ def extrapolate_interv_dalys_data_from_logs(df: pd.DataFrame) -> pd.Series:
392395
_df=under5_dalys_by_cause_df__reset_index, _discount_rate=0.03, _column_for_discounting=col
393396
)[col]
394397
# set MultiIndex for rows
395-
under5_interv_dalys_by_cause_df = under5_dalys_by_cause_df__reset_index.set_index(['year', 'cause_of_dalys'])
398+
under5_dalys_by_cause_df = under5_dalys_by_cause_df__reset_index.set_index(['year', 'cause_of_dalys'])
396399
# create MultiIndex for columns
397-
new_col_tuples = [tuple(map(int, col.split('_'))) for col in under5_interv_dalys_by_cause_df.columns if '_' in col]
400+
new_col_tuples = [tuple(map(int, col.split('_'))) for col in under5_dalys_by_cause_df.columns if '_' in col]
398401
new_col_index = pd.MultiIndex.from_tuples(new_col_tuples, names=['draw', 'run'])
399-
under5_interv_dalys_by_cause_df = under5_interv_dalys_by_cause_df[[f"{d}_{r}" for d, r in new_col_tuples]]
400-
under5_interv_dalys_by_cause_df.columns = new_col_index
402+
under5_dalys_by_cause_df = under5_dalys_by_cause_df[[f"{d}_{r}" for d, r in new_col_tuples]]
403+
under5_dalys_by_cause_df.columns = new_col_index
401404

402405
# number of dalys by any cause
403-
interv_under5_dalys_df = under5_interv_dalys_by_cause_df.groupby(['year']).sum()
406+
under5_dalys_df = under5_dalys_by_cause_df.groupby(['year']).sum()
404407
# number of dalys by specific causes
405-
interv_under5_SAM_dalys_df = under5_interv_dalys_by_cause_df.xs("Childhood Undernutrition", level=1)
406-
interv_under5_ALRI_dalys_df = under5_interv_dalys_by_cause_df.xs("Lower respiratory infections", level=1)
407-
interv_under5_Diarrhoea_dalys_df = under5_interv_dalys_by_cause_df.xs("Childhood Diarrhoea", level=1)
408-
409-
interv_under5_dalys_mean_ci_per_year_per_draw_df = return_mean_95_CI_across_runs(interv_under5_dalys_df)
410-
interv_under5_SAM_dalys_mean_ci_per_year_per_draw_df = return_mean_95_CI_across_runs(interv_under5_SAM_dalys_df)
411-
interv_under5_ALRI_dalys_mean_ci_per_year_per_draw_df = return_mean_95_CI_across_runs(interv_under5_ALRI_dalys_df)
412-
interv_under5_Diarrhoea_dalys_mean_ci_per_year_per_draw_df = \
413-
return_mean_95_CI_across_runs(interv_under5_Diarrhoea_dalys_df)
414-
408+
under5_SAM_dalys_df = under5_dalys_by_cause_df.xs("Childhood Undernutrition", level=1)
409+
under5_ALRI_dalys_df = under5_dalys_by_cause_df.xs("Lower respiratory infections", level=1)
410+
under5_Diarrhoea_dalys_df = under5_dalys_by_cause_df.xs("Childhood Diarrhoea", level=1)
411+
412+
under5_dalys_mean_ci_per_year_per_draw_df = return_mean_95_CI_across_runs(under5_dalys_df)
413+
under5_SAM_dalys_mean_ci_per_year_per_draw_df = return_mean_95_CI_across_runs(under5_SAM_dalys_df)
414+
under5_ALRI_dalys_mean_ci_per_year_per_draw_df = return_mean_95_CI_across_runs(under5_ALRI_dalys_df)
415+
under5_Diarrhoea_dalys_mean_ci_per_year_per_draw_df = return_mean_95_CI_across_runs(under5_Diarrhoea_dalys_df)
416+
417+
# under 5 DALYs for each year within intervention period
418+
interv_under5_dalys_df = under5_dalys_df.loc[intervention_years]
419+
interv_under5_SAM_dalys_df = under5_SAM_dalys_df.loc[intervention_years]
420+
interv_under5_ALRI_dalys_df = under5_ALRI_dalys_df.loc[intervention_years]
421+
interv_under5_Diarrhoea_dalys_df = under5_Diarrhoea_dalys_df.loc[intervention_years]
422+
423+
# sum of under 5 DALYs over intervention period
415424
interv_under5_dalys_sum_per_draw_CI_across_runs_df = return_sum_95_CI_across_runs(interv_under5_dalys_df)
416425
interv_under5_SAM_dalys_sum_per_draw_CI_across_runs_df = return_sum_95_CI_across_runs(interv_under5_SAM_dalys_df)
417-
interv_under5_ALRI_dalys_sum_per_draw_CI_across_runs_df = \
418-
return_sum_95_CI_across_runs(interv_under5_ALRI_dalys_df)
426+
interv_under5_ALRI_dalys_sum_per_draw_CI_across_runs_df = return_sum_95_CI_across_runs(interv_under5_ALRI_dalys_df)
419427
interv_under5_Diarrhoea_dalys_sum_per_draw_CI_across_runs_df = \
420428
return_sum_95_CI_across_runs(interv_under5_Diarrhoea_dalys_df)
421429

422-
return {'interv_under5_dalys_df': interv_under5_dalys_df,
423-
'interv_under5_SAM_dalys_df': interv_under5_SAM_dalys_df,
424-
'interv_under5_ALRI_dalys_df': interv_under5_ALRI_dalys_df,
425-
'interv_under5_Diarrhoea_dalys_df': interv_under5_Diarrhoea_dalys_df,
426-
'interv_under5_dalys_mean_ci_df': interv_under5_dalys_mean_ci_per_year_per_draw_df,
427-
'interv_under5_SAM_dalys_mean_ci_df': interv_under5_SAM_dalys_mean_ci_per_year_per_draw_df,
428-
'interv_under5_ALRI_dalys_mean_ci_df': interv_under5_ALRI_dalys_mean_ci_per_year_per_draw_df,
429-
'interv_under5_Diarrhoea_dalys_mean_ci_df': interv_under5_Diarrhoea_dalys_mean_ci_per_year_per_draw_df,
430+
return {'under5_dalys_df': under5_dalys_df,
431+
'under5_SAM_dalys_df': under5_SAM_dalys_df,
432+
'under5_ALRI_dalys_df': under5_ALRI_dalys_df,
433+
'under5_Diarrhoea_dalys_df': under5_Diarrhoea_dalys_df,
434+
'under5_dalys_mean_ci_df': under5_dalys_mean_ci_per_year_per_draw_df,
435+
'under5_SAM_dalys_mean_ci_df': under5_SAM_dalys_mean_ci_per_year_per_draw_df,
436+
'under5_ALRI_dalys_mean_ci_df': under5_ALRI_dalys_mean_ci_per_year_per_draw_df,
437+
'under5_Diarrhoea_dalys_mean_ci_df': under5_Diarrhoea_dalys_mean_ci_per_year_per_draw_df,
430438
'interv_under5_dalys_sum_ci_df': interv_under5_dalys_sum_per_draw_CI_across_runs_df,
431439
'interv_under5_SAM_dalys_sum_ci_df': interv_under5_SAM_dalys_sum_per_draw_CI_across_runs_df,
432440
'interv_under5_ALRI_dalys_sum_ci_df': interv_under5_ALRI_dalys_sum_per_draw_CI_across_runs_df,
@@ -442,15 +450,15 @@ def regenerate_pickles_with_debug_logs(iterv_folders_dict) -> None:
442450
def extract_tx_data_frames(
443451
folder,
444452
years_of_interest,
445-
intervention_years,
453+
intervention_datayears,
446454
interv
447455
) -> Dict[str, pd.DataFrame]:
448456
"""
449457
Extracts and summarizes treatment data by age group and year.
450458
451459
:param folder: Path to the folder containing outcome data.
452460
:param years_of_interest: List of years to extract data for.
453-
:param intervention_years: List of years during which the intervention was implemented (if any).
461+
:param intervention_datayears: List of years for which data include the interventions if any implemented.
454462
:param interv: Name or identifier of the intervention.
455463
:return: Dictionary with DataFrames:
456464
(1) 'tx_by_age_group_df': Counts by year, treatment, age_group (by draw and run),
@@ -493,19 +501,22 @@ def extract_tx_data_frames(
493501
tx_mean_ci_df = return_mean_95_CI_across_runs(tx_mean_df)
494502

495503
# For intervention years
496-
interv_tx_by_age_group_df = tx_by_age_group_df.loc[intervention_years]
504+
interv_tx_by_age_group_df = tx_by_age_group_df.loc[intervention_datayears]
497505
interv_tx_by_age_group_mean_ci_df = return_mean_95_CI_across_runs(interv_tx_by_age_group_df)
498506
interv_tx_mean_df = interv_tx_by_age_group_df.groupby(['year', 'treatment']).sum()
499507
interv_tx_mean_ci_df = return_mean_95_CI_across_runs(interv_tx_mean_df)
500508

509+
# report during which years interventions were implemented (if any)
510+
interv_years = [year+1 for year in intervention_datayears[:-1]]
511+
501512
return {
502513
'tx_by_age_group_df': tx_by_age_group_df,
503514
'tx_by_age_group_mean_ci_df': tx_by_age_group_mean_ci_df,
504515
'tx_mean_ci_df': tx_mean_ci_df,
505516
'interv_tx_by_age_group_df': interv_tx_by_age_group_df,
506517
'interv_tx_by_age_group_mean_ci_df': interv_tx_by_age_group_mean_ci_df,
507518
'interv_tx_mean_ci_df': interv_tx_mean_ci_df,
508-
'interv_years': intervention_years
519+
'interv_years': interv_years
509520
}
510521

511522
def get_scen_colour(scen_name: str) -> str:
@@ -729,8 +740,8 @@ def plot_mean_outcome_and_CIs__scenarios_comparison(
729740
'under5_ALRI_deaths_with_SAM_mean_ci_df', 'under5_Diarrhoea_deaths_with_SAM_mean_ci_df']
730741
else: # outcome_type == "DALYs":
731742
neonatal_outcomes = [None, None, None, None] # No data on DALYs for neonatal
732-
under5_outcomes = ['interv_under5_dalys_mean_ci_df', 'interv_under5_SAM_dalys_mean_ci_df',
733-
'interv_under5_ALRI_dalys_mean_ci_df', 'interv_under5_Diarrhoea_dalys_mean_ci_df']
743+
under5_outcomes = ['under5_dalys_mean_ci_df', 'under5_SAM_dalys_mean_ci_df',
744+
'under5_ALRI_dalys_mean_ci_df', 'under5_Diarrhoea_dalys_mean_ci_df']
734745
outcome = neonatal_outcomes[i] if cohort == 'Neonatal' else under5_outcomes[i]
735746

736747
if outcome:
@@ -755,7 +766,7 @@ def plot_mean_outcome_and_CIs__scenarios_comparison(
755766
means, ci_lower, ci_upper = zip(*scen_data.values.flatten())
756767

757768
# Plot the data
758-
years_to_plot = [year for year in plot_years if year in scen_data.index]
769+
years_to_plot = [year for year in plot_years if year-1 in scen_data.index]
759770
ax.plot(years_to_plot, means, label=scenario, color=get_scen_colour(scenario))
760771
ax.fill_between(years_to_plot, ci_lower, ci_upper, color=get_scen_colour(scenario), alpha=0.2)
761772

0 commit comments

Comments
 (0)