Skip to content

Commit c392004

Browse files
Updates to Costing and ROI analysis (#1711)
* Changes to accommodate revised scenarios - Note that Xpert costs no longer need to be estimated separately. * first take - Frontier plot * corrections to Scenario dictionary * improve legend positioning in Cost effectiveness plane plots * remove horizontal lines from ROI plots and formatting changes to CEA Plane * remove horizontal lines from ROI plots and formatting changes to CEA Plane + Add ROI bar plots at two implementation cost assumptions * move all the functions up + fix calculation of roi at upper bound ASC + formatting fixes to plots * update the order in which scenarios appear in extracted csvs * remove superfluous imports * new results folder * Add consumables plot + change title of ROI plots * formatting edits to consumables plot * fix issues with negative costs/health impact when estimating ICERs and ROIs * edit roi bar plots for inset figures * update scenarios and results folder for latest outputs * fix treemap plot - previously summed across the three stats * update results extracts * update results folder for costing_validation * add per capita estimates * add per capita estimates * change input_cost central measure to median * generate heatmap plots of consumable availability (Note that the RF should be taken from branch - tara/gff_hss_and_htm_scale_up * - update the calculation of ICERs to be based on summary stats rather than runs * add frontier in the main CEA plot * update costing scripts to accept latest scenario runs. * add the estimate of HSI counts and update the statements on HIV consumable costs. * Add CSB++ to the list of nutritional consumables. 
* Update the cost of SAM medicines and scale down the cost of F-75 therapeutic milk * Update cost of F-75 therapeutic milk for validation purposes * clean equipment and consumable names and drop irrelevant ones * add extracts for manuscript on nutrition commodities * fix equipment name cleaning * update the cost of SAM medicines to reflect 15% complicated cases as opposed to 50% as previously assumed (i.e. weighted average of cost for uncomplicated and complicated cases) as opposed to simple average. * update figure format * add scenario file * Update format of figures to meet journal requirements * fix imports * linting --------- Co-authored-by: Tim Hallett <39991060+tbhallett@users.noreply.github.com>
1 parent 3bc75aa commit c392004

File tree

6 files changed

+1783
-805
lines changed

6 files changed

+1783
-805
lines changed
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
version https://git-lfs.github.com/spec/v1
2-
oid sha256:fca1834c38c657dbbd53616e3972b8909882faa1767a22f11349ed2bbcbca183
3-
size 26791
2+
oid sha256:20131421011ea4afa54d7557ab31b962061fe8724b12a08986d670702361e8ed
3+
size 27038

src/scripts/comparison_of_horizontal_and_vertical_programs/economic_analysis_for_manuscript/roi_analysis_horizontal_vs_vertical.py

Lines changed: 1012 additions & 719 deletions
Large diffs are not rendered by default.

src/scripts/costing/cost_estimation.py

Lines changed: 154 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import ast
22
import itertools
33
import math
4+
import re
45
import textwrap
56
from collections import defaultdict
67
from itertools import cycle
@@ -167,6 +168,117 @@ def get_discount_factor(year):
167168

168169
return _df
169170

171+
# Clean the names of consumables in input cost dataframe
172+
def clean_consumable_name(name: str) -> str:
    """
    Clean a consumable name for analysis and plotting.

    The steps are applied in this order:
      1. remove procurement suffixes introduced by an underscore (e.g. ``_CMST``, ``_each_CMST``);
      2. remove a trailing ``_<number>`` package indicator and any
         ``<number> tests/pieces/doses/pack(s)/box(es)`` quantity;
      3. drop the stray 'Â' character and spell '½' as '1/2';
      4. collapse runs of whitespace and trim both ends;
      5. harmonise known spelling variants (whole-word, case-insensitive);
      6. replace the whole name with a canonical label when it matches a known item
         (case-insensitive exact match);
      7. upper-case the first character only, leaving the rest untouched.

    Punctuation left behind by step 2 is not tidied, e.g. ``'injection, 10 packs'``
    becomes ``'injection,'``.

    Parameters
    ----------
    name : str
        Raw consumable name. Non-string values are returned unchanged.

    Returns:
    -------
    str
        The cleaned name (or the input itself when it is not a string).
    """
    if not isinstance(name, str):
        return name

    cleaned = name

    # --- 1. Remove common procurement suffixes ---
    # The suffix must be followed by a word boundary or by another underscore;
    # in the latter case that underscore is removed together with the suffix.
    cleaned = re.sub(
        r'_(CMST|IDA|Each_CMST|each_CMST|each|ID|PFR|nt)(\b|_)',
        '',
        cleaned,
        flags=re.IGNORECASE,
    )

    # --- 2. Remove trailing numeric package indicators ---
    cleaned = re.sub(r'_\d+(\.\d+)?$', '', cleaned)
    cleaned = re.sub(
        r'\b\d+\s*(tests|pieces|doses|pack|packs|box|boxes)\b',
        '',
        cleaned,
        flags=re.IGNORECASE,
    )

    # --- 3. Remove awkward characters ---
    cleaned = cleaned.replace('Â', '')
    cleaned = cleaned.replace('½', '1/2')

    # --- 4. Normalise whitespace ---
    cleaned = re.sub(r'\s+', ' ', cleaned).strip()

    # --- 5. Harmonise common spelling variants ---
    harmonisation = {
        'Amoxycillin': 'Amoxicillin',
        'Gentamycin': 'Gentamicin',
        'Declofenac': 'Diclofenac',
        'Frusemide': 'Furosemide',
        'Cotrimoxizole': 'Cotrimoxazole',
        "ringer's lactate": "Ringer's lactate",
    }

    for old, new in harmonisation.items():
        # Escape the key so that it is always matched literally, even if a future
        # entry contains a regex metacharacter such as '.', '(' or '+'.
        cleaned = re.sub(rf'\b{re.escape(old)}\b', new, cleaned, flags=re.IGNORECASE)

    # --- 6. Canonical renaming for key nutrition / diagnostics items ---
    canonical_map = {
        'Therapeutic spread, sachet 92g/CAR-150':
            'Ready-to-use therapeutic food (RUTF)',
        'Therapeutic spread, sachet 92g / CAR-150':
            'Ready-to-use therapeutic food (RUTF)',
        'VL test':
            'Viral load test',
        'Dietary supplements (country-specific)':
            'Multiple micronutrient powder (MNP) supplement',
    }

    # Apply canonical renaming (case-insensitive exact match on the whole name)
    canonical_lookup = {old.lower(): new for old, new in canonical_map.items()}
    cleaned = canonical_lookup.get(cleaned.lower(), cleaned)

    # --- 7. Capitalise first letter only (preserve acronyms elsewhere) ---
    # The name cannot start with whitespace here (stripped in step 4), so slicing
    # is equivalent to substituting the first character.
    cleaned = cleaned[:1].upper() + cleaned[1:]

    return cleaned
242+
243+
# Clean the names of equipment in the cost dataframe, Drop irrelevant ones
244+
def clean_equipment_name(name: str, equipment_drop_list=None) -> str:
    """
    Clean and standardise a medical equipment name for analysis.

    Applies light normalisation and explicit renaming only:
      1. repair known encoding artefacts (mis-decoded right single quote, stray 'Â');
      2. put exactly one space on each side of every '/' and collapse other whitespace;
      3. replace the whole name with a canonical label when it matches a known item
         (case-insensitive exact match);
      4. upper-case the first character only, leaving the rest untouched.

    Parameters
    ----------
    name : str
        Raw equipment name. Non-string values are returned unchanged.

    equipment_drop_list : optional
        Accepted for interface compatibility; it is not read by this function.
        NOTE(review): dropping irrelevant equipment is presumably done by the caller - confirm.

    Returns:
    -------
    str
        The cleaned name (or the input itself when it is not a string).
    """
    if not isinstance(name, str):
        return name

    cleaned = name

    # --- 1. Fix known encoding artefacts ---
    # 'â\x80\x99' is the three-character residue of a mis-decoded right single
    # quotation mark; the same sequence appears in the rename map below.
    cleaned = cleaned.replace('â\x80\x99', '’')
    cleaned = cleaned.replace('Â', '')

    # --- 2. Normalise slashes and whitespace ---
    cleaned = re.sub(r'\s*/\s*', ' / ', cleaned)
    cleaned = re.sub(r'\s+', ' ', cleaned).strip()

    # --- 3. Explicit canonical renaming (keep minimal) ---
    rename_map = {
        'Image view station, for conferences':
            'Clinical image viewing workstation (PACS / case review)',
        'Cusco’s / bivalved Speculum (small, medium, large)':
            'Cusco’s / bivalved speculum (small, medium, large)',
        # This key cannot match after steps 1-2 (the artefact and the slash spacing
        # have already been normalised); it is kept as a record of the raw spelling.
        'Cuscoâ\x80\x99s/ bivalved Speculum (small, medium, large)':
            'Cusco’s / bivalved speculum (small, medium, large)',
    }

    # Case-insensitive exact match on the whole name
    rename_lookup = {old.lower(): new for old, new in rename_map.items()}
    cleaned = rename_lookup.get(cleaned.lower(), cleaned)

    # --- 4. Capitalise first letter only (preserve acronyms) ---
    # The name cannot start with whitespace here (stripped in step 2), so slicing
    # is equivalent to substituting the first character.
    cleaned = cleaned[:1].upper() + cleaned[1:]

    return cleaned
281+
170282

171283
def estimate_input_cost_of_scenarios(results_folder: Path,
172284
resourcefilepath: Path,
@@ -1218,7 +1330,11 @@ def do_stacked_bar_plot_of_cost_by_category(_df: pd.DataFrame,
12181330
_scenario_dict: Optional[dict[int, str]] = None,
12191331
show_title: bool = True,
12201332
_outputfilepath: Optional[Path] = None,
1221-
_add_figname_suffix: str = ''):
1333+
_add_figname_suffix: str = '',
1334+
_label_fontsize: float = 9.0,
1335+
_tick_fontsize: float = 10.0,
1336+
_legend_label_map: Optional[dict[str, str]] = None
1337+
):
12221338
"""
12231339
Create and save a stacked bar chart of costs by category, subcategory or subgroup.
12241340
@@ -1257,6 +1373,14 @@ def do_stacked_bar_plot_of_cost_by_category(_df: pd.DataFrame,
12571373
_add_figname_suffix : str, default ''
12581374
Optional string to append to the saved figure's filename
12591375
1376+
_label_fontsize : float, optional
1377+
fontsize of data labels
1378+
1379+
_tick_fontsize: float, optional
1380+
font size of axis ticks
1381+
1382+
_legend_label_map: dict, optional
1383+
Dictionary providing clean category names for publishable legends
12601384
Returns:
12611385
-------
12621386
None
@@ -1395,10 +1519,11 @@ def do_stacked_bar_plot_of_cost_by_category(_df: pd.DataFrame,
13951519
xy=(x, rect.get_y() + height), # Arrow start
13961520
xytext=(x + 0.3, rect.get_y() + height + threshold), # Offset text
13971521
arrowprops=dict(arrowstyle="->", color='black', lw=0.8),
1398-
fontsize='small', ha='left', va='center', color='black'
1522+
fontsize=_label_fontsize, ha='left', va='center', color='black', fontweight='bold',
13991523
)
14001524
else: # Large segment -> label inside
1401-
ax.text(x, y, f'{round(height, 1)}', ha='center', va='center', fontsize='small', color='white')
1525+
ax.text(x, y, f'{round(height, 1)}', ha='center', va='center', fontsize=_label_fontsize,
1526+
fontweight='bold', color='white')
14021527

14031528
# Set custom x-tick labels if _scenario_dict is provided
14041529
if _scenario_dict:
@@ -1408,7 +1533,7 @@ def do_stacked_bar_plot_of_cost_by_category(_df: pd.DataFrame,
14081533

14091534
# Wrap x-tick labels for readability
14101535
wrapped_labels = [textwrap.fill(str(label), 20) for label in labels]
1411-
ax.set_xticklabels(wrapped_labels, rotation=45, ha='right', fontsize='small')
1536+
ax.set_xticklabels(wrapped_labels, rotation=45, ha='right', fontsize=_tick_fontsize)
14121537

14131538
# Period included for plot title and name
14141539
if _year == 'all':
@@ -1419,16 +1544,29 @@ def do_stacked_bar_plot_of_cost_by_category(_df: pd.DataFrame,
14191544
period = (f"{min(_year)} - {max(_year)}")
14201545

14211546
# Save plot
1422-
plt.xlabel('Scenario')
1423-
plt.ylabel('Cost (2023 USD), millions')
1547+
plt.xlabel('Scenario', fontsize = _tick_fontsize, fontweight = 'bold')
1548+
plt.ylabel('Cost (2023 USD), millions', fontsize = _tick_fontsize, fontweight = 'bold')
14241549

14251550
# Arrange the legend in the same ascending order
14261551
handles, labels = plt.gca().get_legend_handles_labels()
1427-
plt.legend(handles[::-1], labels[::-1], bbox_to_anchor=(1.05, 0.7), loc='center left', fontsize='small')
1552+
if _legend_label_map is not None:
1553+
labels = [
1554+
_legend_label_map.get(label, label)
1555+
for label in labels
1556+
]
1557+
1558+
plt.legend(
1559+
handles[::-1],
1560+
labels[::-1],
1561+
bbox_to_anchor=(1.05, 0.7),
1562+
loc='center left',
1563+
fontsize=_tick_fontsize
1564+
)
14281565

14291566
# Extend the y-axis by 25%
14301567
max_y = ax.get_ylim()[1]
14311568
ax.set_ylim(0, max_y * 1.25)
1569+
ax.tick_params(axis='y', labelsize=_tick_fontsize)
14321570

14331571
# Save the plot with tight layout
14341572
plt.tight_layout(pad=2.0) # Ensure there is enough space for the legend
@@ -1712,9 +1850,17 @@ def wrap_text(text, width=15):
17121850
if _draw is not None:
17131851
_df = _df[_df.draw == _draw]
17141852

1853+
if _year != 'all':
1854+
_df = _df[_df['year'].isin(_year)]
1855+
1856+
if 'mean' in _df.stat.unique():
1857+
_df = _df[_df['stat'] == 'mean']
1858+
else:
1859+
_df = _df[_df['stat'] == 'median']
1860+
17151861
# Remove non-specific subgroup for consumables
17161862
if _cost_category == 'medical consumables':
1717-
_df = _df[~(_df.cost_subgroup == 'supply chain (all consumables)')]
1863+
_df = _df[~(_df.cost_subgroup.str.contains('all consumables'))] # These are supply chain costs
17181864

17191865
# Create summary dataframe for treemap
17201866
_df = _df.groupby('cost_subgroup')['cost'].sum().reset_index()

0 commit comments

Comments
 (0)