amr/temp2.txt at main · UCL/amr · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444

      return _canonicalize_bacteria_slug(slug)


  def _normalize_drug_slug(name: str) -> str:
      """Slugify ``name`` and apply any registered drug-slug override.

      ``name`` is passed through ``_slugify_value``; the resulting slug is then
      looked up in ``DRUG_SLUG_NORMALIZATION_OVERRIDES``. When an entry exists
      its value is returned, otherwise the slug is returned unchanged.
      """
      canonical = _slugify_value(name)
      resolved = DRUG_SLUG_NORMALIZATION_OVERRIDES.get(canonical, canonical)
      return resolved


  def _compute_population_scale(year_df: pd.DataFrame, world_population:
Optional[float]) -> float:
      if world_population is None or world_population <= 0:
          return 1.0
      if "total_population" not in year_df:
          return 1.0

      avg_population = year_df["total_population"].mean(skipna=True)
      if pd.isna(avg_population) or avg_population <= 0:
          return 1.0

      return float(world_population / avg_population)


  # Reserve drugs matching the Rust config carbapenem_reserve_drugs list
  RESERVE_DRUG_SLUGS = [
      "meropenem", "meropenem_vaborbactam", "imipenem_c", "ertapenem",
      "colistin", "linezolid", "tedizolid", "quinu_dalfo", "dalbavancin"
  ]


  def _calculate_reserve_drug_stats(year_df: pd.DataFrame) -> Dict[str,
Optional[float]]:
      """Calculate reserve/carbapenem drug usage as percentage of all
antibiotic usage.

      Returns dict with:
          - reserve_drug_share_percent: % of total drug usage from reserve
drugs
          - reserve_drug_users_mean: mean daily count of people on reserve
drugs
          - total_drug_users_mean: mean daily count of people on any drug
      """
      result: Dict[str, Optional[float]] = {
          "reserve_drug_share_percent": None,
          "reserve_drug_users_mean": None,
          "total_drug_users_mean": None,
      }

      if year_df.empty:
          return result

      # Get total drug usage
      total_on_drug_series = year_df.get("currently_taking_drug_count")
      if total_on_drug_series is None or total_on_drug_series.empty:
          return result

      total_mean = float(total_on_drug_series.mean(skipna=True))
      if pd.isna(total_mean) or total_mean <= 0:
          return result

      result["total_drug_users_mean"] = total_mean

      # Sum reserve drug usage
      reserve_total = 0.0
      for drug_slug in RESERVE_DRUG_SLUGS:
          col_name = f"{drug_slug}_currently_on_drug"
          if col_name in year_df.columns:
              drug_mean = year_df[col_name].mean(skipna=True)
              if not pd.isna(drug_mean):
                  reserve_total += float(drug_mean)

      result["reserve_drug_users_mean"] = reserve_total
      result["reserve_drug_share_percent"] = (reserve_total / total_mean) *
100.0

      return result


  def _build_headline_table(
      df: pd.DataFrame,
      year_df: pd.DataFrame,
      targets: CalibrationTargets,
      scale_factor: float,
      window_years: float,
  ) -> pd.DataFrame:
      """Build the headline table comparing simulated metrics with targets.

      Aggregates are computed from ``year_df``. Column sums are divided by
      ``window_years`` to annualise them; count-type metrics are multiplied by
      ``scale_factor`` and expressed in millions.

      Args:
          df: Kept for signature compatibility; not read by this function.
          year_df: Frame for the reporting window. Columns read when present:
              ``deaths_sepsis``, ``deaths_infection_non_sepsis``, every column
              ending in ``_new_sepsis_cases``, ``currently_taking_drug_count``,
              ``newly_infected_count`` and ``total_population``.
          targets: Object whose ``headline_metrics`` iterates over dict-like
              items read via ``key``, ``label``, ``target`` and ``unit``.
          scale_factor: Multiplier applied to simulated counts.
          window_years: Window length in years; a non-finite or non-positive
              value disables annualisation (divisor ``1.0``).

      Returns:
          A frame built from one record per headline metric that has a ``key``,
          with the fields ``Metric``, ``Simulation``, ``Target``, ``Delta`` and
          ``Unit``.
      """
      annualization_factor = (
          window_years if np.isfinite(window_years) and window_years > 0 else 1.0
      )

      def _annualize_sum(value: float) -> float:
          # Non-finite sums are passed through untouched rather than divided.
          if not np.isfinite(value):
              return value
          return value / annualization_factor

      aggregations: Dict[str, Optional[float]] = {}

      # Missing death columns fall back to an empty series, i.e. a sum of 0.
      sepsis_deaths_total = _annualize_sum(
          float(year_df.get("deaths_sepsis", pd.Series(dtype=float)).sum())
      )
      inf_deaths_total = _annualize_sum(
          float(
              year_df.get(
                  "deaths_infection_non_sepsis", pd.Series(dtype=float)
              ).sum()
          )
      )
      total_infection_deaths = sepsis_deaths_total + inf_deaths_total

      scaled_infection_deaths = total_infection_deaths * scale_factor
      aggregations["infection_deaths_millions"] = (
          scaled_infection_deaths / 1e6 if scaled_infection_deaths else 0.0
      )

      # Calculate incident cases of sepsis (summing per-bacteria incident cases)
      sepsis_inc_cols = [
          c for c in year_df.columns if c.endswith("_new_sepsis_cases")
      ]
      if sepsis_inc_cols:
          raw_sepsis_sum = float(year_df[sepsis_inc_cols].sum().sum())
          annualized_sepsis = _annualize_sum(raw_sepsis_sum)
          scaled_sepsis = annualized_sepsis * scale_factor
          aggregations["sepsis_incident_cases_millions"] = scaled_sepsis / 1e6
      else:
          aggregations["sepsis_incident_cases_millions"] = np.nan

      # People on antibiotics is a mean over the window, so it is scaled but
      # not annualised.
      if "currently_taking_drug_count" in year_df:
          people_on_drug = year_df["currently_taking_drug_count"].mean(skipna=True)
          if pd.isna(people_on_drug):
              aggregations["people_on_antibiotics_millions"] = np.nan
          else:
              scaled_people_on_drug = float(people_on_drug) * scale_factor
              aggregations["people_on_antibiotics_millions"] = (
                  scaled_people_on_drug / 1e6
              )
      else:
          aggregations["people_on_antibiotics_millions"] = np.nan

      if {"newly_infected_count", "total_population"}.issubset(year_df.columns):
          total_new_infections = _annualize_sum(
              float(year_df["newly_infected_count"].sum())
          )
          avg_population = float(year_df["total_population"].mean())
          incidence = _safe_divide(total_new_infections, avg_population)
          aggregations["annual_infection_incidence_percent"] = (
              (incidence * 100.0) if incidence is not None else None
          )
      else:
          aggregations["annual_infection_incidence_percent"] = None

      rows = []
      for item in targets.headline_metrics:
          key = item.get("key")
          if key is None:
              continue

          sim_value = aggregations.get(key)
          target_value = item.get("target")
          # Only numeric targets are forwarded to the delta formatter.
          delta = _format_delta(
              sim_value,
              target_value if isinstance(target_value, (int, float)) else None,
          )

          rows.append({
              "Metric": item.get("label", key),
              "Simulation": sim_value,
              "Target": target_value,
              "Delta": delta,
              "Unit": item.get("unit"),
          })

      return pd.DataFrame(rows)


  def _load_bacteria_drug_matrix(
      path: Path,
      dot_reason: Optional[str] = None,
  ) -> pd.DataFrame:
      if path is None or not path.exists():
          return pd.DataFrame(columns=["Bacteria", "drug", "target_raw",
"target", "reason", "bacteria_slug", "drug_slug"])

      df = pd.read_csv(path)
      if df.empty:
          return pd.DataFrame(columns=["Bacteria", "drug", "target_raw",
"target", "reason", "bacteria_slug", "drug_slug"])

      # Drop metadata columns before melting (these are not drugs)
      metadata_columns = ["notes", "Notes", "NOTES", "note", "Note"]
      df = df.drop(columns=[col for col in metadata_columns if col in
df.columns], errors="ignore")

      df = df.melt(id_vars="Bacteria", var_name="drug",
value_name="target_raw")
      df["target"] = pd.to_numeric(df["target_raw"], errors="coerce")
      df["reason"] = ""

      if dot_reason:
          dot_mask = df["target_raw"].astype(str).str.strip() == "."
          df.loc[dot_mask, "reason"] = dot_reason

      df["bacteria_slug"] = df["Bacteria"].apply(_slugify_bacteria_value)
      df["drug_slug"] = df["drug"].apply(_slugify_value)
      return df


  def _load_bacteria_metric_values(
      path: Optional[Path],
      value_column: str,
  ) -> pd.DataFrame:
      columns = ["Bacteria", "value", "notes", "bacteria_slug"]
      if path is None or not path.exists():
          return pd.DataFrame(columns=columns)

      df = pd.read_csv(path)
      if df.empty or value_column not in df.columns:
          return pd.DataFrame(columns=columns)

      metric_df = pd.DataFrame({
          "Bacteria": df["Bacteria"],
          "value": pd.to_numeric(df[value_column], errors="coerce"),
          "notes": df.get("notes"),
      })
      metric_df["bacteria_slug"] =
metric_df["Bacteria"].apply(_slugify_bacteria_value)
      return metric_df


  def _extract_bacteria_and_drugs(df: pd.DataFrame) -> Tuple[set[str],
set[str]]:
      bacteria = {
          col.replace("_currently_infected", "")
          for col in df.columns
          if col.endswith("_currently_infected")
      }
      drugs = {
          col.replace("_currently_on_drug", "")
          for col in df.columns
          if col.endswith("_currently_on_drug")
      }
      return bacteria, drugs


  def _compute_resistance_stats(
      frame: pd.DataFrame,
      infected_col: str,
      positive_count_col: str,
  ) -> Optional[Tuple[float, float]]:
      required = {infected_col, positive_count_col}
      if frame.empty or any(col not in frame for col in required):
          return None

      infected_series = frame[infected_col].astype(float)
      positive_series = frame[positive_count_col].astype(float)

      mask = infected_series > 0
      if not mask.any():
          return (np.nan, 0.0)

      total_infected = float(infected_series[mask].sum())
      if total_infected <= 0:
          return (np.nan, 0.0)

      total_positive = float(positive_series[mask].sum())
      prevalence = total_positive / total_infected
      percent = float(np.clip(prevalence, 0.0, 1.0) * 100.0)
      return (percent, total_infected)


  def _compute_average_resistant_stats(
      frame: pd.DataFrame,
      sum_any_col: str,
      positive_count_col: str,
  ) -> Optional[Tuple[float, float, bool]]:
      if frame.empty or sum_any_col not in frame:
          return None

      sum_any_series = frame[sum_any_col].astype(float)
      positive_series = (
          frame[positive_count_col].astype(float)
          if positive_count_col in frame
          else pd.Series(0.0, index=sum_any_series.index)
      )

      sum_values = sum_any_series.to_numpy(dtype=float)
      positive_values = positive_series.to_numpy(dtype=float)

      # Use reported positive counts when available; fall back to the summed
any_r values
      # so that ratios remain within [0, 1] even if the count columns are zero
or missing.
      denominators = np.where(positive_values > 0.0, positive_values, 0.0)
      fallback_mask = (denominators <= 0.0) & (sum_values > 0.0)
      fallback_used = bool(fallback_mask.any())
      if fallback_used:
          denominators = denominators.copy()
          denominators[fallback_mask] = sum_values[fallback_mask]

      valid_mask = denominators > 0.0
      if not np.any(valid_mask):
          return (np.nan, 0.0, fallback_used)

      total_any = float(sum_values[valid_mask].sum())
      total_denominator = float(denominators[valid_mask].sum())
      if total_denominator <= 0.0:
          return (np.nan, 0.0, fallback_used)

      mean_resistant = total_any / total_denominator
      mean_resistant = float(np.clip(mean_resistant, 0.0, 1.0))
      percent = mean_resistant * 100.0
      return (percent, total_denominator, fallback_used)


  def _compute_microbiome_stats(
      frame: pd.DataFrame,
      presence_col: str,
      resistant_col: str,
  ) -> Optional[Tuple[float, float]]:
      if frame.empty or presence_col not in frame or resistant_col not in
frame:
          return None

      presence_series = frame[presence_col]
      total_presence = float(presence_series.sum(skipna=True))
      if total_presence <= 0:
          return (np.nan, 0.0)

      resistant_series = frame[resistant_col]
      total_resistant = float(resistant_series.sum(skipna=True))
      share = total_resistant / total_presence
      percent = float(share * 100.0)
      return (percent, total_presence)


> def _calculate_resistance_table(
      df: pd.DataFrame,
      year_df: pd.DataFrame,
      expanded_df: pd.DataFrame,
      resistance_targets: pd.DataFrame,
      *,
      average_targets: Optional[pd.DataFrame] = None,
      microbiome_targets: Optional[pd.DataFrame] = None,
      window_label: Optional[str] = None,
      expanded_label: Optional[str] = None,
      low_sample_threshold: float = 50.0,
  ) -> pd.DataFrame:
      columns = [
          "Bacteria",
          "Drug",
          RESISTANCE_SIM_COL,
          RESISTANCE_TARGET_COL,
          RESISTANCE_DELTA_COL,
          "Average resistant simulation",
          "Average resistant target",
          "Average resistant delta",
          "Microbiome simulation",
          "Microbiome target",
          "Microbiome delta",
          "Infection resistance simulation source: Community (%)",
          "Infection resistance simulation source: HGT (%)",
          "Infection resistance simulation source: Microbiome (%)",
          "Infection resistance simulation source: De Novo (%)",
          "Microbiome HGT Events (Asymptomatic)",
          "Infected person-days",
          "Resistant person-days",
          "Microbiome carrier-days",
          "Note",
      ]
      if resistance_targets is None or resistance_targets.empty:
          resistance_targets = pd.DataFrame(columns=["Bacteria", "drug",
"target", "reason", "bacteria_slug", "drug_slug"])

      if average_targets is None or average_targets.empty:
          average_targets = pd.DataFrame(columns=["Bacteria", "drug",
"target", "reason", "bacteria_slug", "drug_slug"])

      if microbiome_targets is None or microbiome_targets.empty:
          microbiome_targets = pd.DataFrame(columns=["Bacteria", "drug",
"target", "reason", "bacteria_slug", "drug_slug"])

      if resistance_targets.empty and average_targets.empty and
microbiome_targets.empty:
          return pd.DataFrame(columns=columns)

      bacteria_set, drug_set = _extract_bacteria_and_drugs(df)

      combo_display: Dict[Tuple[str, str], Tuple[str, str]] = {}
      prevalence_lookup: Dict[Tuple[str, str], Tuple[Optional[float], str]] =
{}
      average_lookup: Dict[Tuple[str, str], Optional[float]] = {}
      microbiome_lookup: Dict[Tuple[str, str], Optional[float]] = {}

      for _, row in resistance_targets.iterrows():
          key = (row["bacteria_slug"], row["drug_slug"])
          if key not in combo_display:
              combo_display[key] = (row.get("Bacteria", key[0]),
row.get("drug", key[1]))
          prevalence_lookup[key] = (row.get("target"), str(row.get("reason")
or ""))

      for _, row in average_targets.iterrows():
          key = (row["bacteria_slug"], row["drug_slug"])
          if key not in combo_display:
              combo_display[key] = (row.get("Bacteria", key[0]),
row.get("drug", key[1]))
          average_lookup[key] = row.get("target")

      for _, row in microbiome_targets.iterrows():
          key = (row["bacteria_slug"], row["drug_slug"])
          if key not in combo_display:
              combo_display[key] = (row.get("Bacteria", key[0]),
row.get("drug", key[1]))
          microbiome_lookup[key] = row.get("target")

      combo_keys: Set[Tuple[str, str]] = set(combo_display.keys()) |
set(prevalence_lookup.keys()) | set(average_lookup.keys()) |
set(microbiome_lookup.keys())

      def _sort_key(item: Tuple[str, str]) -> Tuple[str, str]:
          display = combo_display.get(item, item)
          return (str(display[0]).lower(), str(display[1]).lower())

      records = []
      for b_slug, d_slug in sorted(combo_keys, key=_sort_key):
          bacteria_name, drug_name = combo_display.get((b_slug, d_slug),
(b_slug.replace("_", " "), d_slug.replace("_", " ")))

          note_parts = []

          prevalence_target_raw, prevalence_reason =
prevalence_lookup.get((b_slug, d_slug), (np.nan, ""))
          if prevalence_reason:
              note_parts.append(prevalence_reason)
          prevalence_target = (
              float(prevalence_target_raw * 100.0)
              if prevalence_target_raw is not None and not
pd.isna(prevalence_target_raw)
              else np.nan
          )

          average_target_raw = average_lookup.get((b_slug, d_slug))
          average_target = (
              float(average_target_raw * 100.0)
              if average_target_raw is not None and not
pd.isna(average_target_raw)
              else np.nan
          )

          microbiome_target_raw = microbiome_lookup.get((b_slug, d_slug))
          microbiome_target = (