Calibration routine to first observed death #39
base: pgv5_clinical_status
Changes from all commits
```
@@ -1,4 +1,5 @@
**/profiling.json
experiments/**/output/

# Data
*.csv
```
@@ -0,0 +1,21 @@

# Running the calibration routine

To generate the results analysis report from the `reports/` directory, first calibrate the model using the phase 1 calibration script. Ensure that the uv environment is synced and the Rust binaries have been built:

```
uv sync --all-packages
uv run cargo build -r
uv run python scripts/phase_1_calibration.py
```

Then, to render the analysis report, ensure that `tinytex` is installed with

```
quarto install tinytex
```

and then render the document using

```
uv run quarto render experiments/phase1/reports/calibration.qmd
```

The resulting file should be a PDF in the reports directory.
```json
@@ -0,0 +1,55 @@
{
    "epimodel.GlobalParams": {
        "seed": 1234,
        "max_time": 100.0,
        "synth_population_file": "input/people_test.csv",
        "initial_prevalence": 0.0,
        "imported_cases_timeseries": {
            "include": true,
            "filename": "./experiments/phase1/calibration/output/importation_timeseries.csv"
        },
        "infectiousness_rate_fn": {
            "Constant": {
                "rate": 1.0,
                "duration": 5.0
            }
        },
        "probability_mild_given_infect": 0.7,
        "infect_to_mild_mu": 0.1,
        "infect_to_mild_sigma": 0.0,
        "probability_severe_given_mild": 0.2,
        "mild_to_severe_mu": 0.1,
        "mild_to_severe_sigma": 0.1,
        "mild_to_resolved_mu": 0.1,
        "mild_to_resolved_sigma": 0.1,
        "probability_critical_given_severe": 0.2,
        "severe_to_critical_mu": 0.1,
        "severe_to_critical_sigma": 0.1,
        "severe_to_resolved_mu": 0.1,
        "severe_to_resolved_sigma": 0.1,
        "probability_dead_given_critical": 0.2,
        "critical_to_dead_mu": 0.1,
        "critical_to_dead_sigma": 0.1,
        "critical_to_resolved_mu": 0.1,
        "critical_to_resolved_sigma": 0.1,
        "settings_properties": {
            "Home": {"alpha": 0.0},
            "Workplace": {"alpha": 0.0},
            "School": {"alpha": 0.0},
            "CensusTract": {"alpha": 0.0}
        },
        "itinerary_ratios": {"Home": 0.25, "Workplace": 0.25, "School": 0.25, "CensusTract": 0.25},
        "prevalence_report": {
            "write": true,
            "filename": "person_property_count.csv",
            "period": 1.0
        },
        "incidence_report": {
            "write": true,
            "filename": "incidence_report.csv",
            "period": 1.0
        },
        "transmission_report": {
            "write": false,
            "filename": "transmission_report.csv"
        },
        "first_death_terminates_run": false
    }
}
```
```json
@@ -0,0 +1,32 @@
{
    "priors": {
        "symptomatic_reporting_prob": {
            "distribution": "beta",
            "parameters": {
                "alpha": 5,
                "beta": 5
            }
        },
        "settings_properties.Home.alpha": {
            "distribution": "uniform",
            "parameters": {
                "min": 0.0,
                "max": 1.0
            }
        },
        "probability_mild_given_infect": {
            "distribution": "beta",
            "parameters": {
                "alpha": 7,
                "beta": 3
            }
        },
        "infectiousness_rate_fn.Constant.rate": {
            "distribution": "uniform",
            "parameters": {
                "min": 0.1,
                "max": 1.2
            }
        }
    }
}
```
@@ -0,0 +1,258 @@

---
title: "Phase I calibration"
date: "2026-03-09"
format: pdf
---

# Overview
In this phase, we aim to calibrate the model to the timing of the first reported death due to COVID-19 in the state of Indiana. The first reported death in Indiana occurred on March 16th, 2020, 10 days after the first confirmed case.
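Since the calibration target is a single summary statistic — the day of the first death — a plausible ABC distance is the absolute gap between the simulated and observed day. This exact function is an assumption for illustration, not taken from the repository; with a January 1st, 2020 start, March 16th falls on day 75 (the value marked on the plots below):

```python
OBSERVED_FIRST_DEATH_DAY = 75  # March 16, 2020, days since a Jan 1, 2020 start

def first_death_distance(simulated_first_death_day: float) -> float:
    """Hypothetical ABC distance: days between simulated and observed first death."""
    return abs(simulated_first_death_day - OBSERVED_FIRST_DEATH_DAY)
```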
# Results
```{python}
#| echo: false
import json
import os
import pickle
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import polars as pl
import seaborn as sns

from calibrationtools import ParticleReader
from calibrationtools.calibration_results import CalibrationResults, Particle
from ixa_epi_covid import CovidModel
from mrp.api import apply_dict_overrides

os.chdir('../../../')

wd = Path("experiments", "phase1")

# Load results object from the calibration directory
with open(wd / "calibration" / "output" / "results.pkl", "rb") as fp:
    results: CalibrationResults = pickle.load(fp)
```
Quantiles for each fitted parameter:
```{python}
#| echo: false
diagnostics = results.get_diagnostics()
print(
    json.dumps(
        {
            k1: {k2: np.format_float_positional(v2, precision=3) for k2, v2 in v1.items()}
            for k1, v1 in diagnostics["quantiles"].items()
        },
        indent=4,
    )
)
```

Histograms of posterior samples compared with the probability density of each prior. In each plot, the prior is drawn as a blue line, and the histogram of the posterior samples is drawn in orange with a kernel density estimate overlaid. To generate the histogram, we sample $n$ particles from the posterior population, where $n$ is the effective sample size of the population. Resampling particles in this manner reflects the posterior weight distribution: the probability mass function over accepted particles determined by the ABC-SMC algorithm's perturbation kernel and prior distributions.
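The effective sample size of a weighted particle population is conventionally computed from the normalized weights as $1/\sum_i w_i^2$. A small sketch of ESS-based resampling — this is assumed behavior; `sample_posterior_particles` may differ internally:

```python
import numpy as np

def effective_sample_size(weights) -> float:
    # Kish effective sample size: 1 / sum of squared normalized weights
    w = np.asarray(weights, dtype=float)
    w = w / w.sum()
    return 1.0 / np.sum(w ** 2)

def resample_by_ess(particles, weights, rng: np.random.Generator):
    # Draw ESS-many particles with probability proportional to weight
    w = np.asarray(weights, dtype=float)
    w = w / w.sum()
    n = int(effective_sample_size(w))
    idx = rng.choice(len(particles), size=n, p=w)
    return [particles[i] for i in idx]
```

With equal weights the ESS equals the particle count, so resampling returns a population of the same size; skewed weights shrink it.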
```{python}
#| echo: false
#| eval: false
# Preferred API for development in calibrationtools ----------
for param in results.fitted_params:
    # Get values and use weights to alter the histogram instead of sampling with ESS
    values = results.posterior_particles.get_values_of(param)
    sns.histplot(values, weights=results.posterior_particles.weights)

    # Obtain the marginal prior and evaluate its PDF over a grid of points
    marginal_prior = results.get_marginal_prior(param)
    eval_points = np.arange(min(values) - np.var(values), max(values) + np.var(values), 0.01)
    prior_density = marginal_prior.probability_density(eval_points)
    sns.lineplot(
        data=pl.DataFrame({
            param: list(eval_points),
            'density': prior_density
        }),
        x=param,
        y='density',
    )
    plt.title("Posterior versus prior distribution")
    plt.xlabel(" ".join(param.split(".")))
    plt.ylabel("Density")
    plt.tight_layout()
    plt.show()
```
```{python}
#| echo: false
posterior_samples = results.sample_posterior_particles(n=int(results.ess))

for param in results.fitted_params:
    vals = [p[param] for p in posterior_samples]
    min_val = min(vals)
    max_val = max(vals)

    sns.histplot(x=vals, stat="density", kde=True, color='orange', edgecolor='black')
    eval_points = np.arange(
        min_val - np.var(vals), max_val + np.var(vals), 0.01
    )
    param_prior = None
    for prior in results.priors.priors:
        if prior.param == param:
            param_prior = prior
            break
    if param_prior is None:
        raise ValueError(f"Could not find prior {param}")

    density_vals = [
        param_prior.probability_density(Particle({param: v}))
        for v in eval_points
    ]

    sns.lineplot(
        data=pl.DataFrame({param: list(eval_points), "density": density_vals}),
        x=param,
        y="density",
    )
    plt.title("Posterior versus prior distribution")
    plt.xlabel(" ".join(param.split(".")))
    plt.ylabel("Density")
    plt.tight_layout()
    plt.show()
```
```{python}
#| echo: false
## Obtain the importation time series and death incidence data frames
## for a random sample from the posterior

# Re-generate a random sample of parameter sets from the posterior
particle_count = int(min(100, results.ess))
particles = results.sample_posterior_particles(n=particle_count)
```
> **Collaborator:** A similar point to the one above. This seems like something you will do with every calibrated model and is a substantial amount of code to produce for every model report. Is this something that can be moved into a

> **Author:** Something like re-run particles is valuable and could be reasonable to include on the `CalibrationResults` methods. I was also over-complicating what needed to be done, because the cleaner approach is to modify the return method of `covid_model`. It can now grab whichever file reports you want in a dictionary, and the for loop is easier to read.
```{python}
#| echo: false
default_params_file = wd / 'input' / 'default_params.json'

with open(default_params_file, "rb") as fp:
    default_params = json.load(fp)

ixa_overrides = {
    "synth_population_file": "/mnt/S_CFA_Predict/team-CMEI/synthetic_populations/cbsa_all_work_school_household_2020-04-24/cbsa_all_work_school_household/IN/Bloomington IN.csv",
    "imported_cases_timeseries": {
        "filename": "./experiments/phase1/projection/imported_cases_timeseries.csv"
    },
    "max_time": 200
}
default_params = apply_dict_overrides(default_params, {'epimodel.GlobalParams': ixa_overrides})

mrp_defaults = {
    'ixa_inputs': default_params,
    "config_inputs": {
        "exe_file": "./target/release/ixa-epi-covid",
        "output_dir": "./experiments/phase1/projection/output",
        "force_overwrite": True,
        "outputs_to_read": ['prevalence_report', 'imported_cases_timeseries']
    },
    "importation_inputs": {
        "state": "Indiana",
        "year": 2020,
        "symptomatic_reporting_prob": 0.5
    },
}
reader = ParticleReader(results.priors.params, mrp_defaults)

model = CovidModel()
importation_curves = []
prevalence_data = []
os.makedirs(mrp_defaults["config_inputs"]["output_dir"], exist_ok=True)

# Simulate each sampled particle and tag its outputs with a unique id
for uniq_id, p in enumerate(particles):
    model_inputs = reader.read_particle(p)
    outputs = model.simulate(model_inputs)

    importation_curves.append(outputs["imported_cases_timeseries"].with_columns(pl.lit(uniq_id).alias("id")))
    prevalence_data.append(outputs["prevalence_report"].with_columns(pl.lit(uniq_id).alias("id")))

importations = pl.concat(importation_curves)
all_prevalence_data = pl.concat(prevalence_data)
deaths = (
    all_prevalence_data
    .filter(pl.col("symptom_status") == "Dead")
    .group_by("t", "id")
    .agg(pl.sum("count"))
)
```
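`apply_dict_overrides` from `mrp.api` is used above to merge `ixa_overrides` into the loaded defaults. Its implementation is not shown in this diff; a generic nested-dictionary merge of the kind it presumably performs could be sketched as:

```python
def deep_override(base: dict, overrides: dict) -> dict:
    """Return a copy of base with overrides merged in, recursing into nested dicts.

    Scalar and list values in overrides replace the base value wholesale;
    only dict-valued keys are merged recursively. (Illustrative sketch, not
    the actual mrp.api implementation.)
    """
    merged = dict(base)
    for key, value in overrides.items():
        if isinstance(value, dict) and isinstance(merged.get(key), dict):
            merged[key] = deep_override(merged[key], value)
        else:
            merged[key] = value
    return merged
```

This keeps untouched sibling keys (e.g. `seed`) from the defaults while replacing only the overridden leaves, which matches how `ixa_overrides` updates `max_time` without restating every parameter.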
We can show the posterior variance in the imported-infections time series by overlaying samples from the simulated particles. Blue points represent the count of imported infections for a given day in a given simulation when the number of importations was above zero. The black line shows the median, with a 95% credible interval, of the number of imported infections on each day from January 1st to March 12th.
```{python}
#| echo: false
sns.scatterplot(
    data=importations.filter(pl.col('imported_infections') > 0),
    x="time",
    y="imported_infections",
    alpha=0.05,
)
sns.lineplot(
    data=importations,
    x="time",
    y="imported_infections",
    estimator='median'
)
plt.title(f"Imported infections over time ({particle_count} posterior samples)")
plt.xlabel("Time (days since simulation start)")
plt.ylabel("Number of imported infections (daily)")
plt.tight_layout()
plt.show()
```

> **Collaborator:** This plot needs a legend.
We can project the cumulative number of deaths observed in the model projections. Because the last SMC step has a distance tolerance threshold below one, no simulations accrue deaths before the first reported death in the data.
```{python}
#| echo: false
sns.lineplot(
    data=deaths,
    x="t",
    y="count",
    units='id', estimator=None,
    alpha=0.05
)
sns.lineplot(
    data=deaths,
    x="t",
    y="count"
)
plt.title(f"Total observed deaths over time ({particle_count} posterior samples)")
plt.xlabel("Time (days since simulation start)")
plt.ylabel("Number of deaths")
plt.axvline(x=75, color="black", linestyle="--", label="First death reported (March 16, 2020)")
plt.tight_layout()
plt.show()
```

Finally, we can plot the same trajectories for the number of infections observed in the model over time, which includes both imported and locally acquired infections. Some epidemic trajectories are strongly delayed relative to the first imported infections due to stochasticity.
> **Collaborator:** Can this plot be rotated? It's hard to examine at its current size.

```{python}
#| echo: false
sir_data = all_prevalence_data.group_by(
    't', 'infection_status', 'id'
).agg(
    pl.sum('count')
)

g = sns.relplot(
    data=sir_data,
    x='t',
    y='count',
    kind='line',
    hue='infection_status',
    units='id',
    estimator=None,
    alpha=0.1,
    row='infection_status',
)
g.fig.suptitle(
    f"Individuals by infection status over time ({particle_count} posterior samples)",
    fontsize='x-large',
    fontweight='bold'
)
g.fig.subplots_adjust(top=0.85)
g.set_axis_labels("Time (days since simulation start)", "Number of people")
for ax in g.axes.flat:
    ax.axvline(x=65, color="black", linestyle="--", label="First case reported (March 6, 2020)")
    ax.axvline(x=75, color="red", linestyle="--", label="First death reported (March 16, 2020)")
plt.show()
```
> **Collaborator:** These types of plots seem like they would be of interest every time the model is calibrated. Is it possible to implement a method on `CalibrationResults` to produce these?

> **Author:** We haven't landed `CalibrationResults` as a feature on main in calibrationtools, so I'm going to keep this longer function here for now to avoid over-complicating the dev branch with non-approved API. I've included an un-evaluated preferred-API code block in this report, though, to be clear that this is not a final document and instead reflects the current API.