bayesflow-org
diff --git a/‎bayesflow/diagnostics/plots/__init__.py‎
Lines changed: 1 addition & 0 deletions b/‎bayesflow/diagnostics/plots/__init__.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎bayesflow/diagnostics/plots/coverage.py‎
Lines changed: 287 additions & 0 deletions b/‎bayesflow/diagnostics/plots/coverage.py‎
Lines changed: 287 additions & 0 deletions
diff --git a/‎bayesflow/utils/__init__.py‎
Lines changed: 1 addition & 0 deletions b/‎bayesflow/utils/__init__.py‎
Lines changed: 1 addition & 0 deletions
@@ -1,6 +1,7 @@
 from .calibration_ecdf import calibration_ecdf
 from .calibration_ecdf_from_quantiles import calibration_ecdf_from_quantiles
 from .calibration_histogram import calibration_histogram
+from .coverage import coverage, coverage_diff
 from .loss import loss
 from .mc_calibration import mc_calibration
 from .mc_confusion_matrix import mc_confusion_matrix
 
@@ -0,0 +1,287 @@
+from collections.abc import Sequence, Mapping
+
+import matplotlib.pyplot as plt
+import numpy as np
+
+from bayesflow.utils import prepare_plot_data, add_titles_and_labels, prettify_subplots, compute_empirical_coverage
+
+
+def coverage(
+    estimates: Mapping[str, np.ndarray] | np.ndarray,
+    targets: Mapping[str, np.ndarray] | np.ndarray,
+    variable_keys: Sequence[str] = None,
+    variable_names: Sequence[str] = None,
+    figsize: Sequence[int] = None,
+    label_fontsize: int = 16,
+    title_fontsize: int = 18,
+    tick_fontsize: int = 12,
+    color: str = "#132a70",
+    num_col: int = None,
+    num_row: int = None,
+) -> plt.Figure:
+    """
+    Creates coverage plots showing empirical coverage of posterior credible intervals.
+
+    The empirical coverage shows the coverage (proportion of true variable values that fall within the interval)
+    of the central posterior credible intervals.
+    A well-calibrated model would have coverage exactly match interval width (i.e. 95%
+    credible interval contains the true value 95% of the time) as shown by the diagonal line.
+
+    The coverage is accompanied by credible intervals for the coverage (gray ribbon).
+    These are computed via the (conjugate) Beta-Binomial model for binomial proportions with a uniform prior.
+
+    Parameters
+    ----------
+    estimates : np.ndarray of shape (num_datasets, num_post_draws, num_params)
+        The posterior draws obtained from num_datasets
+    targets : np.ndarray of shape (num_datasets, num_params)
+        The true parameter values used for generating num_datasets
+    variable_keys : list or None, optional, default: None
+        Select keys from the dictionaries provided in estimates and targets.
+        By default, select all keys.
+    variable_names : list or None, optional, default: None
+        The parameter names for nice plot titles. Inferred if None
+    figsize : tuple or None, optional, default: None
+        The figure size passed to the matplotlib constructor. Inferred if None.
+    label_fontsize : int, optional, default: 16
+        The font size of the y-label and x-label text
+    title_fontsize : int, optional, default: 18
+        The font size of the title text
+    tick_fontsize : int, optional, default: 12
+        The font size of the axis ticklabels
+    color : str, optional, default: '#132a70'
+        The color for the coverage line
+    num_row : int, optional, default: None
+        The number of rows for the subplots. Dynamically determined if None.
+    num_col : int, optional, default: None
+        The number of columns for the subplots. Dynamically determined if None.
+
+    Returns
+    -------
+    f : plt.Figure - the figure instance for optional saving
+
+    Raises
+    ------
+    ShapeError
+        If there is a deviation from the expected shapes of ``estimates`` and ``targets``.
+
+    """
+
+    # Gather plot data and metadata into a dictionary
+    plot_data = prepare_plot_data(
+        estimates=estimates,
+        targets=targets,
+        variable_keys=variable_keys,
+        variable_names=variable_names,
+        num_col=num_col,
+        num_row=num_row,
+        figsize=figsize,
+    )
+
+    estimates = plot_data.pop("estimates")
+    targets = plot_data.pop("targets")
+
+    # Determine widths to compute coverage for
+    num_draws = estimates.shape[1]
+    widths = np.arange(0, num_draws + 2) / (num_draws + 1)
+
+    # Compute empirical coverage with default parameters
+    coverage_data = compute_empirical_coverage(
+        estimates=estimates,
+        targets=targets,
+        widths=widths,
+        prob=0.95,
+        interval_type="central",
+    )
+
+    # Plot coverage for each parameter
+    for i, ax in enumerate(plot_data["axes"].flat):
+        if i >= plot_data["num_variables"]:
+            break
+
+        width_rep = coverage_data["width_represented"][:, i]
+        coverage_est = coverage_data["coverage_estimates"][:, i]
+        coverage_low = coverage_data["coverage_lower"][:, i]
+        coverage_high = coverage_data["coverage_upper"][:, i]
+
+        # Plot confidence ribbon
+        ax.fill_between(
+            width_rep,
+            coverage_low,
+            coverage_high,
+            color="grey",
+            alpha=0.33,
+            label="95% Credible Interval",
+        )
+
+        # Plot ideal coverage line (y = x)
+        ax.plot([0, 1], [0, 1], color="skyblue", linewidth=2.0, label="Ideal Coverage")
+
+        # Plot empirical coverage
+        ax.plot(width_rep, coverage_est, color=color, alpha=1.0, label="Empirical Coverage")
+
+        # Set axis limits
+        ax.set_xlim(0, 1)
+        ax.set_ylim(0, 1)
+
+        # Add legend to first subplot
+        if i == 0:
+            ax.legend(fontsize=tick_fontsize, loc="upper left")
+
+    prettify_subplots(plot_data["axes"], num_subplots=plot_data["num_variables"], tick_fontsize=tick_fontsize)
+
+    # Add labels, titles, and set font sizes
+    add_titles_and_labels(
+        axes=plot_data["axes"],
+        num_row=plot_data["num_row"],
+        num_col=plot_data["num_col"],
+        title=plot_data["variable_names"],
+        xlabel="Central interval width",
+        ylabel="Observed coverage",
+        title_fontsize=title_fontsize,
+        label_fontsize=label_fontsize,
+    )
+
+    plot_data["fig"].tight_layout()
+    return plot_data["fig"]
+
+
+def coverage_diff(
+    estimates: Mapping[str, np.ndarray] | np.ndarray,
+    targets: Mapping[str, np.ndarray] | np.ndarray,
+    variable_keys: Sequence[str] = None,
+    variable_names: Sequence[str] = None,
+    figsize: Sequence[int] = None,
+    label_fontsize: int = 16,
+    title_fontsize: int = 18,
+    tick_fontsize: int = 12,
+    color: str = "#132a70",
+    num_col: int = None,
+    num_row: int = None,
+) -> plt.Figure:
+    """
+    Creates coverage difference plots showing the difference between empirical coverage
+    and ideal coverage of posterior credible intervals.
+
+    This plot shows coverage - width, making deviations from ideal calibration
+    more visible than the standard coverage plot.
+    For more details, see the documentation of the standard coverage plot.
+
+    Parameters
+    ----------
+    estimates : np.ndarray of shape (num_datasets, num_post_draws, num_params)
+        The posterior draws obtained from num_datasets
+    targets : np.ndarray of shape (num_datasets, num_params)
+        The true parameter values used for generating num_datasets
+    variable_keys : list or None, optional, default: None
+        Select keys from the dictionaries provided in estimates and targets.
+        By default, select all keys.
+    variable_names : list or None, optional, default: None
+        The parameter names for nice plot titles. Inferred if None
+    figsize : tuple or None, optional, default: None
+        The figure size passed to the matplotlib constructor. Inferred if None.
+    label_fontsize : int, optional, default: 16
+        The font size of the y-label and x-label text
+    title_fontsize : int, optional, default: 18
+        The font size of the title text
+    tick_fontsize : int, optional, default: 12
+        The font size of the axis ticklabels
+    color : str, optional, default: '#132a70'
+        The color for the coverage difference line
+    num_row : int, optional, default: None
+        The number of rows for the subplots. Dynamically determined if None.
+    num_col : int, optional, default: None
+        The number of columns for the subplots. Dynamically determined if None.
+
+    Returns
+    -------
+    f : plt.Figure - the figure instance for optional saving
+
+    Raises
+    ------
+    ShapeError
+        If there is a deviation from the expected shapes of ``estimates`` and ``targets``.
+
+    """
+
+    # Gather plot data and metadata into a dictionary
+    plot_data = prepare_plot_data(
+        estimates=estimates,
+        targets=targets,
+        variable_keys=variable_keys,
+        variable_names=variable_names,
+        num_col=num_col,
+        num_row=num_row,
+        figsize=figsize,
+    )
+
+    estimates = plot_data.pop("estimates")
+    targets = plot_data.pop("targets")
+
+    # Determine widths to compute coverage for
+    num_draws = estimates.shape[1]
+    widths = np.arange(0, num_draws + 2) / (num_draws + 1)
+
+    # Compute empirical coverage with default parameters
+    coverage_data = compute_empirical_coverage(
+        estimates=estimates,
+        targets=targets,
+        widths=widths,
+        prob=0.95,
+        interval_type="central",
+    )
+
+    # Plot coverage difference for each parameter
+    for i, ax in enumerate(plot_data["axes"].flat):
+        if i >= plot_data["num_variables"]:
+            break
+
+        width_rep = coverage_data["width_represented"][:, i]
+        coverage_est = coverage_data["coverage_estimates"][:, i]
+        coverage_low = coverage_data["coverage_lower"][:, i]
+        coverage_high = coverage_data["coverage_upper"][:, i]
+
+        # Compute differences
+        diff_est = coverage_est - width_rep
+        diff_low = coverage_low - width_rep
+        diff_high = coverage_high - width_rep
+
+        # Plot confidence ribbon
+        ax.fill_between(
+            width_rep,
+            diff_low,
+            diff_high,
+            color="grey",
+            alpha=0.33,
+            label="95% Credible Interval",
+        )
+
+        # Plot ideal coverage difference line (y = 0)
+        ax.axhline(y=0, color="skyblue", linewidth=2.0, label="Ideal Coverage")
+
+        # Plot empirical coverage difference
+        ax.plot(width_rep, diff_est, color=color, alpha=1.0, label="Coverage Difference")
+
+        # Set axis limits
+        ax.set_xlim(0, 1)
+
+        # Add legend to first subplot
+        if i == 0:
+            ax.legend(fontsize=tick_fontsize, loc="upper right")
+
+    prettify_subplots(plot_data["axes"], num_subplots=plot_data["num_variables"], tick_fontsize=tick_fontsize)
+
+    # Add labels, titles, and set font sizes
+    add_titles_and_labels(
+        axes=plot_data["axes"],
+        num_row=plot_data["num_row"],
+        num_col=plot_data["num_col"],
+        title=plot_data["variable_names"],
+        xlabel="Central interval width",
+        ylabel="Coverage difference",
+        title_fontsize=title_fontsize,
+        label_fontsize=label_fontsize,
+    )
+
+    plot_data["fig"].tight_layout()
+    return plot_data["fig"]
@@ -71,6 +71,7 @@
     prettify_subplots,
     make_quadratic,
     add_metric,
+    compute_empirical_coverage,
 )
 from .serialization import serialize_value_or_type, deserialize_value_or_type
Original file line number	Diff line number	Diff line change
`@@ -71,6 +71,7 @@`
`71`	`71`	`prettify_subplots,`
`72`	`72`	`make_quadratic,`
`73`	`73`	`add_metric,`
	`74`	`+ compute_empirical_coverage,`
`74`	`75`	`)`
`75`	`76`	`from .serialization import serialize_value_or_type, deserialize_value_or_type`
`76`	`77`