Add coverage to default diagnostics

stefanradev93 · stefanradev93 · commit 3c29d9e1e790 · 2025-10-05T11:33:24.000-04:00
diff --git a/bayesflow/diagnostics/plots/coverage.py b/bayesflow/diagnostics/plots/coverage.py
@@ -14,8 +14,10 @@ def coverage(
     variable_names: Sequence[str] = None,
     figsize: Sequence[int] = None,
     label_fontsize: int = 16,
+    legend_fontsize: int = 14,
     title_fontsize: int = 18,
     tick_fontsize: int = 12,
+    legend_location: str = "upper right",
     color: str = "#132a70",
     num_col: int = None,
     num_row: int = None,
@@ -52,6 +54,8 @@ def coverage(
         The figure size passed to the matplotlib constructor. Inferred if None.
     label_fontsize : int, optional, default: 16
         The font size of the y-label and x-label text
+    legend_fontsize : int, optional, default: 14
+        The font size of the legend text
     title_fontsize : int, optional, default: 18
         The font size of the title text
     tick_fontsize : int, optional, default: 12
@@ -133,12 +137,6 @@ def coverage(
             # Plot empirical coverage difference
             ax.plot(width_rep, diff_est, color=color, alpha=1.0, label="Coverage Difference")
 
-            # Set axis limits
-            ax.set_xlim(0, 1)
-
-            # Add legend to first subplot
-            if i == 0:
-                ax.legend(fontsize=tick_fontsize, loc="upper right")
         else:
             # Plot confidence ribbon
             ax.fill_between(
@@ -156,13 +154,13 @@ def coverage(
             # Plot empirical coverage
             ax.plot(width_rep, coverage_est, color=color, alpha=1.0, label="Empirical Coverage")
 
-            # Set axis limits
-            ax.set_xlim(0, 1)
-            ax.set_ylim(0, 1)
+        # Set axis limits
+        ax.set_xlim(0, 1)
+        ax.set_ylim(0, 1)
 
-            # Add legend to first subplot
-            if i == 0:
-                ax.legend(fontsize=tick_fontsize, loc="upper left")
+        # Add legend to first subplot
+        if i == 0:
+            ax.legend(fontsize=legend_fontsize, loc=legend_location)
 
     prettify_subplots(plot_data["axes"], num_subplots=plot_data["num_variables"], tick_fontsize=tick_fontsize)
 
diff --git a/bayesflow/workflows/basic_workflow.py b/bayesflow/workflows/basic_workflow.py
@@ -349,8 +349,12 @@ def plot_default_diagnostics(
         - Loss history (if training history is available).
         - Parameter recovery plots.
         - Calibration ECDF plots.
+        - Coverage plots.
         - Z-score contraction plots.
 
+        Caution: For models with many parameters, plotting all marginal diagnostics becomes unwieldy. Consider
+        providing `variable_keys` to visualize the diagnostics for a subset of the parameters.
+
         Parameters
         ----------
         test_data : Mapping[str, np.ndarray] or int
@@ -400,6 +404,7 @@ def plot_default_diagnostics(
         plot_fns = {
             "recovery": bf_plots.recovery,
             "calibration_ecdf": bf_plots.calibration_ecdf,
+            "coverage": bf_plots.coverage,
             "z_score_contraction": bf_plots.z_score_contraction,
         }
 
@@ -499,9 +504,10 @@ def compute_default_diagnostics(
         """
         Computes default diagnostic metrics to evaluate the quality of inference. The function computes several
         diagnostic metrics, including:
-        - Root Mean Squared Error (RMSE)
-        - Posterior contraction
-        - Calibration error
+        - (Normalized) Root Mean Squared Error ((N)RMSE) - summarizes the recovery plots
+        - Log-gamma statistic - summarizes the ECDF calibration plots
+        - Expected Calibration Error (ECE) - summarizes the coverage plots
+        - Posterior contraction - partially summarizes the contraction plots
 
         Parameters
         ----------
@@ -553,12 +559,12 @@ def compute_default_diagnostics(
             **kwargs.get("root_mean_squared_error_kwargs", {}),
         )
 
-        contraction = bf_metrics.posterior_contraction(
+        log_gamma = bf_metrics.calibration_log_gamma(
             estimates=samples,
             targets=test_data,
             variable_keys=variable_keys,
             variable_names=variable_names,
-            **kwargs.get("posterior_contraction_kwargs", {}),
+            **kwargs.get("log_gamma_kwargs", {}),
         )
 
         calibration_errors = bf_metrics.calibration_error(
@@ -569,17 +575,26 @@ def compute_default_diagnostics(
             **kwargs.get("calibration_error_kwargs", {}),
         )
 
+        contraction = bf_metrics.posterior_contraction(
+            estimates=samples,
+            targets=test_data,
+            variable_keys=variable_keys,
+            variable_names=variable_names,
+            **kwargs.get("posterior_contraction_kwargs", {}),
+        )
+
         if as_data_frame:
             metrics = pd.DataFrame(
                 {
                     root_mean_squared_error["metric_name"]: root_mean_squared_error["values"],
-                    contraction["metric_name"]: contraction["values"],
+                    log_gamma["metric_name"]: log_gamma["values"],
                     calibration_errors["metric_name"]: calibration_errors["values"],
+                    contraction["metric_name"]: contraction["values"],
                 },
                 index=variable_keys or root_mean_squared_error["variable_names"],
             ).T
         else:
-            metrics = (root_mean_squared_error, contraction, calibration_errors)
+            metrics = (root_mean_squared_error, log_gamma, calibration_errors, contraction)
 
         return metrics