bayesflow-org
diff --git a/‎bayesflow/diagnostics.py‎
Lines changed: 89 additions & 0 deletions b/‎bayesflow/diagnostics.py‎
Lines changed: 89 additions & 0 deletions
diff --git a/‎bayesflow/exceptions.py‎
Lines changed: 6 additions & 1 deletion b/‎bayesflow/exceptions.py‎
Lines changed: 6 additions & 1 deletion
diff --git a/‎bayesflow/trainers.py‎
Lines changed: 68 additions & 1 deletion b/‎bayesflow/trainers.py‎
Lines changed: 68 additions & 1 deletion
@@ -1175,3 +1175,92 @@ def plot_confusion_matrix(
             )
     if title:
         ax.set_title("Confusion Matrix", fontsize=title_fontsize)
+
+
+def plot_mmd_hypothesis_test(mmd_null,
+                             mmd_observed=None,
+                             alpha_level=0.05,
+                             null_color=(0.16407, 0.020171, 0.577478),
+                             observed_color="red",
+                             alpha_color="orange",
+                             truncate_vlines_at_kde=False,
+                             xmin=None,
+                             xmax=None,
+                             bw_factor=1.5):
+    """Plots a kernel density estimate of the MMD sampling distribution under the null hypothesis,
+    shades the rejection area beyond the (1 - alpha_level) quantile, and, if provided, marks the
+    observed MMD value with a vertical line.
+
+    Parameters
+    ----------
+    mmd_null: np.ndarray
+        samples from the MMD sampling distribution under the null hypothesis "the model is well-specified"
+    mmd_observed: float or None, default: None
+        observed MMD value; if None, no line for the observed data is drawn
+    alpha_level: float, default: 0.05
+        rejection probability (type I error)
+    null_color: color
+        color for the H0 sampling distribution
+    observed_color: color
+        color for the observed MMD
+    alpha_color: color
+        color for the rejection area
+    truncate_vlines_at_kde: bool, default: False
+        true: cut off the vlines at the kde
+        false: continue the vlines up to the current upper y axis limit
+    xmin: float
+        lower x axis limit
+    xmax: float
+        upper x axis limit
+    bw_factor: float, default: 1.5
+        bandwidth (aka. smoothing parameter) of the kernel density estimate
+
+    Returns
+    -------
+    f : plt.Figure - the figure instance for optional saving
+
+    """
+
+    def get_kde_curve(kde_object):
+        # The first line on the axes is the invisible KDE drawn right after figure creation;
+        # it only serves as the reference curve for truncation and filling.
+        return kde_object.lines[0].get_data()
+
+    def draw_vline(x, kde_object, color, label=None):
+        if truncate_vlines_at_kde:
+            # Stop the line at the KDE height of the grid point closest to x
+            kde_x, kde_y = get_kde_curve(kde_object)
+            ymax = kde_y[np.argmin(np.abs(kde_x - x))]
+        else:
+            ymax = plt.gca().get_ylim()[1]
+        plt.vlines(x=x, ymin=0, ymax=ymax, color=color, linewidth=3, label=label)
+
+    def fill_area_under_kde(kde_object, x_start, x_end=None, **kwargs):
+        kde_x, kde_y = get_kde_curve(kde_object)
+        in_range = kde_x >= x_start
+        if x_end is not None:
+            in_range &= kde_x <= x_end
+        plt.fill_between(kde_x, kde_y, where=in_range, interpolate=True, **kwargs)
+
+    f = plt.figure(figsize=(8, 4))
+
+    kde = sns.kdeplot(mmd_null, fill=False, linewidth=0, bw_adjust=bw_factor)
+    sns.kdeplot(mmd_null, fill=True, alpha=.12, color=null_color, bw_adjust=bw_factor)
+
+    # The observed value is optional (default None), so only draw it when it was actually supplied
+    if mmd_observed is not None:
+        draw_vline(mmd_observed, kde, observed_color, label=r"Observed data")
+
+    mmd_critical = np.quantile(mmd_null, 1 - alpha_level)
+    # Format with :g instead of int(...) so that e.g. alpha_level=0.025 is labelled 2.5% and
+    # floating point noise (0.29 * 100 = 28.999...) does not truncate to the wrong integer
+    fill_area_under_kde(kde, mmd_critical, color=alpha_color, alpha=0.5,
+                        label=fr"{alpha_level * 100:g}% rejection area")
+    draw_vline(mmd_critical, kde, alpha_color)
+
+    sns.kdeplot(mmd_null, fill=False, linewidth=3, color=null_color, label=r"$H_0$", bw_adjust=bw_factor)
+
+    plt.xlabel(r"MMD", fontsize=20)
+    plt.ylabel("")
+    plt.yticks([])
+    plt.xlim(xmin, xmax)
+    plt.tick_params(axis='both', which='major', labelsize=16)
+
+    plt.legend(fontsize=20)
+    sns.despine()
+
+    return f
@@ -38,7 +38,7 @@ class LossError(Exception):
 
 
 class ShapeError(Exception):
-    """Class for error in expected shappes."""
+    """Class for error in expected shapes."""
 
     pass
 
@@ -61,3 +61,8 @@ class OperationNotSupportedError(Exception):
     """
 
     pass
+
+
+class ArgumentError(Exception):
+    """Class for errors raised when a function call is invalid because of the arguments passed to it."""
+
+    pass
@@ -38,7 +38,7 @@
 from bayesflow.configuration import *
 from bayesflow.default_settings import DEFAULT_KEYS, OPTIMIZER_DEFAULTS
 from bayesflow.diagnostics import plot_latent_space_2d, plot_sbc_histograms
-from bayesflow.exceptions import SimulationError
+from bayesflow.exceptions import SimulationError, ArgumentError
 from bayesflow.helper_classes import (
     EarlyStopper,
     LossHistory,
@@ -49,6 +49,7 @@
 )
 from bayesflow.helper_functions import backprop_step, extract_current_lr, format_loss_string, loss_to_string
 from bayesflow.simulation import GenerativeModel, MultiGenerativeModel
+from bayesflow.computational_utilities import maximum_mean_discrepancy
 
 
 class Trainer:
@@ -1009,6 +1010,72 @@ def train_rounds(
             self.optimizer = None
         return self.loss_history.get_plottable()
 
+    def mmd_hypothesis_test(self,
+                            observed_data,
+                            reference_data=None,
+                            num_reference_simulations=1000,
+                            num_null_samples=100,
+                            bootstrap=False):
+        """Computes the square root of the maximum mean discrepancy (MMD) between the summary network
+        outputs of reference data and observed data, along with draws from the corresponding sampling
+        distribution under the null hypothesis "the trainer's generative model is well-specified".
+
+        Parameters
+        ----------
+        observed_data: np.ndarray or dict
+            Observed data, shape (num_observed, ...). If a dict with the key `summary_conditions` is
+            provided, the entry under that key is passed to the summary network.
+        reference_data: np.ndarray or dict or None, default: None
+            Reference data representing samples from the "well-specified model", shape (num_reference, ...).
+            If a dict with the key `summary_conditions` is provided, the entry under that key is passed to
+            the summary network. If None, reference data are simulated from the trainer's generative model.
+        num_reference_simulations: int, default: 1000
+            Number of reference simulations (M) simulated from the trainer's generative model
+            if no `reference_data` are provided.
+        num_null_samples: int, default: 100
+            Number of draws from the MMD sampling distribution under the null hypothesis "the trainer's generative
+            model is well-specified"
+        bootstrap: bool, default: False
+            If true, the reference data (see above) are bootstrapped for each sample from the MMD sampling distribution.
+            If false, a new data set is simulated for computing each draw from the MMD sampling distribution.
+
+        Returns
+        -------
+        mmd_null_samples: np.ndarray
+            samples from the H0 sampling distribution ("well-specified model")
+        mmd_observed: float
+            summary MMD estimate for the observed data sets
+
+        Raises
+        ------
+        ArgumentError
+            If the trainer has no generative model but one is required, i.e., if no `reference_data`
+            are provided or if `bootstrap` is False.
+        """
+
+        def summarize(data):
+            # Dicts carrying a 'summary_conditions' entry are unpacked before the summary network call;
+            # anything else is passed to the summary network as is.
+            if isinstance(data, dict) and 'summary_conditions' in data:
+                return self.amortizer.summary_net(data["summary_conditions"])
+            return self.amortizer.summary_net(data)
+
+        # Fail early with a descriptive error instead of calling a missing generative model later on
+        if self.generative_model is None:
+            if reference_data is None:
+                raise ArgumentError("If you do not provide reference data, your trainer must have a generative model!")
+            if not bootstrap:
+                raise ArgumentError("If you do not bootstrap the reference data, your trainer must have a generative model!")
+
+        if reference_data is None:
+            reference_data = self.configurator(self.generative_model(num_reference_simulations))
+
+        reference_summary = summarize(reference_data)
+        observed_summary = summarize(observed_data)
+
+        num_observed = observed_summary.shape[0]
+        num_reference = reference_summary.shape[0]
+
+        mmd_null_samples = np.empty(num_null_samples, dtype=np.float32)
+        for i in tqdm(range(num_null_samples)):
+            if bootstrap:
+                # Resample (with replacement) as many reference summaries as there are observed data sets
+                bootstrap_idx = np.random.randint(0, num_reference, size=num_observed)
+                simulated_summary = tf.gather(reference_summary, bootstrap_idx, axis=0)
+            else:
+                # Simulate a fresh batch with the same size as the observed data
+                simulated_data = self.configurator(self.generative_model(num_observed))
+                simulated_summary = summarize(simulated_data)
+
+            mmd_null_samples[i] = np.sqrt(maximum_mean_discrepancy(reference_summary, simulated_summary).numpy())
+
+        mmd_observed = np.sqrt(maximum_mean_discrepancy(reference_summary, observed_summary).numpy())
+
+        return mmd_null_samples, mmd_observed
+
     def _config_validation(self, validation_sims, **kwargs):
         """Helper method to prepare validation set based on user input."""