Merge branch 'fix_sampling_method_kwargs' into compositional_sampling_diffusion

arrjon · arrjon · commit 922412f0178b · 2025-09-22T10:23:18.000+02:00
diff --git a/bayesflow/diagnostics/plots/recovery.py b/bayesflow/diagnostics/plots/recovery.py
@@ -1,20 +1,21 @@
-from collections.abc import Sequence, Mapping
+from collections.abc import Sequence, Mapping, Callable
 
 import matplotlib.pyplot as plt
 import numpy as np
 
-from scipy.stats import median_abs_deviation
-
 from bayesflow.utils import prepare_plot_data, prettify_subplots, make_quadratic, add_titles_and_labels, add_metric
+from bayesflow.utils.numpy_utils import credible_interval
 
 
 def recovery(
     estimates: Mapping[str, np.ndarray] | np.ndarray,
     targets: Mapping[str, np.ndarray] | np.ndarray,
     variable_keys: Sequence[str] = None,
     variable_names: Sequence[str] = None,
-    point_agg=np.median,
-    uncertainty_agg=median_abs_deviation,
+    point_agg: Callable = np.median,
+    uncertainty_agg: Callable = credible_interval,
+    point_agg_kwargs: dict = None,
+    uncertainty_agg_kwargs: dict = None,
     add_corr: bool = True,
     figsize: Sequence[int] = None,
     label_fontsize: int = 16,
@@ -57,8 +58,17 @@ def recovery(
        By default, select all keys.
     variable_names    : list or None, optional, default: None
         The individual parameter names for nice plot titles. Inferred if None
-    point_agg         : function to compute point estimates. Default: median
-    uncertainty_agg   : function to compute uncertainty estimates. Default: MAD
+    point_agg         : callable, optional, default: median
+        Function to compute point estimates.
+    uncertainty_agg   : callable, optional, default: credible_interval with coverage probability 95%
+        Function to compute a measure of uncertainty. It may return either the lower and
+        upper uncertainty bounds with shape (2, num_datasets, num_params), or a scalar
+        measure of uncertainty (e.g., the median absolute deviation) with shape
+        (num_datasets, num_params).
+    point_agg_kwargs : Optional dictionary of further keyword arguments passed to point_agg.
+    uncertainty_agg_kwargs : Optional dictionary of further keyword arguments passed to uncertainty_agg.
+        For example, to change the coverage probability of credible_interval to 50%,
+        use uncertainty_agg_kwargs=dict(prob=0.5).
     add_corr          : boolean, default: True
         Should correlations between estimates and ground truth values be shown?
     figsize           : tuple or None, optional, default : None
@@ -106,11 +116,18 @@ def recovery(
     estimates = plot_data.pop("estimates")
     targets = plot_data.pop("targets")
 
+    point_agg_kwargs = point_agg_kwargs or {}
+    uncertainty_agg_kwargs = uncertainty_agg_kwargs or {}
+
     # Compute point estimates and uncertainties
-    point_estimate = point_agg(estimates, axis=1)
+    point_estimate = point_agg(estimates, axis=1, **point_agg_kwargs)
 
     if uncertainty_agg is not None:
-        u = uncertainty_agg(estimates, axis=1)
+        u = uncertainty_agg(estimates, axis=1, **uncertainty_agg_kwargs)
+        if u.ndim == 3:
+            # convert lower/upper bounds into error-bar lengths relative to the point estimate
+            u[0, :, :] = point_estimate - u[0, :, :]
+            u[1, :, :] = u[1, :, :] - point_estimate
 
     for i, ax in enumerate(plot_data["axes"].flat):
         if i >= plot_data["num_variables"]:
@@ -121,7 +138,7 @@ def recovery(
             _ = ax.errorbar(
                 targets[:, i],
                 point_estimate[:, i],
-                yerr=u[:, i],
+                yerr=u[..., i],
                 fmt="o",
                 alpha=0.5,
                 color=color,
diff --git a/bayesflow/distributions/diagonal_student_t.py b/bayesflow/distributions/diagonal_student_t.py
@@ -63,19 +63,19 @@ def __init__(
 
         self.seed_generator = seed_generator or keras.random.SeedGenerator()
 
-        self.dim = None
+        self.dims = None
         self._loc = None
         self._scale = None
 
     def build(self, input_shape: Shape) -> None:
         if self.built:
             return
 
-        self.dim = int(input_shape[-1])
+        self.dims = tuple(input_shape[1:])
 
         # convert to tensor and broadcast if necessary
-        self.loc = ops.cast(ops.broadcast_to(self.loc, (self.dim,)), "float32")
-        self.scale = ops.cast(ops.broadcast_to(self.scale, (self.dim,)), "float32")
+        self.loc = ops.cast(ops.broadcast_to(self.loc, self.dims), "float32")
+        self.scale = ops.cast(ops.broadcast_to(self.scale, self.dims), "float32")
 
         if self.trainable_parameters:
             self._loc = self.add_weight(
@@ -96,14 +96,14 @@ def build(self, input_shape: Shape) -> None:
 
     def log_prob(self, samples: Tensor, *, normalize: bool = True) -> Tensor:
         mahalanobis_term = ops.sum((samples - self._loc) ** 2 / self._scale**2, axis=-1)
-        result = -0.5 * (self.df + self.dim) * ops.log1p(mahalanobis_term / self.df)
+        result = -0.5 * (self.df + sum(self.dims)) * ops.log1p(mahalanobis_term / self.df)
 
         if normalize:
             log_normalization_constant = (
-                -0.5 * self.dim * math.log(self.df)
-                - 0.5 * self.dim * math.log(math.pi)
+                -0.5 * sum(self.dims) * math.log(self.df)
+                - 0.5 * sum(self.dims) * math.log(math.pi)
                 - math.lgamma(0.5 * self.df)
-                + math.lgamma(0.5 * (self.df + self.dim))
+                + math.lgamma(0.5 * (self.df + sum(self.dims)))
                 - ops.sum(keras.ops.log(self._scale))
             )
             result += log_normalization_constant
@@ -119,9 +119,10 @@ def sample(self, batch_shape: Shape) -> Tensor:
 
         # The chi-quare samples need to be repeated across self.dim
         # since for each element of batch_shape only one sample is created.
-        chi2_samples = expand_tile(chi2_samples, n=self.dim, axis=-1)
+        chi2_samples = expand_tile(chi2_samples, n=sum(self.dims), axis=-1)
+        chi2_samples = keras.ops.reshape(chi2_samples, batch_shape + self.dims)
 
-        normal_samples = keras.random.normal(batch_shape + (self.dim,), seed=self.seed_generator)
+        normal_samples = keras.random.normal(batch_shape + self.dims, seed=self.seed_generator)
 
         return self._loc + self._scale * normal_samples * ops.sqrt(self.df / chi2_samples)
 
diff --git a/bayesflow/distributions/mixture.py b/bayesflow/distributions/mixture.py
@@ -59,7 +59,7 @@ def __init__(
 
         self.trainable_mixture = trainable_mixture
 
-        self.dim = None
+        self.dims = None
         self._mixture_logits = None
 
     @allow_batch_size
@@ -78,7 +78,7 @@ def sample(self, batch_shape: Shape) -> Tensor:
         Returns
         -------
         samples: Tensor
-            A tensor of shape `batch_shape + (dim,)` containing samples drawn
+            A tensor of shape `batch_shape + dims` containing samples drawn
             from the mixture.
         """
         # Will use numpy until keras adds support for N-D categorical sampling
@@ -87,7 +87,7 @@ def sample(self, batch_shape: Shape) -> Tensor:
         cat_samples = cat_samples.argmax(axis=-1)
 
         # Prepare array to fill and dtype to infer
-        samples = np.zeros(batch_shape + (self.dim,))
+        samples = np.zeros(batch_shape + self.dims)
         dtype = None
 
         # Fill in array with vectorized sampling per component
@@ -137,7 +137,7 @@ def build(self, input_shape: Shape) -> None:
         if self.built:
             return
 
-        self.dim = input_shape[-1]
+        self.dims = tuple(input_shape[1:])
 
         for distribution in self.distributions:
             distribution.build(input_shape)
diff --git a/bayesflow/networks/transformers/mab.py b/bayesflow/networks/transformers/mab.py
@@ -3,7 +3,7 @@
 
 from bayesflow.networks import MLP
 from bayesflow.types import Tensor
-from bayesflow.utils import layer_kwargs
+from bayesflow.utils import layer_kwargs, filter_kwargs
 from bayesflow.utils.decorators import sanitize_input_shape
 from bayesflow.utils.serialization import serializable
 
@@ -111,7 +111,7 @@ def call(self, seq_x: Tensor, seq_y: Tensor, training: bool = False, **kwargs) -
         """
 
         h = self.input_projector(seq_x) + self.attention(
-            query=seq_x, key=seq_y, value=seq_y, training=training, **kwargs
+            query=seq_x, key=seq_y, value=seq_y, training=training, **filter_kwargs(kwargs, self.attention.call)
         )
         if self.ln_pre is not None:
             h = self.ln_pre(h, training=training)
diff --git a/bayesflow/networks/transformers/set_transformer.py b/bayesflow/networks/transformers/set_transformer.py
@@ -147,7 +147,7 @@ def call(self, input_set: Tensor, training: bool = False, **kwargs) -> Tensor:
         out : Tensor
             Output of shape (batch_size, set_size, output_dim)
         """
-        summary = self.attention_blocks(input_set, training=training, **kwargs)
+        summary = self.attention_blocks(input_set, training=training)
         summary = self.pooling_by_attention(summary, training=training, **kwargs)
         summary = self.output_projector(summary)
         return summary
diff --git a/bayesflow/utils/numpy_utils.py b/bayesflow/utils/numpy_utils.py
@@ -1,5 +1,6 @@
 import numpy as np
 from scipy import special
+from collections.abc import Sequence
 
 
 def inverse_sigmoid(x: np.ndarray) -> np.ndarray:
@@ -42,3 +43,47 @@ def softplus(x: np.ndarray, beta: float = 1.0, threshold: float = 20.0) -> np.nd
     with np.errstate(over="ignore"):
         exp_beta_x = np.exp(beta * x)
     return np.where(beta * x > threshold, x, np.log1p(exp_beta_x) / beta)
+
+
+def credible_interval(x: np.ndarray, prob: float = 0.95, axis: Sequence[int] | int = None, **kwargs) -> np.ndarray:
+    """
+    Compute an equal-tailed credible interval from samples using quantiles.
+
+    Parameters
+    ----------
+    x : array_like
+        Input array of samples from a posterior distribution or bootstrap samples.
+    prob : float, default 0.95
+        Coverage probability of the credible interval (between 0 and 1), e.g., 0.95 for a 95% interval.
+    axis : int or Sequence[int], optional
+        Axis or axes along which the credible interval is computed. Default is None (flatten array).
+    **kwargs
+        Additional keyword arguments passed on to np.quantile.
+
+    Returns
+    -------
+    a numpy array of shape (2, ...) with the first dimension indicating the
+    lower and upper bounds of the credible interval.
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> # Simulate posterior samples
+    >>> samples = np.random.normal(size=(10, 1000, 3))
+
+    >>> # Different coverage probabilities
+    >>> credible_interval(samples, prob=0.5, axis=1)  # 50% CI
+    >>> credible_interval(samples, prob=0.99, axis=1)  # 99% CI
+    """
+
+    # Input validation
+    if not 0 <= prob <= 1:
+        raise ValueError(f"prob must be between 0 and 1, got {prob}")
+
+    # Calculate tail probabilities
+    alpha = 1 - prob
+    lower_q = alpha / 2
+    upper_q = 1 - alpha / 2
+
+    # Compute quantiles
+    return np.quantile(x, q=(lower_q, upper_q), axis=axis, **kwargs)
diff --git a/tests/test_approximators/test_sample.py b/tests/test_approximators/test_sample.py
@@ -1,3 +1,4 @@
+import pytest
 import keras
 from tests.utils import check_combination_simulator_adapter
 
@@ -16,3 +17,92 @@ def test_approximator_sample(approximator, simulator, batch_size, adapter):
     samples = approximator.sample(num_samples=2, conditions=data)
 
     assert isinstance(samples, dict)
+
+
+@pytest.mark.parametrize("inference_network_type", ["flow_matching", "diffusion_model"])
+@pytest.mark.parametrize("summary_network_type", ["none", "deep_set", "set_transformer", "time_series"])
+@pytest.mark.parametrize("method", ["euler", "rk45", "euler_maruyama"])
+def test_approximator_sample_with_integration_methods(
+    inference_network_type, summary_network_type, method, simulator, adapter
+):
+    """Test approximator sampling with different integration methods and summary networks.
+
+    Tests flow matching and diffusion models with different ODE/SDE solvers:
+    - euler, rk45: Available for both flow matching and diffusion models
+    - euler_maruyama: Only for diffusion models (stochastic)
+
+    Also tests with different summary network types.
+    """
+    batch_size = 8  # Use smaller batch size for faster tests
+    check_combination_simulator_adapter(simulator, adapter)
+
+    # Skip euler_maruyama for flow matching (deterministic model)
+    if inference_network_type == "flow_matching" and method == "euler_maruyama":
+        pytest.skip("euler_maruyama is only available for diffusion models")
+
+    # Create inference network based on type
+    if inference_network_type == "flow_matching":
+        from bayesflow.networks import FlowMatching, MLP
+
+        inference_network = FlowMatching(
+            subnet=MLP(widths=[32, 32]),
+            integrate_kwargs={"steps": 10},  # Use fewer steps for faster tests
+        )
+    elif inference_network_type == "diffusion_model":
+        from bayesflow.networks import DiffusionModel, MLP
+
+        inference_network = DiffusionModel(
+            subnet=MLP(widths=[32, 32]),
+            integrate_kwargs={"steps": 10},  # Use fewer steps for faster tests
+        )
+    else:
+        pytest.skip(f"Unsupported inference network type: {inference_network_type}")
+
+    # Create summary network based on type
+    summary_network = None
+    if summary_network_type != "none":
+        if summary_network_type == "deep_set":
+            from bayesflow.networks import DeepSet, MLP
+
+            summary_network = DeepSet(subnet=MLP(widths=[16, 16]))
+        elif summary_network_type == "set_transformer":
+            from bayesflow.networks import SetTransformer
+
+            summary_network = SetTransformer(embed_dims=[16, 16], mlp_widths=[16, 16])
+        elif summary_network_type == "time_series":
+            from bayesflow.networks import TimeSeriesNetwork
+
+            summary_network = TimeSeriesNetwork(subnet_kwargs={"widths": [16, 16]}, cell_type="lstm")
+        else:
+            pytest.skip(f"Unsupported summary network type: {summary_network_type}")
+
+        # Update adapter to include summary variables if summary network is present
+        from bayesflow import ContinuousApproximator
+
+        adapter = ContinuousApproximator.build_adapter(
+            inference_variables=["mean", "std"],
+            summary_variables=["x"],  # Use x as summary variable for testing
+        )
+
+    # Create approximator
+    from bayesflow import ContinuousApproximator
+
+    approximator = ContinuousApproximator(
+        adapter=adapter, inference_network=inference_network, summary_network=summary_network
+    )
+
+    # Generate test data
+    num_batches = 2  # Use fewer batches for faster tests
+    data = simulator.sample((num_batches * batch_size,))
+
+    # Build approximator
+    batch = adapter(data)
+    batch = keras.tree.map_structure(keras.ops.convert_to_tensor, batch)
+    batch_shapes = keras.tree.map_structure(keras.ops.shape, batch)
+    approximator.build(batch_shapes)
+
+    # Test sampling with the specified method
+    samples = approximator.sample(num_samples=2, conditions=data, method=method)
+
+    # Verify results
+    assert isinstance(samples, dict)
diff --git a/tests/test_diagnostics/test_diagnostics_plots.py b/tests/test_diagnostics/test_diagnostics_plots.py
@@ -92,9 +92,20 @@ def test_loss(history):
     assert out.axes[0].title._text == "Loss Trajectory"
 
 
-def test_recovery(random_estimates, random_targets):
+def test_recovery_bounds(random_estimates, random_targets):
     # basic functionality: automatic variable names
-    out = bf.diagnostics.plots.recovery(random_estimates, random_targets, markersize=4)
+    from bayesflow.utils.numpy_utils import credible_interval
+
+    out = bf.diagnostics.plots.recovery(
+        random_estimates, random_targets, markersize=4, uncertainty_agg=credible_interval
+    )
+    assert len(out.axes) == num_variables(random_estimates)
+    assert out.axes[2].title._text == "sigma"
+
+
+def test_recovery_symmetric(random_estimates, random_targets):
+    # basic functionality: automatic variable names
+    out = bf.diagnostics.plots.recovery(random_estimates, random_targets, markersize=4, uncertainty_agg=np.std)
     assert len(out.axes) == num_variables(random_estimates)
     assert out.axes[2].title._text == "sigma"