bayesflow-org
diff --git a/‎bayesflow/__init__.py‎
Lines changed: 2 additions & 1 deletion b/‎bayesflow/__init__.py‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎bayesflow/diagnostics/plots/calibration_ecdf.py‎
Lines changed: 2 additions & 1 deletion b/‎bayesflow/diagnostics/plots/calibration_ecdf.py‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎bayesflow/diagnostics/plots/calibration_ecdf_from_quantiles.py‎
Lines changed: 2 additions & 1 deletion b/‎bayesflow/diagnostics/plots/calibration_ecdf_from_quantiles.py‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎bayesflow/networks/transformers/mab.py‎
Lines changed: 8 additions & 0 deletions b/‎bayesflow/networks/transformers/mab.py‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎bayesflow/simulators/hierarchical_simulator.py‎
Lines changed: 28 additions & 0 deletions b/‎bayesflow/simulators/hierarchical_simulator.py‎
Lines changed: 28 additions & 0 deletions
diff --git a/‎bayesflow/simulators/lambda_simulator.py‎
Lines changed: 35 additions & 1 deletion b/‎bayesflow/simulators/lambda_simulator.py‎
Lines changed: 35 additions & 1 deletion
diff --git a/‎bayesflow/simulators/make_simulator.py‎
Lines changed: 5 additions & 0 deletions b/‎bayesflow/simulators/make_simulator.py‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎bayesflow/simulators/model_comparison_simulator.py‎
Lines changed: 56 additions & 20 deletions b/‎bayesflow/simulators/model_comparison_simulator.py‎
Lines changed: 56 additions & 20 deletions
diff --git a/‎bayesflow/simulators/sequential_simulator.py‎
Lines changed: 33 additions & 0 deletions b/‎bayesflow/simulators/sequential_simulator.py‎
Lines changed: 33 additions & 0 deletions
diff --git a/‎bayesflow/wrappers/__init__.py‎
Lines changed: 1 addition & 0 deletions b/‎bayesflow/wrappers/__init__.py‎
Lines changed: 1 addition & 0 deletions
@@ -7,8 +7,9 @@
     experimental,
     networks,
     simulators,
-    workflows,
     utils,
+    workflows,
+    wrappers,
 )
 
 from .adapters import Adapter
 
@@ -19,6 +19,7 @@ def calibration_ecdf(
     figsize: Sequence[float] = None,
     label_fontsize: int = 16,
     legend_fontsize: int = 14,
+    legend_location: str = "upper right",
     title_fontsize: int = 18,
     tick_fontsize: int = 12,
     rank_ecdf_color: str = "#132a70",
@@ -184,7 +185,7 @@ def calibration_ecdf(
 
     for ax, title in zip(plot_data["axes"].flat, titles):
         ax.fill_between(z, L, U, color=fill_color, alpha=0.2, label=rf"{int((1 - alpha) * 100)}$\%$ Confidence Bands")
-        ax.legend(fontsize=legend_fontsize)
+        ax.legend(fontsize=legend_fontsize, loc=legend_location)
         ax.set_title(title, fontsize=title_fontsize)
 
     prettify_subplots(plot_data["axes"], num_subplots=plot_data["num_variables"], tick_fontsize=tick_fontsize)
 
@@ -19,6 +19,7 @@ def calibration_ecdf_from_quantiles(
     figsize: Sequence[float] = None,
     label_fontsize: int = 16,
     legend_fontsize: int = 14,
+    legend_location: str = "upper right",
     title_fontsize: int = 18,
     tick_fontsize: int = 12,
     rank_ecdf_color: str = "#132a70",
@@ -173,7 +174,7 @@ def calibration_ecdf_from_quantiles(
             alpha=0.2,
             label=rf"{int((1 - alpha) * 100)}$\%$ Confidence Bands" + "\n(pointwise)",
         )
-        ax.legend(fontsize=legend_fontsize)
+        ax.legend(fontsize=legend_fontsize, loc=legend_location)
         ax.set_title(title, fontsize=title_fontsize)
 
     prettify_subplots(plot_data["axes"], num_subplots=plot_data["num_variables"], tick_fontsize=tick_fontsize)
 
@@ -10,9 +10,17 @@
 class MultiHeadAttentionBlock(keras.Layer):
     """Implements the MAB block from [1] which represents learnable cross-attention.
 
+    In particular, it uses a so-called "Post-LN" transformer block [2], which applies
+    layer normalization after the attention sublayer and again after the MLP sublayer.
+    A "Pre-LN" variant (normalization before each sublayer) can easily be implemented.
+
     [1] Lee, J., Lee, Y., Kim, J., Kosiorek, A., Choi, S., & Teh, Y. W. (2019).
         Set transformer: A framework for attention-based permutation-invariant neural networks.
         In International conference on machine learning (pp. 3744-3753). PMLR.
+
+    [2] Xiong, R., Yang, Y., He, D., Zheng, K., Zheng, S., Xing, C., ... & Liu, T. (2020, November).
+        On layer normalization in the transformer architecture.
+        In International conference on machine learning (pp. 10524-10533). PMLR.
     """
 
     def __init__(
 
@@ -10,10 +10,38 @@
 
 class HierarchicalSimulator(Simulator):
     def __init__(self, hierarchy: Sequence[Simulator]):
+        """
+        Initialize the hierarchical simulator with a sequence of simulators.
+
+        Parameters
+        ----------
+        hierarchy : Sequence[Simulator]
+            A sequence of simulator instances representing each level of the hierarchy.
+            Each level's output is used as input for the next, with increasing batch dimensions.
+        """
         self.hierarchy = hierarchy
 
     @allow_batch_size
     def sample(self, batch_shape: Shape, **kwargs) -> dict[str, np.ndarray]:
+        """
+        Sample from a hierarchy of simulators.
+
+        Parameters
+        ----------
+        batch_shape : Shape
+            A tuple where each element specifies the number of samples at the corresponding level
+            of the hierarchy. The total batch size increases multiplicatively through the levels.
+        **kwargs
+            Additional keyword arguments passed to each simulator. These are combined with outputs
+            from previous levels and repeated appropriately.
+
+        Returns
+        -------
+        output_data : dict of str to np.ndarray
+            A dictionary containing the outputs from the entire hierarchy. Outputs are reshaped to
+            match the hierarchical batch shape, i.e., with shape equal to `batch_shape + original_shape`.
+        """
+
         input_data = {}
         output_data = {}
 
 
@@ -1,3 +1,5 @@
+from collections.abc import Callable, Sequence, Mapping
+
 import numpy as np
 
 from bayesflow.utils import batched_call, filter_kwargs, tree_stack
@@ -10,12 +12,44 @@
 class LambdaSimulator(Simulator):
     """Implements a simulator based on a sampling function."""
 
-    def __init__(self, sample_fn: callable, *, is_batched: bool = False):
+    def __init__(self, sample_fn: Callable[[Sequence[int]], Mapping[str, any]], *, is_batched: bool = False):
+        """
+        Initialize a simulator based on a simple callable function.
+
+        Parameters
+        ----------
+        sample_fn : Callable[[Sequence[int]], Mapping[str, any]]
+            A function that generates samples. It should accept `batch_shape` as its first argument
+            (if `is_batched=True`), followed by keyword arguments.
+        is_batched : bool, optional
+            Whether the `sample_fn` is implemented to handle batched sampling directly.
+            If False, `sample_fn` will be called once per sample and results will be stacked.
+            Default is False.
+        """
         self.sample_fn = sample_fn
         self.is_batched = is_batched
 
     @allow_batch_size
     def sample(self, batch_shape: Shape, **kwargs) -> dict[str, np.ndarray]:
+        """
+        Sample using the wrapped sampling function.
+
+        Parameters
+        ----------
+        batch_shape : Shape
+            The shape of the batch to sample. Typically, a tuple indicating the number of samples,
+            but an int can also be passed.
+        **kwargs
+            Additional keyword arguments passed to the sampling function. Only valid arguments
+            (as determined by the function's signature) are used.
+
+        Returns
+        -------
+        data : dict of str to np.ndarray
+            A dictionary of sampled outputs. Keys are output names and values are numpy arrays.
+            If `is_batched` is False, individual outputs are stacked along the first axis.
+        """
+
         # try to use only valid keyword-arguments
         kwargs = filter_kwargs(kwargs, self.sample_fn)
 
 
@@ -10,6 +10,11 @@
 
 @singledispatch
 def make_simulator(arg, *_, **__):
+    """
+    Dispatch function that accepts a list of simulators (callables), each returning a
+    dictionary of simulated outputs. The outputs of earlier simulators are passed on to
+    subsequent simulators that accept keyword arguments matching the keys of those outputs.
+    """
     raise TypeError(f"Cannot infer simulator from {arg!r}.")
 
 
 
@@ -24,6 +24,26 @@ def __init__(
         use_mixed_batches: bool = True,
         shared_simulator: Simulator | FunctionType = None,
     ):
+        """
+        Initialize a multi-model simulator that can generate data for mixture / model comparison problems.
+
+        Parameters
+        ----------
+        simulators : Sequence[Simulator]
+            A sequence of simulator instances, each representing a different model.
+        p : Sequence[float], optional
+            A sequence of probabilities associated with each simulator. Must sum to 1.
+            Mutually exclusive with `logits`.
+        logits : Sequence[float], optional
+            A sequence of logits corresponding to model probabilities. Mutually exclusive with `p`.
+            If neither `p` nor `logits` is provided, defaults to uniform logits.
+        use_mixed_batches : bool, optional
+            If True, samples in a batch are drawn from different models. If False, the entire batch
+            is drawn from a single model chosen according to the model probabilities. Default is True.
+        shared_simulator : Simulator or FunctionType, optional
+            A shared simulator whose outputs are passed to all model simulators. If a function is
+            provided, it is wrapped in a `LambdaSimulator` with batching enabled.
+        """
         self.simulators = simulators
 
         if isinstance(shared_simulator, FunctionType):
@@ -51,34 +71,50 @@ def __init__(
 
     @allow_batch_size
     def sample(self, batch_shape: Shape, **kwargs) -> dict[str, np.ndarray]:
+        """
+        Sample from the model comparison simulator.
+
+        Parameters
+        ----------
+        batch_shape : Shape
+            The shape of the batch to sample. Typically, a tuple indicating the number of samples,
+            but the user can also supply an int.
+        **kwargs
+            Additional keyword arguments passed to each simulator. These may include outputs from
+            the shared simulator.
+
+        Returns
+        -------
+        data : dict of str to np.ndarray
+            A dictionary containing the sampled outputs. Includes:
+              - outputs from the selected simulator(s)
+              - optionally, outputs from the shared simulator
+              - "model_indices": a one-hot encoded array indicating the model origin of each sample
+        """
         data = {}
         if self.shared_simulator:
             data |= self.shared_simulator.sample(batch_shape, **kwargs)
 
-        if not self.use_mixed_batches:
-            # draw one model index for the whole batch (faster)
-            model_index = np.random.choice(len(self.simulators), p=npu.softmax(self.logits))
+        softmax_logits = npu.softmax(self.logits)
+        num_models = len(self.simulators)
 
-            simulator = self.simulators[model_index]
-            data = simulator.sample(batch_shape, **(kwargs | data))
-
-            model_indices = np.full(batch_shape, model_index, dtype="int32")
-            model_indices = npu.one_hot(model_indices, len(self.simulators))
-        else:
-            # generate data randomly from each model (slower)
-            model_counts = np.random.multinomial(n=batch_shape[0], pvals=npu.softmax(self.logits))
-
-            sims = []
-            for n, simulator in zip(model_counts, self.simulators):
-                if n == 0:
-                    continue
-                sim = simulator.sample(n, **(kwargs | data))
-                sims.append(sim)
+        # generate data randomly from each model (slower)
+        if self.use_mixed_batches:
+            model_counts = np.random.multinomial(n=batch_shape[0], pvals=softmax_logits)
 
+            sims = [
+                simulator.sample(n, **(kwargs | data)) for simulator, n in zip(self.simulators, model_counts) if n > 0
+            ]
             sims = tree_concatenate(sims, numpy=True)
             data |= sims
 
-            model_indices = np.eye(len(self.simulators), dtype="int32")
-            model_indices = np.repeat(model_indices, model_counts, axis=0)
+            model_indices = np.repeat(np.eye(num_models, dtype="int32"), model_counts, axis=0)
+
+        # draw one model index for the whole batch (faster)
+        else:
+            model_index = np.random.choice(num_models, p=softmax_logits)
+
+            data = self.simulators[model_index].sample(batch_shape, **(kwargs | data))
+            model_indices = npu.one_hot(np.full(batch_shape, model_index, dtype="int32"), num_models)
 
         return data | {"model_indices": model_indices}
@@ -11,11 +11,44 @@ class SequentialSimulator(Simulator):
     """Combines multiple simulators into one, sequentially."""
 
     def __init__(self, simulators: Sequence[Simulator], expand_outputs: bool = True):
+        """
+        Initialize a SequentialSimulator.
+
+        Parameters
+        ----------
+        simulators : Sequence[Simulator]
+            A sequence of simulator instances to be executed sequentially. Each simulator should
+            return dictionary outputs and may depend on outputs from previous simulators.
+        expand_outputs : bool, optional
+            If True, 1D output arrays are expanded with an additional dimension at the end.
+            Default is True.
+        """
+
         self.simulators = simulators
         self.expand_outputs = expand_outputs
 
     @allow_batch_size
     def sample(self, batch_shape: Shape, **kwargs) -> dict[str, np.ndarray]:
+        """
+        Sample sequentially from the internal simulators.
+
+        Parameters
+        ----------
+        batch_shape : Shape
+            The shape of the batch to sample. Typically, a tuple indicating the number of samples,
+            but it also accepts an int.
+        **kwargs
+            Additional keyword arguments passed to each simulator. These may include previously
+            sampled outputs used as inputs for subsequent simulators.
+
+        Returns
+        -------
+        data : dict of str to np.ndarray
+            A dictionary containing the combined outputs from all simulators. Keys are output names
+            and values are sampled arrays. If `expand_outputs` is True, 1D arrays are expanded to
+            have shape (..., 1).
+        """
+
         data = {}
         for simulator in self.simulators:
             data |= simulator.sample(batch_shape, **(kwargs | data))
 
@@ -0,0 +1 @@
+from .mamba import Mamba