
Commit 44af8e7

Documentation and cleanup
1 parent f791a81 commit 44af8e7

File tree: 20 files changed, +414 −190 lines

assets/__init__.py

Whitespace-only changes.

assets/benchmark_network_architectures.py

Lines changed: 0 additions & 108 deletions
This file was deleted.

bayesflow/diagnostics/metrics/calibration_error.py

Lines changed: 2 additions & 1 deletion
@@ -15,7 +15,8 @@ def calibration_error(
     min_quantile: float = 0.005,
     max_quantile: float = 0.995,
 ) -> Mapping[str, Any]:
-    """Computes an aggregate score for the marginal calibration error over an ensemble of approximate
+    """
+    Computes an aggregate score for the marginal calibration error over an ensemble of approximate
     posteriors. The calibration error is given as the aggregate (e.g., median) of the absolute deviation
     between an alpha-CI and the relative number of inliers from ``estimates`` over multiple alphas in
     (0, 1).
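
As a quick reference for what this docstring describes, a minimal NumPy sketch of the marginal calibration error (central alpha-CIs, coverage of ``targets``, median absolute deviation). The array names, shapes, and the alpha grid spanning the min/max quantiles are illustrative assumptions, not the library's implementation:

import numpy as np

rng = np.random.default_rng(0)
estimates = rng.normal(size=(100, 500, 3))  # assumed layout: (num_datasets, num_posterior_draws, num_params)
targets = rng.normal(size=(100, 3))         # (num_datasets, num_params)

alphas = np.linspace(0.005, 0.995, 100)     # roughly the role of min_quantile / max_quantile
abs_deviations = []
for alpha in alphas:
    lower = np.quantile(estimates, 0.5 - alpha / 2, axis=1)
    upper = np.quantile(estimates, 0.5 + alpha / 2, axis=1)
    coverage = np.mean((targets >= lower) & (targets <= upper), axis=0)  # relative number of inliers
    abs_deviations.append(np.abs(coverage - alpha))
calibration_error = np.median(abs_deviations, axis=0)  # aggregate (median) over alphas, per parameter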

bayesflow/diagnostics/metrics/expected_calibration_error.py

Lines changed: 5 additions & 5 deletions
@@ -13,15 +13,15 @@ def expected_calibration_error(
     n_bins: int = 10,
     return_probs: bool = False,
 ) -> Mapping[str, Any]:
-    """Estimates the expected calibration error (ECE) of a model comparison network according to [1].
+    """
+    Estimates the expected calibration error (ECE) of a model comparison network according to [1].
 
-    [1] Naeini, M. P., Cooper, G., & Hauskrecht, M. (2015).
-    Obtaining well calibrated probabilities using bayesian binning.
-    In Proceedings of the AAAI conference on artificial intelligence (Vol. 29, No. 1).
+    [1] Naeini, M. P., Cooper, G., & Hauskrecht, M. (2015). Obtaining well calibrated probabilities using
+    Bayesian binning. In Proceedings of the AAAI conference on artificial intelligence (Vol. 29, No. 1).
 
     Notes
     -----
-    Make sure that ``targets`` are **one-hot encoded** classes!
+    Make sure that ``targets`` are **one-hot encoded** classes (i.e., model indices)!
 
     Parameters
     ----------
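
Since the Notes insist on one-hot encoded ``targets``, here is a small illustrative sketch of that encoding together with a plain binned ECE in the spirit of [1]; the names and binning details are assumptions, not this function's internals:

import numpy as np

model_indices = np.array([0, 2, 1, 2])        # true model index per dataset
targets = np.eye(3)[model_indices]            # one-hot encoded targets, shape (4, 3)
estimates = np.array([[0.7, 0.2, 0.1],
                      [0.1, 0.3, 0.6],
                      [0.2, 0.5, 0.3],
                      [0.3, 0.3, 0.4]])       # predicted model probabilities

confidences = estimates.max(axis=1)
accuracies = (estimates.argmax(axis=1) == targets.argmax(axis=1)).astype(float)

edges = np.linspace(0.0, 1.0, 10 + 1)         # n_bins = 10
ece = 0.0
for lo, hi in zip(edges[:-1], edges[1:]):
    in_bin = (confidences > lo) & (confidences <= hi)
    if in_bin.any():
        ece += in_bin.mean() * abs(accuracies[in_bin].mean() - confidences[in_bin].mean())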

bayesflow/diagnostics/metrics/posterior_contraction.py

Lines changed: 2 additions & 1 deletion
@@ -12,7 +12,8 @@ def posterior_contraction(
     variable_names: Sequence[str] = None,
     aggregation: Callable = np.median,
 ) -> Mapping[str, Any]:
-    """Computes the posterior contraction (PC) from prior to posterior for the given samples.
+    """
+    Computes the posterior contraction (PC) from prior to posterior for the given samples.
 
     Parameters
     ----------
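
For orientation, posterior contraction is commonly defined as one minus the ratio of posterior to prior variance. The sketch below uses that common definition with assumed array names and shapes; it is not necessarily identical to this function's code:

import numpy as np

rng = np.random.default_rng(0)
prior_samples = rng.normal(0.0, 1.0, size=(4000, 3))          # (num_prior_draws, num_params)
posterior_samples = rng.normal(0.0, 0.3, size=(100, 500, 3))  # (num_datasets, num_draws, num_params)

contraction = 1.0 - posterior_samples.var(axis=1) / prior_samples.var(axis=0)
summary = np.median(contraction, axis=0)  # aggregation=np.median over datasets, per parameter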

bayesflow/diagnostics/metrics/root_mean_squared_error.py

Lines changed: 2 additions & 1 deletion
@@ -13,7 +13,8 @@ def root_mean_squared_error(
     normalize: bool = True,
     aggregation: Callable = np.median,
 ) -> Mapping[str, Any]:
-    """Computes the (Normalized) Root Mean Squared Error (RMSE/NRMSE) for the given posterior and prior samples.
+    """
+    Computes the (Normalized) Root Mean Squared Error (RMSE/NRMSE) for the given posterior and prior samples.
 
     Parameters
     ----------
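
A short sketch of RMSE between posterior draws and known targets, with one possible normalization (by the per-parameter range of the targets); the normalization convention and array names are assumptions rather than this function's exact behavior:

import numpy as np

rng = np.random.default_rng(0)
estimates = rng.normal(size=(100, 500, 3))  # (num_datasets, num_draws, num_params)
targets = rng.normal(size=(100, 3))

rmse = np.sqrt(np.mean((estimates - targets[:, None, :]) ** 2, axis=1))  # per dataset and parameter
nrmse = rmse / (targets.max(axis=0) - targets.min(axis=0))               # normalize=True, assumed convention
summary = np.median(nrmse, axis=0)                                       # aggregation=np.median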

bayesflow/distributions/diagonal_normal.py

Lines changed: 26 additions & 0 deletions
@@ -22,6 +22,32 @@ def __init__(
         seed_generator: keras.random.SeedGenerator = None,
         **kwargs,
     ):
+        """
+        Initializes a backend-agnostic diagonal Gaussian distribution with optional learnable parameters.
+
+        This class represents a Gaussian distribution with a diagonal covariance matrix, allowing for efficient
+        sampling and density evaluation.
+
+        The mean and standard deviation can be specified as fixed values or learned during training. The class also
+        supports random number generation with an optional seed for reproducibility.
+
+        Parameters
+        ----------
+        mean : int, float, np.ndarray, or Tensor, optional
+            The mean of the Gaussian distribution. Can be a scalar or a tensor. Default is 0.0.
+        std : int, float, np.ndarray, or Tensor, optional
+            The standard deviation of the Gaussian distribution. Can be a scalar or a tensor.
+            Default is 1.0.
+        use_learnable_parameters : bool, optional
+            Whether to treat the mean and standard deviation as learnable parameters. Default is False.
+        seed_generator : keras.random.SeedGenerator, optional
+            A Keras seed generator for reproducible random sampling. If None, a new seed
+            generator is created. Default is None.
+        **kwargs
+            Additional keyword arguments passed to the base `Distribution` class.
+
+        """
+
         super().__init__(**kwargs)
         self.mean = mean
         self.std = std
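
A hedged usage sketch based only on the constructor arguments documented above; the import path is inferred from the file location, and the seed generator is standard Keras 3:

import keras
from bayesflow.distributions import DiagonalNormal  # assumed export, per bayesflow/distributions/diagonal_normal.py

latent = DiagonalNormal(
    mean=0.0,
    std=1.0,
    use_learnable_parameters=False,
    seed_generator=keras.random.SeedGenerator(42),
)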

bayesflow/distributions/diagonal_student_t.py

Lines changed: 27 additions & 0 deletions
@@ -25,6 +25,33 @@ def __init__(
         seed_generator: keras.random.SeedGenerator = None,
         **kwargs,
     ):
+        """
+        Initializes a backend-agnostic Student's t-distribution with optional learnable parameters.
+
+        This class represents a Student's t-distribution, which is useful for modeling heavy-tailed data.
+        The distribution is parameterized by degrees of freedom (`df`), location (`loc`), and scale (`scale`).
+        These parameters can either be fixed or learned during training.
+
+        The class also supports random number generation with an optional seed for reproducibility.
+
+        Parameters
+        ----------
+        df : int or float
+            Degrees of freedom for the Student's t-distribution. Lower values result in
+            heavier tails, making it more robust to outliers.
+        loc : int, float, np.ndarray, or Tensor, optional
+            The location parameter (mean) of the distribution. Default is 0.0.
+        scale : int, float, np.ndarray, or Tensor, optional
+            The scale parameter (standard deviation) of the distribution. Default is 1.0.
+        use_learnable_parameters : bool, optional
+            Whether to treat `loc` and `scale` as learnable parameters. Default is False.
+        seed_generator : keras.random.SeedGenerator, optional
+            A Keras seed generator for reproducible random sampling. If None, a new seed
+            generator is created. Default is None.
+        **kwargs
+            Additional keyword arguments passed to the base `Distribution` class.
+        """
+
         super().__init__(**kwargs)
 
         self.df = df
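
An analogous sketch for the heavy-tailed case, again restricted to the documented constructor arguments; the export name is inferred from the file path:

from bayesflow.distributions import DiagonalStudentT  # assumed export, per bayesflow/distributions/diagonal_student_t.py

heavy_tailed_latent = DiagonalStudentT(
    df=5,       # lower df -> heavier tails
    loc=0.0,
    scale=1.0,
    use_learnable_parameters=False,
)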

bayesflow/networks/consistency_models/consistency_model.py

Lines changed: 11 additions & 14 deletions
@@ -15,18 +15,16 @@
 
 @register_keras_serializable(package="bayesflow.networks")
 class ConsistencyModel(InferenceNetwork):
-    """Implements a Consistency Model with Consistency Training (CT) as
-    described in [1-2]. The adaptations to CT described in [2] were taken
-    into account in this implementation.
-
-    [1] Song, Y., Dhariwal, P., Chen, M. & Sutskever, I. (2023).
-    Consistency Models.
-    arXiv preprint arXiv:2303.01469
-
-    [2] Song, Y., & Dhariwal, P. (2023).
-    Improved Techniques for Training Consistency Models:
-    arXiv preprint arXiv:2310.14189
-    Discussion: https://openreview.net/forum?id=WNzy9bRDvG
+    """Implements a Consistency Model with Consistency Training (CT) as described in [1-2]. The adaptations to CT
+    described in [2] were taken into account in our implementation for ABI [3].
+
+    [1] Song, Y., Dhariwal, P., Chen, M. & Sutskever, I. (2023). Consistency Models. arXiv preprint arXiv:2303.01469
+
+    [2] Song, Y., & Dhariwal, P. (2023). Improved Techniques for Training Consistency Models.
+    arXiv preprint arXiv:2310.14189. Discussion: https://openreview.net/forum?id=WNzy9bRDvG
+
+    [3] Schmitt, M., Pratz, V., Köthe, U., Bürkner, P. C., & Radev, S. T. (2023). Consistency models for scalable and
+    fast simulation-based inference. arXiv preprint arXiv:2312.05440.
     """
 
     MLP_DEFAULT_CONFIG = {
@@ -49,8 +47,7 @@ def __init__(
         s1: int | float = 50,
         **kwargs,
     ):
-        """Creates an instance of a consistency model (CM) to be used
-        for standalone consistency training (CT).
+        """Creates an instance of a consistency model (CM) to be used for standalone consistency training (CT).
 
         Parameters:
         -----------
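
For context on the docstring's references, consistency models in [1-2] parameterize the consistency function so that it reduces to the identity at the minimum noise level. The sketch below follows the papers' skip/output scalings, with sigma_data and eps as assumed constants; it does not reproduce this class's internals:

import numpy as np

def consistency_function(network, x_t, t, sigma_data=0.5, eps=1e-3):
    """f(x_t, t) = c_skip(t) * x_t + c_out(t) * F(x_t, t), with f(x, eps) = x (cf. [1-2])."""
    c_skip = sigma_data**2 / ((t - eps) ** 2 + sigma_data**2)
    c_out = sigma_data * (t - eps) / np.sqrt(sigma_data**2 + t**2)
    return c_skip * x_t + c_out * network(x_t, t)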

bayesflow/networks/coupling_flow/actnorm.py

Lines changed: 5 additions & 9 deletions
@@ -10,24 +10,20 @@
 
 @serializable(package="networks.coupling_flow")
 class ActNorm(InvertibleLayer):
-    """Implements an Activation Normalization (ActNorm) Layer.
-    Activation Normalization is learned invertible normalization, using
-    a Scale (s) and Bias (b) vector::
+    """Implements an Activation Normalization (ActNorm) Layer. Activation Normalization is learned invertible
+    normalization, using a scale (s) and a bias (b) vector::
 
         y = s * x + b  (forward)
        x = (y - b) / s  (inverse)
 
     References
     ----------
 
-    .. [1] Kingma, D. P., & Dhariwal, P. (2018).
-       Glow: Generative flow with invertible 1x1 convolutions.
+    [1] Kingma, D. P., & Dhariwal, P. (2018). Glow: Generative flow with invertible 1x1 convolutions.
        Advances in Neural Information Processing Systems, 31.
 
-    .. [2] Salimans, Tim, and Durk P. Kingma. (2016).
-       Weight normalization: A simple reparameterization to accelerate
-       training of deep neural networks.
-       Advances in Neural Information Processing Systems, 29.
+    [2] Salimans, Tim, and Durk P. Kingma. (2016). Weight normalization: A simple reparameterization to accelerate
+    training of deep neural networks. Advances in Neural Information Processing Systems, 29.
     """
 
     def __init__(self, **kwargs):
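
A plain-NumPy illustration of the forward/inverse maps quoted in the docstring, with the data-dependent initialization suggested in [1-2] (standardizing the first batch); this illustrates the idea rather than this layer's Keras code:

import numpy as np

x = np.random.default_rng(0).normal(2.0, 3.0, size=(256, 8))  # (batch, features)

s = 1.0 / x.std(axis=0)        # initialize so the first batch has unit variance per feature
b = -x.mean(axis=0) * s        # ... and zero mean per feature

y = s * x + b                  # forward
x_reconstructed = (y - b) / s  # inverse
assert np.allclose(x, x_reconstructed)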
