From 34c7f2a241ee860f8bd0dc0f2db46163bec05cf3 Mon Sep 17 00:00:00 2001
From: han-ol <g@hans.olischlaeger.com>
Date: Thu, 27 Mar 2025 16:31:12 +0100
Subject: [PATCH 01/20] Better parameterization of covariance matrices

---
 bayesflow/approximators/point_approximator.py |  9 ++-
 bayesflow/links/__init__.py                   |  2 +-
 bayesflow/links/positive_definite.py          | 52 +++++++++++++
 bayesflow/links/positive_semi_definite.py     | 20 -----
 bayesflow/scores/multivariate_normal_score.py | 14 ++--
 .../scores/parametric_distribution_score.py   |  3 +-
 bayesflow/scores/scoring_rule.py              | 21 +++--
 bayesflow/utils/__init__.py                   |  1 +
 bayesflow/utils/tensor_utils.py               | 77 +++++++++++++++++++
 tests/test_links/conftest.py                  | 16 ++--
 tests/test_links/test_links.py                | 22 ++----
 tests/test_scores/test_scores.py              | 16 ++--
 12 files changed, 192 insertions(+), 61 deletions(-)
 create mode 100644 bayesflow/links/positive_definite.py
 delete mode 100644 bayesflow/links/positive_semi_definite.py

diff --git a/bayesflow/approximators/point_approximator.py b/bayesflow/approximators/point_approximator.py
index 836dd060c..6fb5bdd14 100644
--- a/bayesflow/approximators/point_approximator.py
+++ b/bayesflow/approximators/point_approximator.py
@@ -5,7 +5,7 @@
 )
 
 from bayesflow.types import Tensor
-from bayesflow.utils import filter_kwargs, split_arrays, squeeze_inner_estimates_dict
+from bayesflow.utils import filter_kwargs, split_arrays, squeeze_inner_estimates_dict, logging
 from .continuous_approximator import ContinuousApproximator
 
 
@@ -119,6 +119,7 @@ def sample(
     def _prepare_conditions(self, conditions: dict[str, np.ndarray], **kwargs) -> dict[str, Tensor]:
         """Adapts and converts the conditions to tensors."""
         conditions = self.adapter(conditions, strict=False, stage="inference", **kwargs)
+        conditions.pop("inference_variables", None)
         return keras.tree.map_structure(keras.ops.convert_to_tensor, conditions)
 
     def _apply_inverse_adapter_to_estimates(
@@ -130,6 +131,12 @@ def _apply_inverse_adapter_to_estimates(
         for score_key, score_val in estimates.items():
             processed[score_key] = {}
             for head_key, estimate in score_val.items():
+                if head_key in self.inference_network.scores[score_key].not_transforming_like_vector:
+                    logging.warning(
+                        f"Estimate '{score_key}.{head_key}' is marked to not transform like a vector. "
+                        "It was treated like a vector by the adapter. Handle '{head_key}' estimates with care."
+                    )
+
                 adapted = self.adapter(
                     {"inference_variables": estimate},
                     inverse=True,
diff --git a/bayesflow/links/__init__.py b/bayesflow/links/__init__.py
index a32fd6c21..77913f52b 100644
--- a/bayesflow/links/__init__.py
+++ b/bayesflow/links/__init__.py
@@ -2,7 +2,7 @@
 
 from .ordered import Ordered
 from .ordered_quantiles import OrderedQuantiles
-from .positive_semi_definite import PositiveSemiDefinite
+from .positive_definite import PositiveDefinite
 
 from ..utils._docs import _add_imports_to_all
 
diff --git a/bayesflow/links/positive_definite.py b/bayesflow/links/positive_definite.py
new file mode 100644
index 000000000..d676ac665
--- /dev/null
+++ b/bayesflow/links/positive_definite.py
@@ -0,0 +1,52 @@
+import keras
+
+# import numpy as np
+from keras.saving import register_keras_serializable as serializable
+
+from bayesflow.types import Tensor
+from bayesflow.utils import keras_kwargs, fill_triangular_matrix
+
+
+@serializable(package="bayesflow.links")
+class PositiveDefinite(keras.Layer):
+    """Activation function to link from flat elements of a lower triangular matrix to a positive definite matrix."""
+
+    def __init__(self, **kwargs):
+        super().__init__(**keras_kwargs(kwargs))
+        self.built = True
+
+    def call(self, inputs: Tensor) -> Tensor:
+        # Build cholesky factor from inputs
+        L = fill_triangular_matrix(inputs, positive_diag=True)
+
+        # diagonal_mask = keras.ops.identity(L.shape[-1]) > 0
+        # L[..., diagonal_mask] = keras.activations.softplus(L[..., diagonal_mask])
+        # L += keras.ops.identity(L.shape[-1]) * 2
+        # L *= keras.ops.sign(keras.ops.diagonal(L, axis1=-1))[..., None]  # ensure positive diagonal entries
+
+        # calculate positive definite matrix from cholesky factors
+        psd = keras.ops.matmul(
+            L,
+            keras.ops.moveaxis(L, -2, -1),  # L transposed
+        )
+        return psd
+
+    def compute_output_shape(self, input_shape):
+        m = input_shape[-1]
+        n = int((0.25 + 2.0 * m) ** 0.5 - 0.5)
+        return input_shape[:-1] + (n, n)
+
+    def compute_input_shape(self, output_shape):
+        """
+        Returns the shape of parameterization of a cholesky factor triangular matrix.
+
+        There are m nonzero elements of a lower triangular nxn matrix with m = n * (n + 1) / 2.
+
+        Example
+        -------
+        >>> PositiveDefinite().compute_input_shape((None, 3, 3))
+        (None, 6)
+        """
+        n = output_shape[-1]
+        m = int(n * (n + 1) / 2)
+        return output_shape[:-2] + (m,)
diff --git a/bayesflow/links/positive_semi_definite.py b/bayesflow/links/positive_semi_definite.py
deleted file mode 100644
index a056fc3c3..000000000
--- a/bayesflow/links/positive_semi_definite.py
+++ /dev/null
@@ -1,20 +0,0 @@
-import keras
-from keras.saving import register_keras_serializable as serializable
-
-from bayesflow.types import Tensor
-from bayesflow.utils import keras_kwargs
-
-
-@serializable(package="bayesflow.links")
-class PositiveSemiDefinite(keras.Layer):
-    """Activation function to link from any square matrix to a positive semidefinite matrix."""
-
-    def __init__(self, **kwargs):
-        super().__init__(**keras_kwargs(kwargs))
-
-    def call(self, inputs: Tensor) -> Tensor:
-        # multiply M * M^T to get symmetric matrix
-        return keras.ops.einsum("...ij,...kj->...ik", inputs, inputs)
-
-    def compute_output_shape(self, input_shape):
-        return input_shape
diff --git a/bayesflow/scores/multivariate_normal_score.py b/bayesflow/scores/multivariate_normal_score.py
index 66153fd34..efe560388 100644
--- a/bayesflow/scores/multivariate_normal_score.py
+++ b/bayesflow/scores/multivariate_normal_score.py
@@ -4,7 +4,7 @@
 from keras.saving import register_keras_serializable as serializable
 
 from bayesflow.types import Shape, Tensor
-from bayesflow.links import PositiveSemiDefinite
+from bayesflow.links import PositiveDefinite
 from bayesflow.utils import logging
 
 from .parametric_distribution_score import ParametricDistributionScore
@@ -21,7 +21,11 @@ def __init__(self, dim: int = None, links: dict = None, **kwargs):
         super().__init__(links=links, **kwargs)
 
         self.dim = dim
-        self.links = links or {"covariance": PositiveSemiDefinite()}
+        self.links = links or {"covariance": PositiveDefinite()}
+
+        # mark head for covariance matrix as an exception for adapter transformations
+        self.not_transforming_like_vector = ["covariance"]
+
         self.config = {"dim": dim}
 
         logging.warning("MultivariateNormalScore is unstable.")
@@ -60,12 +64,12 @@ def log_prob(self, x: Tensor, mean: Tensor, covariance: Tensor) -> Tensor:
             A tensor containing the log probability densities for each sample in `x` under the
             given Gaussian distribution.
         """
-        diff = x[:, None, :] - mean
-        inv_covariance = keras.ops.inv(covariance)
+        diff = x - mean
+        precision = keras.ops.inv(covariance)
         log_det_covariance = keras.ops.slogdet(covariance)[1]  # Only take the log of the determinant part
 
         # Compute the quadratic term in the exponential of the multivariate Gaussian
-        quadratic_term = keras.ops.einsum("...i,...ij,...j->...", diff, inv_covariance, diff)
+        quadratic_term = keras.ops.einsum("...i,...ij,...j->...", diff, precision, diff)
 
         # Compute the log probability density
         log_prob = -0.5 * (self.dim * keras.ops.log(2 * math.pi) + log_det_covariance + quadratic_term)
diff --git a/bayesflow/scores/parametric_distribution_score.py b/bayesflow/scores/parametric_distribution_score.py
index 51cef1776..17806ef16 100644
--- a/bayesflow/scores/parametric_distribution_score.py
+++ b/bayesflow/scores/parametric_distribution_score.py
@@ -51,5 +51,4 @@ def score(self, estimates: dict[str, Tensor], targets: Tensor, weights: Tensor =
         """
         scores = -self.log_prob(x=targets, **estimates)
         score = self.aggregate(scores, weights)
-        # multipy to mitigate instability due to relatively high values of parametric score
-        return score * 0.01
+        return score
diff --git a/bayesflow/scores/scoring_rule.py b/bayesflow/scores/scoring_rule.py
index ef0645cc1..b16d14ac3 100644
--- a/bayesflow/scores/scoring_rule.py
+++ b/bayesflow/scores/scoring_rule.py
@@ -29,6 +29,8 @@ def __init__(
         self.subnets_kwargs = subnets_kwargs or {}
         self.links = links or {}
 
+        self.not_transforming_like_vector = []
+
         self.config = {"subnets_kwargs": self.subnets_kwargs}
 
     def get_config(self):
@@ -95,14 +97,14 @@ def get_link(self, key: str) -> keras.Layer:
         else:
             return self.links[key]
 
-    def get_head(self, key: str, shape: Shape) -> keras.Sequential:
+    def get_head(self, key: str, output_shape: Shape) -> keras.Sequential:
         """For a specified head key and shape, request corresponding head network.
 
         Parameters
         ----------
         key : str
             Name of head for which to request a link.
-        shape: Shape
+        output_shape: Shape
             The necessary shape for the point estimators.
 
         Returns
@@ -111,10 +113,19 @@ def get_head(self, key: str, shape: Shape) -> keras.Sequential:
             Head network consisting of a learnable projection, a reshape and a link operation
             to parameterize estimates.
         """
-        subnet = self.get_subnet(key)
-        dense = keras.layers.Dense(units=math.prod(shape))
-        reshape = keras.layers.Reshape(target_shape=shape)
+        # initialize head components back to front
         link = self.get_link(key)
+
+        # link input shape can differ from output shape
+        if hasattr(link, "compute_input_shape"):
+            link_input_shape = link.compute_input_shape(output_shape)
+        else:
+            link_input_shape = output_shape
+
+        reshape = keras.layers.Reshape(target_shape=link_input_shape)
+        dense = keras.layers.Dense(units=math.prod(link_input_shape))
+        subnet = self.get_subnet(key)
+
         return keras.Sequential([subnet, dense, reshape, link])
 
     def score(self, estimates: dict[str, Tensor], targets: Tensor, weights: Tensor) -> Tensor:
diff --git a/bayesflow/utils/__init__.py b/bayesflow/utils/__init__.py
index ecb546eae..1eeb2d354 100644
--- a/bayesflow/utils/__init__.py
+++ b/bayesflow/utils/__init__.py
@@ -66,6 +66,7 @@
     tile_axis,
     tree_concatenate,
     tree_stack,
+    fill_triangular_matrix,
 )
 from .validators import check_lengths_same
 from .workflow_utils import find_inference_network, find_summary_network
diff --git a/bayesflow/utils/tensor_utils.py b/bayesflow/utils/tensor_utils.py
index b65df49a7..9287dd982 100644
--- a/bayesflow/utils/tensor_utils.py
+++ b/bayesflow/utils/tensor_utils.py
@@ -277,3 +277,80 @@ def stack(*items):
             return keras.ops.stack(items, axis=axis)
 
     return keras.tree.map_structure(stack, *structures)
+
+
+def fill_triangular_matrix(x: Tensor, upper: bool = False, positive_diag: bool = False):
+    """
+    Reshapes a batch of matrix entries into a triangular matrix (either upper or lower).
+
+    Note: If final axis has length 1, this simply reshapes to (batch_size, 1, 1) and optionally applies softplus.
+
+    Parameters
+    ----------
+    x : Tensor of shape (batch_size, m)
+        Batch of flattened nonzero matrix elements for triangular matrix.
+    upper : bool
+        Return upper triangular matrix if True, else lower triangular matrix. Default is False.
+    positive_diag : bool
+        Whether to apply a softplus operation to diagonal elements. Default is False.
+
+    Returns
+    -------
+    Tensor of shape (batch_size, n, n)
+        Batch of triangular matrices with m = n * (n + 1) / 2 unique nonzero elements.
+
+    Raises
+    ------
+    ValueError
+        If provided nonzero elements do not correspond to possible triangular matrix shape
+        (n,n) with n = sqrt( 1/4 + 2 * m) - 1/2 due to m = n * (n + 1) / 2.
+    """
+    batch_shape = x.shape[:-1]
+    m = x.shape[-1]
+
+    if m == 1:
+        y = keras.ops.reshape(x, (-1, 1, 1))
+        if positive_diag:
+            y = keras.activations.softplus(y)
+        return y
+
+    # Calculate matrix shape
+    n = (0.25 + 2 * m) ** 0.5 - 0.5
+    if not np.isclose(np.floor(n), n):
+        raise ValueError(f"Input right-most shape ({m}) does not correspond to a triangular matrix.")
+    else:
+        n = int(n)
+
+    # Trick: Create triangular matrix by concatenating with a flipped version of its tail, then reshape.
+    x_tail = keras.ops.take(x, indices=list(range((m - (n**2 - m)), x.shape[-1])), axis=-1)
+    if not upper:
+        y = keras.ops.concatenate([x_tail, keras.ops.flip(x, axis=-1)], axis=len(batch_shape))
+        y = keras.ops.reshape(y, (-1, n, n))
+        y = keras.ops.tril(y)  # TODO: fails with tensorflow
+
+        if positive_diag:
+            y_offdiag = keras.ops.tril(y, k=-1)
+            y_diag = keras.ops.tril(
+                keras.ops.triu(  # carve out diagonal, by setting upper and lower offdiagonals to zero
+                    keras.activations.softplus(y)
+                ),  # apply softplus to enforce positivity
+            )
+            y = y_diag + y_offdiag
+
+    else:
+        y = keras.ops.concatenate([x, keras.ops.flip(x_tail, axis=-1)], axis=len(batch_shape))
+        y = keras.ops.reshape(y, (-1, n, n))
+        y = keras.ops.triu(
+            y,
+        )
+
+        if positive_diag:
+            y_offdiag = keras.ops.triu(y, k=1)
+            y_diag = keras.ops.tril(
+                keras.ops.triu(  # carve out diagonal, by setting upper and lower offdiagonals to zero
+                    keras.activations.softplus(y)
+                ),  # apply softplus to enforce positivity
+            )
+            y = y_diag + y_offdiag
+
+    return y
diff --git a/tests/test_links/conftest.py b/tests/test_links/conftest.py
index 8beb0bece..53e9eeac8 100644
--- a/tests/test_links/conftest.py
+++ b/tests/test_links/conftest.py
@@ -15,7 +15,7 @@ def num_variables():
 
 @pytest.fixture()
 def generic_preactivation(batch_size):
-    return keras.ops.ones((batch_size, 4, 4))
+    return keras.ops.ones((batch_size, 6))
 
 
 @pytest.fixture()
@@ -33,10 +33,10 @@ def ordered_quantiles():
 
 
 @pytest.fixture()
-def positive_semi_definite():
-    from bayesflow.links import PositiveSemiDefinite
+def positive_definite():
+    from bayesflow.links import PositiveDefinite
 
-    return PositiveSemiDefinite()
+    return PositiveDefinite()
 
 
 @pytest.fixture()
@@ -44,7 +44,7 @@ def linear():
     return keras.layers.Activation("linear")
 
 
-@pytest.fixture(params=["ordered", "ordered_quantiles", "positive_semi_definite", "linear"], scope="function")
+@pytest.fixture(params=["ordered", "ordered_quantiles", "positive_definite", "linear"], scope="function")
 def link(request):
     return request.getfixturevalue(request.param)
 
@@ -84,6 +84,6 @@ def unordered(batch_size, num_quantiles, num_variables):
     return keras.random.normal((batch_size, num_quantiles, num_variables))
 
 
-@pytest.fixture()
-def random_matrix_batch(batch_size, num_variables):
-    return keras.random.normal((batch_size, num_variables, num_variables))
+# @pytest.fixture()
+# def random_matrix_batch(batch_size, num_variables):
+#     return keras.random.normal((batch_size, num_variables, num_variables))
diff --git a/tests/test_links/test_links.py b/tests/test_links/test_links.py
index b0ea22242..aed79b988 100644
--- a/tests/test_links/test_links.py
+++ b/tests/test_links/test_links.py
@@ -3,13 +3,6 @@
 import pytest
 
 
-def test_link_output(link, generic_preactivation):
-    output_shape = link.compute_output_shape(generic_preactivation.shape)
-    output = link(generic_preactivation)
-
-    assert output_shape == output.shape
-
-
 def test_invalid_shape_for_ordered_quantiles(ordered_quantiles, batch_size, num_quantiles, num_variables):
     with pytest.raises(AssertionError) as excinfo:
         ordered_quantiles.build((batch_size, batch_size, num_quantiles, num_variables))
@@ -59,16 +52,17 @@ def test_quantile_ordering(quantiles, unordered):
     check_ordering(output, axis)
 
 
-def test_positive_semi_definite(random_matrix_batch):
-    from bayesflow.links import PositiveSemiDefinite
-
-    activation = PositiveSemiDefinite()
-
-    output = activation(random_matrix_batch)
+def test_positive_definite(positive_definite, batch_size, num_variables):
+    psd = positive_definite
+    input_shape = psd.compute_input_shape((batch_size, num_variables, num_variables))
+    print(input_shape)
+    random_preactivation = keras.random.normal(input_shape, seed=12)
+    output = psd(random_preactivation)
 
     output = keras.ops.convert_to_numpy(output)
     eigenvalues = np.linalg.eig(output).eigenvalues
 
     assert np.all(eigenvalues.real > 0) and np.all(np.isclose(eigenvalues.imag, 0)), (
-        f"output is not positive semi-definite: real={eigenvalues.real}, imag={eigenvalues.imag}"
+        f"output is not positive definite: min(real)={np.min(eigenvalues.real)}, "
+        f"max(abs(imag))={np.max(np.abs(eigenvalues.imag))}"
     )
diff --git a/tests/test_scores/test_scores.py b/tests/test_scores/test_scores.py
index 24765688a..73305961d 100644
--- a/tests/test_scores/test_scores.py
+++ b/tests/test_scores/test_scores.py
@@ -13,15 +13,21 @@ def test_require_argument_k():
 
 def test_score_output(scoring_rule, random_conditions):
     if random_conditions is None:
-        random_conditions = keras.ops.convert_to_tensor([[1.0]])
+        random_conditions = keras.ops.convert_to_tensor([[1.0, 1.0]])
 
     # Using random random_conditions also as targets for the purpose of this test.
     head_shapes = scoring_rule.get_head_shapes_from_target_shape(random_conditions.shape)
     print(scoring_rule.get_config())
-    estimates = {
-        k: scoring_rule.get_link(k)(keras.random.normal((random_conditions.shape[0],) + head_shape))
-        for k, head_shape in head_shapes.items()
-    }
+    estimates = {}
+    for key, output_shape in head_shapes.items():
+        link = scoring_rule.get_link(key)
+        if hasattr(link, "compute_input_shape"):
+            link_input_shape = link.compute_input_shape(output_shape)
+        else:
+            link_input_shape = output_shape
+        dummy_input = keras.random.normal((random_conditions.shape[0],) + link_input_shape)
+        estimates[key] = link(dummy_input)
+
     score = scoring_rule.score(estimates, random_conditions)
 
     assert score.ndim == 0

From 84ed002a24861e20f8d40fe464f9f4de7566cd69 Mon Sep 17 00:00:00 2001
From: han-ol <g@hans.olischlaeger.com>
Date: Thu, 27 Mar 2025 16:44:53 +0100
Subject: [PATCH 02/20] Fix format string

---
 bayesflow/approximators/point_approximator.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bayesflow/approximators/point_approximator.py b/bayesflow/approximators/point_approximator.py
index 6fb5bdd14..0eeb84112 100644
--- a/bayesflow/approximators/point_approximator.py
+++ b/bayesflow/approximators/point_approximator.py
@@ -134,7 +134,7 @@ def _apply_inverse_adapter_to_estimates(
                 if head_key in self.inference_network.scores[score_key].not_transforming_like_vector:
                     logging.warning(
                         f"Estimate '{score_key}.{head_key}' is marked to not transform like a vector. "
-                        "It was treated like a vector by the adapter. Handle '{head_key}' estimates with care."
+                        f"It was treated like a vector by the adapter. Handle '{head_key}' estimates with care."
                     )
 
                 adapted = self.adapter(

From fbc01f573431d6104e6029d0393edd81db43ef90 Mon Sep 17 00:00:00 2001
From: han-ol <g@hans.olischlaeger.com>
Date: Fri, 28 Mar 2025 16:45:25 +0100
Subject: [PATCH 03/20] Test for invertibility of positive definite link output

---
 tests/test_links/test_links.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/tests/test_links/test_links.py b/tests/test_links/test_links.py
index aed79b988..411ed2cc2 100644
--- a/tests/test_links/test_links.py
+++ b/tests/test_links/test_links.py
@@ -53,12 +53,16 @@ def test_quantile_ordering(quantiles, unordered):
 
 
 def test_positive_definite(positive_definite, batch_size, num_variables):
-    psd = positive_definite
-    input_shape = psd.compute_input_shape((batch_size, num_variables, num_variables))
-    print(input_shape)
-    random_preactivation = keras.random.normal(input_shape, seed=12)
-    output = psd(random_preactivation)
+    input_shape = positive_definite.compute_input_shape((batch_size, num_variables, num_variables))
 
+    # Too strongly negative values lead to numerical instabilities -> reduce scale
+    random_preactivation = keras.random.normal(input_shape) * 0.1
+    output = positive_definite(random_preactivation)
+
+    # Check if output is invertible
+    np.linalg.inv(output)
+
+    # Calculated eigenvalues to test for positive definiteness
     output = keras.ops.convert_to_numpy(output)
     eigenvalues = np.linalg.eig(output).eigenvalues
 

From eebf9508e508dd5016375b01e1392458761767d4 Mon Sep 17 00:00:00 2001
From: han-ol <g@hans.olischlaeger.com>
Date: Fri, 28 Mar 2025 16:48:01 +0100
Subject: [PATCH 04/20] Allow estimation of univariate MVN

---
 bayesflow/networks/point_inference_network.py | 2 +-
 bayesflow/scores/multivariate_normal_score.py | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/bayesflow/networks/point_inference_network.py b/bayesflow/networks/point_inference_network.py
index a5447c501..0c083c8c7 100644
--- a/bayesflow/networks/point_inference_network.py
+++ b/bayesflow/networks/point_inference_network.py
@@ -132,7 +132,7 @@ def call(
         if xz is None and not self.built:
             raise ValueError("Cannot build inference network without inference variables.")
         if conditions is None:  # unconditional estimation uses a fixed input vector
-            conditions = keras.ops.convert_to_tensor([[1.0]], dtype=keras.ops.dtype(xz))
+            conditions = keras.ops.convert_to_tensor([[1.0]])
 
         # pass conditions to the shared subnet
         output = self.subnet(conditions, training=training)
diff --git a/bayesflow/scores/multivariate_normal_score.py b/bayesflow/scores/multivariate_normal_score.py
index efe560388..b1f6ff707 100644
--- a/bayesflow/scores/multivariate_normal_score.py
+++ b/bayesflow/scores/multivariate_normal_score.py
@@ -101,6 +101,8 @@ def sample(self, batch_shape: Shape, mean: Tensor, covariance: Tensor) -> Tensor
         Tensor
             A tensor of shape (batch_size, num_samples, D) containing the generated samples.
         """
+        if len(batch_shape) == 1:
+            batch_shape = (1,) + batch_shape
         batch_size, num_samples = batch_shape
         dim = keras.ops.shape(mean)[-1]
         if keras.ops.shape(mean) != (batch_size, dim):

From 42c6806b75770a1e93278e2018cfc3094a8acc97 Mon Sep 17 00:00:00 2001
From: han-ol <g@hans.olischlaeger.com>
Date: Fri, 28 Mar 2025 16:57:03 +0100
Subject: [PATCH 05/20] Remove commented lines

---
 bayesflow/links/positive_definite.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/bayesflow/links/positive_definite.py b/bayesflow/links/positive_definite.py
index d676ac665..cdcb27034 100644
--- a/bayesflow/links/positive_definite.py
+++ b/bayesflow/links/positive_definite.py
@@ -19,11 +19,6 @@ def call(self, inputs: Tensor) -> Tensor:
         # Build cholesky factor from inputs
         L = fill_triangular_matrix(inputs, positive_diag=True)
 
-        # diagonal_mask = keras.ops.identity(L.shape[-1]) > 0
-        # L[..., diagonal_mask] = keras.activations.softplus(L[..., diagonal_mask])
-        # L += keras.ops.identity(L.shape[-1]) * 2
-        # L *= keras.ops.sign(keras.ops.diagonal(L, axis1=-1))[..., None]  # ensure positive diagonal entries
-
         # calculate positive definite matrix from cholesky factors
         psd = keras.ops.matmul(
             L,

From d57970ac901caba2c390ae3de5112ea56e44b953 Mon Sep 17 00:00:00 2001
From: han-ol <g@hans.olischlaeger.com>
Date: Fri, 28 Mar 2025 17:00:53 +0100
Subject: [PATCH 06/20] Minor changes to comments and docstring for
 fill_triangular_matrix

---
 bayesflow/utils/tensor_utils.py | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/bayesflow/utils/tensor_utils.py b/bayesflow/utils/tensor_utils.py
index 9287dd982..0dd73b67e 100644
--- a/bayesflow/utils/tensor_utils.py
+++ b/bayesflow/utils/tensor_utils.py
@@ -281,7 +281,7 @@ def stack(*items):
 
 def fill_triangular_matrix(x: Tensor, upper: bool = False, positive_diag: bool = False):
     """
-    Reshapes a batch of matrix entries into a triangular matrix (either upper or lower).
+    Reshapes a batch of matrix elements into a triangular matrix (either upper or lower).
 
     Note: If final axis has length 1, this simply reshapes to (batch_size, 1, 1) and optionally applies softplus.
 
@@ -326,14 +326,13 @@ def fill_triangular_matrix(x: Tensor, upper: bool = False, positive_diag: bool =
     if not upper:
         y = keras.ops.concatenate([x_tail, keras.ops.flip(x, axis=-1)], axis=len(batch_shape))
         y = keras.ops.reshape(y, (-1, n, n))
-        y = keras.ops.tril(y)  # TODO: fails with tensorflow
+        y = keras.ops.tril(y)
 
         if positive_diag:
             y_offdiag = keras.ops.tril(y, k=-1)
+            # carve out diagonal, by setting upper and lower offdiagonals to zero
             y_diag = keras.ops.tril(
-                keras.ops.triu(  # carve out diagonal, by setting upper and lower offdiagonals to zero
-                    keras.activations.softplus(y)
-                ),  # apply softplus to enforce positivity
+                keras.ops.triu(keras.activations.softplus(y)),  # apply softplus to enforce positivity
             )
             y = y_diag + y_offdiag
 
@@ -346,10 +345,9 @@ def fill_triangular_matrix(x: Tensor, upper: bool = False, positive_diag: bool =
 
         if positive_diag:
             y_offdiag = keras.ops.triu(y, k=1)
+            # carve out diagonal, by setting upper and lower offdiagonals to zero
             y_diag = keras.ops.tril(
-                keras.ops.triu(  # carve out diagonal, by setting upper and lower offdiagonals to zero
-                    keras.activations.softplus(y)
-                ),  # apply softplus to enforce positivity
+                keras.ops.triu(keras.activations.softplus(y)),  # apply softplus to enforce positivity
             )
             y = y_diag + y_offdiag
 

From ddfdbdce1a21960acd28ce2602e1b08730ed20ff Mon Sep 17 00:00:00 2001
From: han-ol <g@hans.olischlaeger.com>
Date: Fri, 28 Mar 2025 17:21:53 +0100
Subject: [PATCH 07/20] Test coverage for unconditional MVNScore.sample

---
 tests/test_scores/test_scores.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/tests/test_scores/test_scores.py b/tests/test_scores/test_scores.py
index 73305961d..4e44c2ef7 100644
--- a/tests/test_scores/test_scores.py
+++ b/tests/test_scores/test_scores.py
@@ -46,3 +46,17 @@ def test_mean_score_optimality(mean_score, random_conditions):
 
     assert suboptimal_score > optimal_score
     assert keras.ops.isclose(optimal_score, 0)
+
+
+def test_unconditional_mvn(multivariate_normal_score):
+    mean = keras.ops.convert_to_tensor([[0.0, 1.0]])
+    covariance = keras.ops.convert_to_tensor([[[1.0, 0.0], [0.0, 1.0]]])
+    multivariate_normal_score.sample((10,), mean, covariance)
+
+
+def test_unconditional_mvn_value_error(multivariate_normal_score):
+    mean = keras.ops.convert_to_tensor([0.0, 1.0])
+    covariance = keras.ops.convert_to_tensor([[1.0, 0.0], [0.0, 1.0]])
+
+    with pytest.raises(ValueError):
+        multivariate_normal_score.sample((10,), mean, covariance)

From 2b38c211221359b7e061e557475107b0fe81a9ba Mon Sep 17 00:00:00 2001
From: han-ol <g@hans.olischlaeger.com>
Date: Fri, 28 Mar 2025 18:39:56 +0100
Subject: [PATCH 08/20] Remove instability warning MultivariateNormalScore

---
 bayesflow/scores/multivariate_normal_score.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/bayesflow/scores/multivariate_normal_score.py b/bayesflow/scores/multivariate_normal_score.py
index b1f6ff707..e49114081 100644
--- a/bayesflow/scores/multivariate_normal_score.py
+++ b/bayesflow/scores/multivariate_normal_score.py
@@ -5,7 +5,6 @@
 
 from bayesflow.types import Shape, Tensor
 from bayesflow.links import PositiveDefinite
-from bayesflow.utils import logging
 
 from .parametric_distribution_score import ParametricDistributionScore
 
@@ -28,8 +27,6 @@ def __init__(self, dim: int = None, links: dict = None, **kwargs):
 
         self.config = {"dim": dim}
 
-        logging.warning("MultivariateNormalScore is unstable.")
-
     def get_config(self):
         base_config = super().get_config()
         return base_config | self.config

From 1405ee58811056fb40846a7c45fe1dd8d6ad7791 Mon Sep 17 00:00:00 2001
From: han-ol <g@hans.olischlaeger.com>
Date: Mon, 31 Mar 2025 16:32:47 +0200
Subject: [PATCH 09/20] Remove commented numpy import

---
 bayesflow/links/positive_definite.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/bayesflow/links/positive_definite.py b/bayesflow/links/positive_definite.py
index cdcb27034..616f9080d 100644
--- a/bayesflow/links/positive_definite.py
+++ b/bayesflow/links/positive_definite.py
@@ -1,6 +1,5 @@
 import keras
 
-# import numpy as np
 from keras.saving import register_keras_serializable as serializable
 
 from bayesflow.types import Tensor

From f1e1ba1834a0f1f9cbfcbad4c56a4f870ced9756 Mon Sep 17 00:00:00 2001
From: han-ol <g@hans.olischlaeger.com>
Date: Mon, 31 Mar 2025 16:34:49 +0200
Subject: [PATCH 10/20] Fix dtype of dummy conditions if inference variables
 are available

---
 bayesflow/networks/point_inference_network.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/bayesflow/networks/point_inference_network.py b/bayesflow/networks/point_inference_network.py
index 0c083c8c7..05ad467fe 100644
--- a/bayesflow/networks/point_inference_network.py
+++ b/bayesflow/networks/point_inference_network.py
@@ -132,7 +132,9 @@ def call(
         if xz is None and not self.built:
             raise ValueError("Cannot build inference network without inference variables.")
         if conditions is None:  # unconditional estimation uses a fixed input vector
-            conditions = keras.ops.convert_to_tensor([[1.0]])
+            conditions = keras.ops.convert_to_tensor(
+                [[1.0]], dtype=keras.ops.dtype(xz) if xz is not None else "float32"
+            )
 
         # pass conditions to the shared subnet
         output = self.subnet(conditions, training=training)

From 9d8765631b8037108b26d21efdbd139bed0b148a Mon Sep 17 00:00:00 2001
From: han-ol <g@hans.olischlaeger.com>
Date: Mon, 31 Mar 2025 16:47:48 +0200
Subject: [PATCH 11/20] Tuple conversion in case batch_shape is a list

---
 bayesflow/scores/multivariate_normal_score.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bayesflow/scores/multivariate_normal_score.py b/bayesflow/scores/multivariate_normal_score.py
index e49114081..318fbec0a 100644
--- a/bayesflow/scores/multivariate_normal_score.py
+++ b/bayesflow/scores/multivariate_normal_score.py
@@ -99,7 +99,7 @@ def sample(self, batch_shape: Shape, mean: Tensor, covariance: Tensor) -> Tensor
             A tensor of shape (batch_size, num_samples, D) containing the generated samples.
         """
         if len(batch_shape) == 1:
-            batch_shape = (1,) + batch_shape
+            batch_shape = (1,) + tuple(batch_shape)
         batch_size, num_samples = batch_shape
         dim = keras.ops.shape(mean)[-1]
         if keras.ops.shape(mean) != (batch_size, dim):

From 4bbbffafc47ca3b506d971be376223f4ba66aa43 Mon Sep 17 00:00:00 2001
From: han-ol <g@hans.olischlaeger.com>
Date: Mon, 31 Mar 2025 17:05:48 +0200
Subject: [PATCH 12/20] Conversion to numpy before calling numpy operations

---
 tests/test_links/test_links.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_links/test_links.py b/tests/test_links/test_links.py
index 411ed2cc2..ad1be5753 100644
--- a/tests/test_links/test_links.py
+++ b/tests/test_links/test_links.py
@@ -58,12 +58,12 @@ def test_positive_definite(positive_definite, batch_size, num_variables):
     # Too strongly negative values lead to numerical instabilities -> reduce scale
     random_preactivation = keras.random.normal(input_shape) * 0.1
     output = positive_definite(random_preactivation)
+    output = keras.ops.convert_to_numpy(output)
 
     # Check if output is invertible
     np.linalg.inv(output)
 
     # Calculated eigenvalues to test for positive definiteness
-    output = keras.ops.convert_to_numpy(output)
     eigenvalues = np.linalg.eig(output).eigenvalues
 
     assert np.all(eigenvalues.real > 0) and np.all(np.isclose(eigenvalues.imag, 0)), (

From fe201aa15d675bbf1cd3df68c4171737f2e1088e Mon Sep 17 00:00:00 2001
From: han-ol <g@hans.olischlaeger.com>
Date: Mon, 31 Mar 2025 17:37:29 +0200
Subject: [PATCH 13/20] More detailed docs and renamed the transformation
 warning attribute

---
 bayesflow/approximators/point_approximator.py |  2 +-
 bayesflow/scores/scoring_rule.py              | 32 ++++++++++++++++---
 2 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/bayesflow/approximators/point_approximator.py b/bayesflow/approximators/point_approximator.py
index 0eeb84112..9c940241e 100644
--- a/bayesflow/approximators/point_approximator.py
+++ b/bayesflow/approximators/point_approximator.py
@@ -131,7 +131,7 @@ def _apply_inverse_adapter_to_estimates(
         for score_key, score_val in estimates.items():
             processed[score_key] = {}
             for head_key, estimate in score_val.items():
-                if head_key in self.inference_network.scores[score_key].not_transforming_like_vector:
+                if head_key in self.inference_network.scores[score_key].not_transforming_like_vector_warning:
                     logging.warning(
                         f"Estimate '{score_key}.{head_key}' is marked to not transform like a vector. "
                         f"It was treated like a vector by the adapter. Handle '{head_key}' estimates with care."
diff --git a/bayesflow/scores/scoring_rule.py b/bayesflow/scores/scoring_rule.py
index b16d14ac3..384955e72 100644
--- a/bayesflow/scores/scoring_rule.py
+++ b/bayesflow/scores/scoring_rule.py
@@ -17,6 +17,12 @@ class ScoringRule:
 
     To define a custom ``ScoringRule``, inherit from this class and overwrite the score method.
     For proper serialization, any new constructor arguments must be taken care of in a `get_config` method.
+
+    Estimates are typically parameterized by projection heads consisting of a neural network component
+    and a link to project into the correct output space.
+
+    `ScoringRule`s can score estimates consisting of multiple parts. See `MultivariateNormalScore` for an example
+    of a `ParametricDistributionScore`. The score evaluates an estimated mean and covariance simultaneously.
     """
 
     def __init__(
@@ -29,7 +35,12 @@ def __init__(
         self.subnets_kwargs = subnets_kwargs or {}
         self.links = links or {}
 
-        self.not_transforming_like_vector = []
+        # Prediction heads can output estimates in spaces other than the target distribution space.
+        # To such estimates the adapter cannot be straightforwardly applied in inverse direction,
+        # because the adapter is built to map vectors. When subclassing `ScoringRule`, add the names
+        # of such heads to the following list to warn users about difficulties with a type of estimate
+        # whenever the adapter is applied to them in inverse direction.
+        self.not_transforming_like_vector_warning = []
 
         self.config = {"subnets_kwargs": self.subnets_kwargs}
 
@@ -60,12 +71,15 @@ def get_head_shapes_from_target_shape(self, target_shape: Shape) -> dict[str, Sh
 
     def get_subnet(self, key: str) -> keras.Layer:
         """For a specified key, request a subnet to be used for projecting the shared condition embedding
-        before reshaping to the heads output shape.
+        before further projection and reshaping to the heads output shape.
+
+        If no subnet was specified for the key (e.g. upon initialization),
+        return just an instance of keras.layers.Identity.
 
         Parameters
         ----------
         key : str
-            Name of head for which to request a link.
+            Name of head for which to request a subnet.
 
         Returns
         -------
@@ -80,6 +94,8 @@ def get_subnet(self, key: str) -> keras.Layer:
     def get_link(self, key: str) -> keras.Layer:
         """For a specified key, request a link from network output to estimation target.
 
+        If no link was specified for the key (e.g. upon initialization), return a linear activation.
+
         Parameters
         ----------
         key : str
@@ -98,7 +114,15 @@ def get_link(self, key: str) -> keras.Layer:
             return self.links[key]
 
     def get_head(self, key: str, output_shape: Shape) -> keras.Sequential:
-        """For a specified head key and shape, request corresponding head network.
+        """For a specified head key and output shape, request corresponding head network.
+
+        A head network has the following components that are called sequentially:
+        1. subnet: A keras.Layer.
+        2. dense: A trainable linear projection with as many units as are required by the next component.
+        3. reshape: Changes shape of output of projection to match requirements of next component.
+        4. link: Transforms unconstrained values into a constrained space for the final estimator.
+
+        This method initializes the components in reverse order to meet all requirements and returns them.
 
         Parameters
         ----------

From 02ea22ce528292567cf9588801ca0388383a96a7 Mon Sep 17 00:00:00 2001
From: han-ol <g@hans.olischlaeger.com>
Date: Tue, 1 Apr 2025 11:59:39 +0200
Subject: [PATCH 14/20] Doc string detail

---
 bayesflow/scores/scoring_rule.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/bayesflow/scores/scoring_rule.py b/bayesflow/scores/scoring_rule.py
index 384955e72..9154675c1 100644
--- a/bayesflow/scores/scoring_rule.py
+++ b/bayesflow/scores/scoring_rule.py
@@ -129,7 +129,8 @@ def get_head(self, key: str, output_shape: Shape) -> keras.Sequential:
         key : str
             Name of head for which to request a link.
         output_shape: Shape
-            The necessary shape for the point estimators.
+            The necessary shape of estimated values for the given key as returned by
+            `scoring_rule.get_head_shapes_from_target_shape()`.
 
         Returns
         -------

From 9b466016595d44cb22a6e821740bb02dd8101fba Mon Sep 17 00:00:00 2001
From: han-ol <g@hans.olischlaeger.com>
Date: Tue, 1 Apr 2025 14:13:46 +0200
Subject: [PATCH 15/20] Remove untested comment for
 PointInferenceNetwork.sample()

---
 bayesflow/networks/point_inference_network.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/bayesflow/networks/point_inference_network.py b/bayesflow/networks/point_inference_network.py
index 05ad467fe..06a10d288 100644
--- a/bayesflow/networks/point_inference_network.py
+++ b/bayesflow/networks/point_inference_network.py
@@ -167,7 +167,6 @@ def compute_metrics(
 
         return metrics | {"loss": neg_score}
 
-    # WIP: untested draft of sample method
     @allow_batch_size
     def sample(self, batch_shape: Shape, conditions: Tensor = None) -> dict[str, Tensor]:
         """

From 5cb8995387325359752ba1ab94b415fca2c30f64 Mon Sep 17 00:00:00 2001
From: han-ol <g@hans.olischlaeger.com>
Date: Tue, 1 Apr 2025 14:39:17 +0200
Subject: [PATCH 16/20] Relax type hints for ContinuousApproximator.log_prob

---
 bayesflow/approximators/continuous_approximator.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/bayesflow/approximators/continuous_approximator.py b/bayesflow/approximators/continuous_approximator.py
index d1d57bb90..eb45f3fc7 100644
--- a/bayesflow/approximators/continuous_approximator.py
+++ b/bayesflow/approximators/continuous_approximator.py
@@ -338,7 +338,7 @@ def _sample(
             **filter_kwargs(kwargs, self.inference_network.sample),
         )
 
-    def log_prob(self, data: dict[str, np.ndarray], **kwargs) -> np.ndarray:
+    def log_prob(self, data: dict[str, np.ndarray], **kwargs) -> np.ndarray | dict(str, np.ndarray):
         """
         Computes the log-probability of given data under the model. The `data` dictionary is preprocessed using the
         `adapter`. Log-probabilities are returned as NumPy arrays.
@@ -358,7 +358,7 @@ def log_prob(self, data: dict[str, np.ndarray], **kwargs) -> np.ndarray:
         data = self.adapter(data, strict=False, stage="inference", **kwargs)
         data = keras.tree.map_structure(keras.ops.convert_to_tensor, data)
         log_prob = self._log_prob(**data, **kwargs)
-        log_prob = keras.ops.convert_to_numpy(log_prob)
+        log_prob = keras.tree.map_structure(keras.ops.convert_to_numpy, log_prob)
 
         return log_prob
 
@@ -368,7 +368,7 @@ def _log_prob(
         inference_conditions: Tensor = None,
         summary_variables: Tensor = None,
         **kwargs,
-    ) -> Tensor:
+    ) -> Tensor | dict(str, Tensor):
         if self.summary_network is None:
             if summary_variables is not None:
                 raise ValueError("Cannot use summary variables without a summary network.")

From 303127d2399faf39d324255bfd544113c9ad6342 Mon Sep 17 00:00:00 2001
From: han-ol <g@hans.olischlaeger.com>
Date: Tue, 1 Apr 2025 14:50:40 +0200
Subject: [PATCH 17/20] Support log-prob in PointApproximator

---
 bayesflow/approximators/point_approximator.py | 46 +++++++++++++++++--
 1 file changed, 42 insertions(+), 4 deletions(-)

diff --git a/bayesflow/approximators/point_approximator.py b/bayesflow/approximators/point_approximator.py
index 9c940241e..7e35153fb 100644
--- a/bayesflow/approximators/point_approximator.py
+++ b/bayesflow/approximators/point_approximator.py
@@ -111,11 +111,47 @@ def sample(
         if split:
             raise NotImplementedError("split=True is currently not supported for `PointApproximator`.")
             samples = split_arrays(samples, axis=-1)
-        # Squeeze samples if there's only one key-value pair.
-        samples = self._squeeze_samples(samples)
+        # Squeeze sample dictionary if there's only one key-value pair.
+        samples = self._squeeze_parametric_score_major_dict(samples)
 
         return samples
 
+    def log_prob(
+        self,
+        *,
+        data: dict[str, np.ndarray],
+        **kwargs,
+    ) -> np.ndarray | dict[str, np.ndarray]:
+        """
+        Computes the log-probability of given data under the parametric distribution(s) for given input conditions.
+
+        Parameters
+        ----------
+        data : dict[str, np.ndarray]
+            A dictionary mapping variable names to arrays representing the inference conditions and variables.
+        **kwargs
+            Additional keyword arguments passed to underlying processing functions.
+
+        Returns
+        -------
+        log_prob : np.ndarray or dict[str, np.ndarray]
+            Log-probabilities of the distribution `p(inference_variables | inference_conditions, h(summary_conditions))`
+            for all parametric scoring rules.
+
+            If only one parametric score is available, output is an array of log-probabilities.
+
+            Output is a dictionary if multiple parametric scores are available.
+            Then, each key is the name of a score and values are corresponding log-probabilities.
+
+
+            Log-probabilities have shape (num_datasets,).
+        """
+        log_prob = super().log_prob(data=data, **kwargs)
+        # Squeeze log probabilities dictionary if there's only one key-value pair.
+        log_prob = self._squeeze_parametric_score_major_dict(log_prob)
+
+        return log_prob
+
     def _prepare_conditions(self, conditions: dict[str, np.ndarray], **kwargs) -> dict[str, Tensor]:
         """Adapts and converts the conditions to tensors."""
         conditions = self.adapter(conditions, strict=False, stage="inference", **kwargs)
@@ -187,8 +223,10 @@ def _squeeze_estimates(
             }
         return squeezed
 
-    def _squeeze_samples(self, samples: dict[str, np.ndarray]) -> np.ndarray or dict[str, np.ndarray]:
-        """Squeezes the samples dictionary to just the value if there is only one key-value pair."""
+    def _squeeze_parametric_score_major_dict(
+        self, samples: dict[str, np.ndarray]
+    ) -> np.ndarray or dict[str, np.ndarray]:
+        """Squeezes the dictionary to just the value if there is only one key-value pair."""
         if len(samples) == 1:
             return next(iter(samples.values()))  # Extract and return the only item's value
         return samples

From 93e88332f7e1334f0520cdd3618dbb9219de3bfd Mon Sep 17 00:00:00 2001
From: han-ol <g@hans.olischlaeger.com>
Date: Tue, 1 Apr 2025 14:51:34 +0200
Subject: [PATCH 18/20] Remove comment stating log prob was untested

---
 bayesflow/networks/point_inference_network.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/bayesflow/networks/point_inference_network.py b/bayesflow/networks/point_inference_network.py
index 06a10d288..7a3ed1628 100644
--- a/bayesflow/networks/point_inference_network.py
+++ b/bayesflow/networks/point_inference_network.py
@@ -200,7 +200,6 @@ def sample(self, batch_shape: Shape, conditions: Tensor = None) -> dict[str, Ten
 
         return samples
 
-    # WIP: untested draft of log_prob method
     def log_prob(self, samples: Tensor, conditions: Tensor = None, **kwargs) -> dict[str, Tensor]:
         output = self.subnet(conditions)
         log_probs = {}

From 7bfacff5baae4c8c7b32eedc0118285b93dffdb9 Mon Sep 17 00:00:00 2001
From: han-ol <g@hans.olischlaeger.com>
Date: Tue, 1 Apr 2025 14:57:43 +0200
Subject: [PATCH 19/20] Fix typo

---
 bayesflow/approximators/continuous_approximator.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/bayesflow/approximators/continuous_approximator.py b/bayesflow/approximators/continuous_approximator.py
index eb45f3fc7..ed92795a7 100644
--- a/bayesflow/approximators/continuous_approximator.py
+++ b/bayesflow/approximators/continuous_approximator.py
@@ -338,7 +338,7 @@ def _sample(
             **filter_kwargs(kwargs, self.inference_network.sample),
         )
 
-    def log_prob(self, data: dict[str, np.ndarray], **kwargs) -> np.ndarray | dict(str, np.ndarray):
+    def log_prob(self, data: dict[str, np.ndarray], **kwargs) -> np.ndarray | dict[str, np.ndarray]:
         """
         Computes the log-probability of given data under the model. The `data` dictionary is preprocessed using the
         `adapter`. Log-probabilities are returned as NumPy arrays.
@@ -368,7 +368,7 @@ def _log_prob(
         inference_conditions: Tensor = None,
         summary_variables: Tensor = None,
         **kwargs,
-    ) -> Tensor | dict(str, Tensor):
+    ) -> Tensor | dict[str, Tensor]:
         if self.summary_network is None:
             if summary_variables is not None:
                 raise ValueError("Cannot use summary variables without a summary network.")

From d87b0b911b6dbab8790dd390ce237738c48c6b1f Mon Sep 17 00:00:00 2001
From: han-ol <g@hans.olischlaeger.com>
Date: Tue, 1 Apr 2025 17:48:17 +0200
Subject: [PATCH 20/20] Transformation warning using a class variable;
 docstring links

---
 bayesflow/approximators/point_approximator.py | 14 ++++----
 bayesflow/scores/multivariate_normal_score.py | 13 +++++--
 bayesflow/scores/scoring_rule.py              | 36 +++++++++++--------
 3 files changed, 39 insertions(+), 24 deletions(-)

diff --git a/bayesflow/approximators/point_approximator.py b/bayesflow/approximators/point_approximator.py
index 7e35153fb..5f1acf193 100644
--- a/bayesflow/approximators/point_approximator.py
+++ b/bayesflow/approximators/point_approximator.py
@@ -14,8 +14,9 @@ class PointApproximator(ContinuousApproximator):
     """
     A workflow for fast amortized point estimation of a conditional distribution.
 
-    The distribution is approximated by point estimators, parameterized by a feed-forward `PointInferenceNetwork`.
-    Conditions can be compressed by an optional `SummaryNetwork` or used directly as input to the inference network.
+    The distribution is approximated by point estimators, parameterized by a feed-forward
+    :class:`bayesflow.networks.PointInferenceNetwork`. Conditions can be compressed by an optional summary network
+    (inheriting from :class:`bayesflow.networks.SummaryNetwork`) or used directly as input to the inference network.
     """
 
     def estimate(
@@ -89,7 +90,7 @@ def sample(
             for the sampling process.
         split : bool, optional
             If True, the sampled arrays are split along the last axis, by default False.
-            Currently not supported for `PointApproximator`.
+            Currently not supported for :class:`PointApproximator`.
         **kwargs
             Additional keyword arguments passed to underlying processing functions.
 
@@ -135,15 +136,14 @@ def log_prob(
         Returns
         -------
         log_prob : np.ndarray or dict[str, np.ndarray]
-            Log-probabilities of the distribution `p(inference_variables | inference_conditions, h(summary_conditions))`
-            for all parametric scoring rules.
+            Log-probabilities of the distribution
+            `p(inference_variables | inference_conditions, h(summary_conditions))` for all parametric scoring rules.
 
             If only one parametric score is available, output is an array of log-probabilities.
 
             Output is a dictionary if multiple parametric scores are available.
             Then, each key is the name of a score and values are corresponding log-probabilities.
 
-
             Log-probabilities have shape (num_datasets,).
         """
         log_prob = super().log_prob(data=data, **kwargs)
@@ -167,7 +167,7 @@ def _apply_inverse_adapter_to_estimates(
         for score_key, score_val in estimates.items():
             processed[score_key] = {}
             for head_key, estimate in score_val.items():
-                if head_key in self.inference_network.scores[score_key].not_transforming_like_vector_warning:
+                if head_key in self.inference_network.scores[score_key].NOT_TRANSFORMING_LIKE_VECTOR_WARNING:
                     logging.warning(
                         f"Estimate '{score_key}.{head_key}' is marked to not transform like a vector. "
                         f"It was treated like a vector by the adapter. Handle '{head_key}' estimates with care."
diff --git a/bayesflow/scores/multivariate_normal_score.py b/bayesflow/scores/multivariate_normal_score.py
index 318fbec0a..90ccfbbf6 100644
--- a/bayesflow/scores/multivariate_normal_score.py
+++ b/bayesflow/scores/multivariate_normal_score.py
@@ -16,15 +16,22 @@ class MultivariateNormalScore(ParametricDistributionScore):
     Scores a predicted mean and covariance matrix with the log-score of the probability of the materialized value.
     """
 
+    NOT_TRANSFORMING_LIKE_VECTOR_WARNING = ("covariance",)
+    """
+    Marks head for covariance matrix as an exception for adapter transformations.
+
+    This variable contains names of prediction heads that should lead to a warning when the adapter is applied
+    in inverse direction to them.
+
+    For more information see :class:`ScoringRule`.
+    """
+
     def __init__(self, dim: int = None, links: dict = None, **kwargs):
         super().__init__(links=links, **kwargs)
 
         self.dim = dim
         self.links = links or {"covariance": PositiveDefinite()}
 
-        # mark head for covariance matrix as an exception for adapter transformations
-        self.not_transforming_like_vector = ["covariance"]
-
         self.config = {"dim": dim}
 
     def get_config(self):
diff --git a/bayesflow/scores/scoring_rule.py b/bayesflow/scores/scoring_rule.py
index 9154675c1..dd671189c 100644
--- a/bayesflow/scores/scoring_rule.py
+++ b/bayesflow/scores/scoring_rule.py
@@ -15,14 +15,27 @@ class ScoringRule:
     when sampling from the true distribution. By minimizing an expected score, estimates with
     different properties can be obtained.
 
-    To define a custom ``ScoringRule``, inherit from this class and overwrite the score method.
+    To define a custom :class:`ScoringRule`, inherit from this class and overwrite the score method.
     For proper serialization, any new constructor arguments must be taken care of in a `get_config` method.
 
     Estimates are typically parameterized by projection heads consisting of a neural network component
     and a link to project into the correct output space.
 
-    `ScoringRule`s can score estimates consisting of multiple parts. See `MultivariateNormalScore` for an example
-    of a `ParametricDistributionScore`. The score evaluates an estimated mean and covariance simultaneously.
+    A :class:`ScoringRule` can score estimates consisting of multiple parts. See :class:`MultivariateNormalScore`
+    for an example of a :class:`ParametricDistributionScore`. That score evaluates an estimated mean
+    and covariance simultaneously.
+    """
+
+    NOT_TRANSFORMING_LIKE_VECTOR_WARNING = tuple()
+    """
+    This variable contains names of prediction heads that should lead to a warning when the adapter is applied
+    in inverse direction to them.
+
+    Prediction heads can output estimates in spaces other than the target distribution space.
+    To such estimates the adapter cannot be straightforwardly applied in inverse direction,
+    because the adapter is built to map vectors from the inference variable space. When subclassing
+    :class:`ScoringRule`, override this tuple with the names of such heads to warn users about difficulties
+    with that type of estimate whenever the adapter is applied to them in inverse direction.
     """
 
     def __init__(
@@ -35,13 +48,6 @@ def __init__(
         self.subnets_kwargs = subnets_kwargs or {}
         self.links = links or {}
 
-        # Prediction heads can output estimates in spaces other than the target distribution space.
-        # To such estimates the adapter cannot be straightforwardly applied in inverse direction,
-        # because the adapter is built to map vectors. When subclassing `ScoringRule`, add the names
-        # of such heads to the following list to warn users about difficulties with a type of estimate
-        # whenever the adapter is applied to them in inverse direction.
-        self.not_transforming_like_vector_warning = []
-
         self.config = {"subnets_kwargs": self.subnets_kwargs}
 
     def get_config(self):
@@ -117,10 +123,12 @@ def get_head(self, key: str, output_shape: Shape) -> keras.Sequential:
         """For a specified head key and output shape, request corresponding head network.
 
         A head network has the following components that are called sequentially:
+
         1. subnet: A keras.Layer.
         2. dense: A trainable linear projection with as many units as are required by the next component.
         3. reshape: Changes shape of output of projection to match requirements of next component.
         4. link: Transforms unconstrained values into a constrained space for the final estimator.
+           See :mod:`bayesflow.links` for examples.
 
         This method initializes the components in reverse order to meet all requirements and returns them.
 
@@ -130,7 +138,7 @@ def get_head(self, key: str, output_shape: Shape) -> keras.Sequential:
             Name of head for which to request a link.
         output_shape: Shape
             The necessary shape of estimated values for the given key as returned by
-            `scoring_rule.get_head_shapes_from_target_shape()`.
+            :meth:`get_head_shapes_from_target_shape()`.
 
         Returns
         -------
@@ -173,11 +181,11 @@ def score(self, estimates: dict[str, Tensor], targets: Tensor, weights: Tensor)
 
         Examples
         --------
-        The following shows how to score estimates with a ``MeanScore``. All ``ScoringRule`` s follow this pattern,
-        only differing in the structure of the estimates dictionary.
+        The following shows how to score estimates with a :class:`MeanScore`. All :class:`ScoringRule` s
+        follow this pattern, only differing in the structure of the estimates dictionary.
 
         >>> import keras
-        ... from bayesflow.scores import MeanScore
+        >>> from bayesflow.scores import MeanScore
         >>>
         >>> # batch of samples from a normal distribution
         >>> samples = keras.random.normal(shape=(100,))