6 changes: 3 additions & 3 deletions bayesflow/approximators/continuous_approximator.py
@@ -338,7 +338,7 @@ def _sample(
**filter_kwargs(kwargs, self.inference_network.sample),
)

def log_prob(self, data: dict[str, np.ndarray], **kwargs) -> np.ndarray:
def log_prob(self, data: dict[str, np.ndarray], **kwargs) -> np.ndarray | dict[str, np.ndarray]:
"""
Computes the log-probability of given data under the model. The `data` dictionary is preprocessed using the
`adapter`. Log-probabilities are returned as NumPy arrays.
@@ -358,7 +358,7 @@ def log_prob(self, data: dict[str, np.ndarray], **kwargs) -> np.ndarray:
data = self.adapter(data, strict=False, stage="inference", **kwargs)
data = keras.tree.map_structure(keras.ops.convert_to_tensor, data)
log_prob = self._log_prob(**data, **kwargs)
log_prob = keras.ops.convert_to_numpy(log_prob)
log_prob = keras.tree.map_structure(keras.ops.convert_to_numpy, log_prob)

return log_prob

@@ -368,7 +368,7 @@ def _log_prob(
inference_conditions: Tensor = None,
summary_variables: Tensor = None,
**kwargs,
) -> Tensor:
) -> Tensor | dict[str, Tensor]:
if self.summary_network is None:
if summary_variables is not None:
raise ValueError("Cannot use summary variables without a summary network.")
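The switch to `keras.tree.map_structure` works because it applies the conversion to every leaf of a nested structure and treats a plain tensor as a single leaf, so one line covers both return types. A minimal sketch with made-up values:

```python
import keras

# One parametric score: a plain tensor. Several scores: a dict of tensors.
single = keras.ops.convert_to_tensor([-1.2, -0.7])
multi = {"mvn": keras.ops.convert_to_tensor([-1.2]), "normal": keras.ops.convert_to_tensor([-0.9])}

# map_structure recurses into the dict and applies the conversion per leaf,
# so both cases come back with NumPy leaves.
print(keras.tree.map_structure(keras.ops.convert_to_numpy, single))
print(keras.tree.map_structure(keras.ops.convert_to_numpy, multi))
```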
61 changes: 53 additions & 8 deletions bayesflow/approximators/point_approximator.py
@@ -5,7 +5,7 @@
)

from bayesflow.types import Tensor
from bayesflow.utils import filter_kwargs, split_arrays, squeeze_inner_estimates_dict
from bayesflow.utils import filter_kwargs, split_arrays, squeeze_inner_estimates_dict, logging
from .continuous_approximator import ContinuousApproximator


@@ -14,8 +14,9 @@ class PointApproximator(ContinuousApproximator):
"""
A workflow for fast amortized point estimation of a conditional distribution.

The distribution is approximated by point estimators, parameterized by a feed-forward `PointInferenceNetwork`.
Conditions can be compressed by an optional `SummaryNetwork` or used directly as input to the inference network.
The distribution is approximated by point estimators, parameterized by a feed-forward
:class:`bayesflow.networks.PointInferenceNetwork`. Conditions can be compressed by an optional summary network
(inheriting from :class:`bayesflow.networks.SummaryNetwork`) or used directly as input to the inference network.
"""

def estimate(
@@ -89,7 +90,7 @@ def sample(
for the sampling process.
split : bool, optional
If True, the sampled arrays are split along the last axis, by default False.
Currently not supported for `PointApproximator`.
Currently not supported for :class:`PointApproximator`.
**kwargs
Additional keyword arguments passed to underlying processing functions.

@@ -111,14 +112,50 @@ def sample(
if split:
raise NotImplementedError("split=True is currently not supported for `PointApproximator`.")
samples = split_arrays(samples, axis=-1)
# Squeeze samples if there's only one key-value pair.
samples = self._squeeze_samples(samples)
# Squeeze sample dictionary if there's only one key-value pair.
samples = self._squeeze_parametric_score_major_dict(samples)

return samples

def log_prob(
self,
*,
data: dict[str, np.ndarray],
**kwargs,
) -> np.ndarray | dict[str, np.ndarray]:
"""
Computes the log-probability of the given data under the parametric distribution(s) for the given input conditions.

Parameters
----------
data : dict[str, np.ndarray]
A dictionary mapping variable names to arrays representing the inference conditions and variables.
**kwargs
Additional keyword arguments passed to underlying processing functions.

Returns
-------
log_prob : np.ndarray or dict[str, np.ndarray]
Log-probabilities of the distribution
`p(inference_variables | inference_conditions, h(summary_conditions))` for all parametric scoring rules.

If only one parametric score is available, the output is an array of log-probabilities.

If multiple parametric scores are available, the output is a dictionary mapping the name of each
score to its corresponding log-probabilities.

Log-probabilities have shape (num_datasets,).
"""
log_prob = super().log_prob(data=data, **kwargs)
# Squeeze log probabilities dictionary if there's only one key-value pair.
log_prob = self._squeeze_parametric_score_major_dict(log_prob)

return log_prob

def _prepare_conditions(self, conditions: dict[str, np.ndarray], **kwargs) -> dict[str, Tensor]:
"""Adapts and converts the conditions to tensors."""
conditions = self.adapter(conditions, strict=False, stage="inference", **kwargs)
conditions.pop("inference_variables", None)
Contributor: We could add this function to the ContinuousApproximator, if it is identical between it and the PointApproximator.

Collaborator (Author): Yes! This and similar refactorings of the ContinuousApproximator are a good idea (but I would keep them out of this PR). There is also the option of moving the tensor conversion into the adapter, possibly with an optional bool flag `convert_to_tensor` that defaults to False.

return keras.tree.map_structure(keras.ops.convert_to_tensor, conditions)

def _apply_inverse_adapter_to_estimates(
@@ -130,6 +167,12 @@ def _apply_inverse_adapter_to_estimates(
for score_key, score_val in estimates.items():
processed[score_key] = {}
for head_key, estimate in score_val.items():
if head_key in self.inference_network.scores[score_key].NOT_TRANSFORMING_LIKE_VECTOR_WARNING:
logging.warning(
f"Estimate '{score_key}.{head_key}' is marked to not transform like a vector. "
f"It was treated like a vector by the adapter. Handle '{head_key}' estimates with care."
)

adapted = self.adapter(
{"inference_variables": estimate},
inverse=True,
@@ -180,8 +223,10 @@ def _squeeze_estimates(
}
return squeezed

def _squeeze_samples(self, samples: dict[str, np.ndarray]) -> np.ndarray or dict[str, np.ndarray]:
"""Squeezes the samples dictionary to just the value if there is only one key-value pair."""
def _squeeze_parametric_score_major_dict(
self, samples: dict[str, np.ndarray]
) -> np.ndarray | dict[str, np.ndarray]:
"""Squeezes the dictionary to just the value if there is only one key-value pair."""
if len(samples) == 1:
return next(iter(samples.values())) # Extract and return the only item's value
return samples
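The squeeze convention shared by `sample` and `log_prob` can be sketched in isolation (hypothetical score names; the helper unwraps the dict only when a single parametric score is present):

```python
import numpy as np

def squeeze_score_major_dict(values):
    # Mirrors _squeeze_parametric_score_major_dict: unwrap iff one score.
    if len(values) == 1:
        return next(iter(values.values()))
    return values

one_score = {"multivariate_normal": np.zeros((8,))}
two_scores = {"multivariate_normal": np.zeros((8,)), "normal": np.zeros((8,))}

assert isinstance(squeeze_score_major_dict(one_score), np.ndarray)  # bare array
assert isinstance(squeeze_score_major_dict(two_scores), dict)       # dict kept
```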
2 changes: 1 addition & 1 deletion bayesflow/links/__init__.py
@@ -2,7 +2,7 @@

from .ordered import Ordered
from .ordered_quantiles import OrderedQuantiles
from .positive_semi_definite import PositiveSemiDefinite
from .positive_definite import PositiveDefinite

from ..utils._docs import _add_imports_to_all

46 changes: 46 additions & 0 deletions bayesflow/links/positive_definite.py
@@ -0,0 +1,46 @@
import keras

from keras.saving import register_keras_serializable as serializable

from bayesflow.types import Tensor
from bayesflow.utils import keras_kwargs, fill_triangular_matrix


@serializable(package="bayesflow.links")
class PositiveDefinite(keras.Layer):
"""Activation function to link from flat elements of a lower triangular matrix to a positive definite matrix."""

def __init__(self, **kwargs):
super().__init__(**keras_kwargs(kwargs))
self.built = True

def call(self, inputs: Tensor) -> Tensor:
# Build the Cholesky factor from the flat inputs
L = fill_triangular_matrix(inputs, positive_diag=True)

# Calculate the positive definite matrix from the Cholesky factor
pd = keras.ops.matmul(
L,
keras.ops.moveaxis(L, -2, -1),  # L transposed
)
return pd

def compute_output_shape(self, input_shape):
m = input_shape[-1]
n = int((0.25 + 2.0 * m) ** 0.5 - 0.5)
return input_shape[:-1] + (n, n)

def compute_input_shape(self, output_shape):
"""
Returns the shape of the flat parameterization of a lower-triangular Cholesky factor.

A lower triangular n x n matrix has m = n * (n + 1) / 2 nonzero elements.

Example
-------
>>> PositiveDefinite().compute_input_shape((None, 3, 3))
(None, 6)
"""
n = output_shape[-1]
m = int(n * (n + 1) / 2)
return output_shape[:-2] + (m,)
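A quick self-check of the new link, assuming `fill_triangular_matrix` packs the m = n * (n + 1) / 2 flat inputs into a lower-triangular factor with positive diagonal as its name and arguments suggest: the output should be symmetric with strictly positive eigenvalues, and the two shape methods should invert each other.

```python
import keras
import numpy as np
from bayesflow.links import PositiveDefinite

link = PositiveDefinite()

# n = 3 implies m = 3 * 4 / 2 = 6 flat parameters per matrix.
flat = keras.random.normal((2, 6))
matrices = keras.ops.convert_to_numpy(link(flat))

assert matrices.shape == (2, 3, 3) == link.compute_output_shape((2, 6))
assert link.compute_input_shape((2, 3, 3)) == (2, 6)
assert np.allclose(matrices, np.swapaxes(matrices, -2, -1))  # symmetric
assert (np.linalg.eigvalsh(matrices) > 0).all()  # positive definite
```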
20 changes: 0 additions & 20 deletions bayesflow/links/positive_semi_definite.py

This file was deleted.

6 changes: 3 additions & 3 deletions bayesflow/networks/point_inference_network.py
@@ -132,7 +132,9 @@ def call(
if xz is None and not self.built:
raise ValueError("Cannot build inference network without inference variables.")
if conditions is None: # unconditional estimation uses a fixed input vector
conditions = keras.ops.convert_to_tensor([[1.0]], dtype=keras.ops.dtype(xz))
conditions = keras.ops.convert_to_tensor(
[[1.0]], dtype=keras.ops.dtype(xz) if xz is not None else "float32"
)

# pass conditions to the shared subnet
output = self.subnet(conditions, training=training)
@@ -165,7 +167,6 @@ def compute_metrics(

return metrics | {"loss": neg_score}

# WIP: untested draft of sample method
@allow_batch_size
def sample(self, batch_shape: Shape, conditions: Tensor = None) -> dict[str, Tensor]:
"""
@@ -199,7 +200,6 @@ def sample(self, batch_shape: Shape, conditions: Tensor = None) -> dict[str, Tensor]:

return samples

# WIP: untested draft of log_prob method
def log_prob(self, samples: Tensor, conditions: Tensor = None, **kwargs) -> dict[str, Tensor]:
output = self.subnet(conditions)
log_probs = {}
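The dtype fallback in `call` matters because `sample` and `log_prob` can run without inference variables once the network is built; a hedged sketch of just that branch (hypothetical helper name):

```python
import keras

def dummy_unconditional_conditions(xz=None):
    # Unconditional estimation feeds a fixed [[1.0]] vector to the shared
    # subnet; its dtype follows xz during training and defaults to float32
    # when xz is absent, e.g. while sampling.
    return keras.ops.convert_to_tensor(
        [[1.0]], dtype=keras.ops.dtype(xz) if xz is not None else "float32"
    )

print(keras.ops.dtype(dummy_unconditional_conditions()))  # float32
```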
26 changes: 18 additions & 8 deletions bayesflow/scores/multivariate_normal_score.py
@@ -4,8 +4,7 @@
from keras.saving import register_keras_serializable as serializable

from bayesflow.types import Shape, Tensor
from bayesflow.links import PositiveSemiDefinite
from bayesflow.utils import logging
from bayesflow.links import PositiveDefinite

from .parametric_distribution_score import ParametricDistributionScore

@@ -17,14 +16,23 @@ class MultivariateNormalScore(ParametricDistributionScore):
Scores a predicted mean and covariance matrix with the log-score of the probability of the materialized value.
"""

NOT_TRANSFORMING_LIKE_VECTOR_WARNING = ("covariance",)
"""
Marks the covariance head as an exception for adapter transformations.

This variable lists the prediction heads that should trigger a warning when the adapter is applied
to them in the inverse direction.

For more information, see :class:`ScoringRule`.
"""

def __init__(self, dim: int = None, links: dict = None, **kwargs):
super().__init__(links=links, **kwargs)

self.dim = dim
self.links = links or {"covariance": PositiveSemiDefinite()}
self.config = {"dim": dim}
self.links = links or {"covariance": PositiveDefinite()}

logging.warning("MultivariateNormalScore is unstable.")
self.config = {"dim": dim}

def get_config(self):
base_config = super().get_config()
@@ -60,12 +68,12 @@ def log_prob(self, x: Tensor, mean: Tensor, covariance: Tensor) -> Tensor:
A tensor containing the log probability densities for each sample in `x` under the
given Gaussian distribution.
"""
diff = x[:, None, :] - mean
inv_covariance = keras.ops.inv(covariance)
diff = x - mean
precision = keras.ops.inv(covariance)
log_det_covariance = keras.ops.slogdet(covariance)[1] # Only take the log of the determinant part

# Compute the quadratic term in the exponential of the multivariate Gaussian
quadratic_term = keras.ops.einsum("...i,...ij,...j->...", diff, inv_covariance, diff)
quadratic_term = keras.ops.einsum("...i,...ij,...j->...", diff, precision, diff)

# Compute the log probability density
log_prob = -0.5 * (self.dim * keras.ops.log(2 * math.pi) + log_det_covariance + quadratic_term)
@@ -97,6 +105,8 @@ def sample(self, batch_shape: Shape, mean: Tensor, covariance: Tensor) -> Tensor:
Tensor
A tensor of shape (batch_size, num_samples, D) containing the generated samples.
"""
if len(batch_shape) == 1:
batch_shape = (1,) + tuple(batch_shape)
batch_size, num_samples = batch_shape
dim = keras.ops.shape(mean)[-1]
if keras.ops.shape(mean) != (batch_size, dim):
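A sanity check of the corrected density with made-up numbers, cross-checked against SciPy: with `diff = x - mean`, the einsum evaluates the quadratic form (x - mean)^T Sigma^{-1} (x - mean) once per dataset.

```python
import math
import numpy as np
from scipy.stats import multivariate_normal

rng = np.random.default_rng(0)
dim = 3
mean = rng.normal(size=(2, dim))
A = rng.normal(size=(2, dim, dim))
covariance = A @ np.swapaxes(A, -2, -1) + dim * np.eye(dim)  # well-conditioned SPD
x = rng.normal(size=(2, dim))

diff = x - mean
precision = np.linalg.inv(covariance)
log_det = np.linalg.slogdet(covariance)[1]  # log of the determinant part
quadratic = np.einsum("...i,...ij,...j->...", diff, precision, diff)
log_prob = -0.5 * (dim * math.log(2 * math.pi) + log_det + quadratic)

expected = [multivariate_normal(mean[i], covariance[i]).logpdf(x[i]) for i in range(2)]
assert np.allclose(log_prob, expected)
```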
3 changes: 1 addition & 2 deletions bayesflow/scores/parametric_distribution_score.py
@@ -51,5 +51,4 @@ def score(self, estimates: dict[str, Tensor], targets: Tensor, weights: Tensor =
"""
scores = -self.log_prob(x=targets, **estimates)
score = self.aggregate(scores, weights)
# multipy to mitigate instability due to relatively high values of parametric score
return score * 0.01
return score
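With the ad-hoc `* 0.01` rescaling removed, the rule returns the plain log-score, i.e. the aggregated negative log-likelihood; a hypothetical check for a head predicting a standard normal:

```python
import numpy as np
from scipy.stats import norm

targets = np.array([0.3, -1.2, 0.7])
# The score is now exactly the mean negative log-density, not 1/100 of it.
score = -norm.logpdf(targets).mean()
print(round(score, 3))  # 1.256
```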