6 changes: 3 additions & 3 deletions bayesflow/approximators/continuous_approximator.py
@@ -338,7 +338,7 @@ def _sample(
**filter_kwargs(kwargs, self.inference_network.sample),
)

def log_prob(self, data: dict[str, np.ndarray], **kwargs) -> np.ndarray:
def log_prob(self, data: dict[str, np.ndarray], **kwargs) -> np.ndarray | dict[str, np.ndarray]:
"""
Computes the log-probability of given data under the model. The `data` dictionary is preprocessed using the
`adapter`. Log-probabilities are returned as NumPy arrays.
@@ -358,7 +358,7 @@ def log_prob(self, data: dict[str, np.ndarray], **kwargs) -> np.ndarray:
data = self.adapter(data, strict=False, stage="inference", **kwargs)
data = keras.tree.map_structure(keras.ops.convert_to_tensor, data)
log_prob = self._log_prob(**data, **kwargs)
log_prob = keras.ops.convert_to_numpy(log_prob)
log_prob = keras.tree.map_structure(keras.ops.convert_to_numpy, log_prob)

return log_prob

@@ -368,7 +368,7 @@ def _log_prob(
inference_conditions: Tensor = None,
summary_variables: Tensor = None,
**kwargs,
) -> Tensor:
) -> Tensor | dict[str, Tensor]:
if self.summary_network is None:
if summary_variables is not None:
raise ValueError("Cannot use summary variables without a summary network.")
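The switch to `keras.tree.map_structure` works because it applies the conversion to every leaf of a nested structure and treats a plain tensor as a single leaf, so one line covers both return types. A minimal sketch with made-up values:

```python
import keras

# One parametric score: a plain tensor. Several scores: a dict of tensors.
single = keras.ops.convert_to_tensor([-1.2, -0.7])
multi = {"mvn": keras.ops.convert_to_tensor([-1.2]), "normal": keras.ops.convert_to_tensor([-0.9])}

# map_structure recurses into the dict and applies the conversion per leaf,
# so both cases come back with NumPy leaves.
print(keras.tree.map_structure(keras.ops.convert_to_numpy, single))
print(keras.tree.map_structure(keras.ops.convert_to_numpy, multi))
```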
61 changes: 53 additions & 8 deletions bayesflow/approximators/point_approximator.py
@@ -5,7 +5,7 @@
)

from bayesflow.types import Tensor
from bayesflow.utils import filter_kwargs, split_arrays, squeeze_inner_estimates_dict
from bayesflow.utils import filter_kwargs, split_arrays, squeeze_inner_estimates_dict, logging
from .continuous_approximator import ContinuousApproximator


@@ -14,8 +14,9 @@ class PointApproximator(ContinuousApproximator):
"""
A workflow for fast amortized point estimation of a conditional distribution.

The distribution is approximated by point estimators, parameterized by a feed-forward `PointInferenceNetwork`.
Conditions can be compressed by an optional `SummaryNetwork` or used directly as input to the inference network.
The distribution is approximated by point estimators, parameterized by a feed-forward
:class:`bayesflow.networks.PointInferenceNetwork`. Conditions can be compressed by an optional summary network
(inheriting from :class:`bayesflow.networks.SummaryNetwork`) or used directly as input to the inference network.
"""

def estimate(
@@ -89,7 +90,7 @@ def sample(
for the sampling process.
split : bool, optional
If True, the sampled arrays are split along the last axis, by default False.
Currently not supported for `PointApproximator`.
Currently not supported for :class:`PointApproximator`.
**kwargs
Additional keyword arguments passed to underlying processing functions.

@@ -111,14 +112,50 @@ def sample(
if split:
raise NotImplementedError("split=True is currently not supported for `PointApproximator`.")
samples = split_arrays(samples, axis=-1)
# Squeeze samples if there's only one key-value pair.
samples = self._squeeze_samples(samples)
# Squeeze sample dictionary if there's only one key-value pair.
samples = self._squeeze_parametric_score_major_dict(samples)

return samples

def log_prob(
self,
*,
data: dict[str, np.ndarray],
**kwargs,
) -> np.ndarray | dict[str, np.ndarray]:
"""
Computes the log-probability of the given data under the parametric distribution(s) for the given input conditions.

Parameters
----------
data : dict[str, np.ndarray]
A dictionary mapping variable names to arrays representing the inference conditions and variables.
**kwargs
Additional keyword arguments passed to underlying processing functions.

Returns
-------
log_prob : np.ndarray or dict[str, np.ndarray]
Log-probabilities of the distribution
`p(inference_variables | inference_conditions, h(summary_conditions))` for all parametric scoring rules.

If only one parametric score is available, the output is an array of log-probabilities.

If multiple parametric scores are available, the output is a dictionary mapping the name of each
score to its corresponding log-probabilities.

Log-probabilities have shape (num_datasets,).
"""
log_prob = super().log_prob(data=data, **kwargs)
# Squeeze log probabilities dictionary if there's only one key-value pair.
log_prob = self._squeeze_parametric_score_major_dict(log_prob)

return log_prob

def _prepare_conditions(self, conditions: dict[str, np.ndarray], **kwargs) -> dict[str, Tensor]:
"""Adapts and converts the conditions to tensors."""
conditions = self.adapter(conditions, strict=False, stage="inference", **kwargs)
conditions.pop("inference_variables", None)
Contributor: We could add this function to the ContinuousApproximator, if it is identical between it and the PointApproximator.

Collaborator (Author): Yes! This and similar refactorings of the ContinuousApproximator are a good idea (but I would keep them out of this PR). There is also the option of moving the tensor conversion into the adapter, possibly with an optional bool flag `convert_to_tensor` that defaults to False.

return keras.tree.map_structure(keras.ops.convert_to_tensor, conditions)

def _apply_inverse_adapter_to_estimates(
@@ -130,6 +167,12 @@ def _apply_inverse_adapter_to_estimates(
for score_key, score_val in estimates.items():
processed[score_key] = {}
for head_key, estimate in score_val.items():
if head_key in self.inference_network.scores[score_key].NOT_TRANSFORMING_LIKE_VECTOR_WARNING:
logging.warning(
f"Estimate '{score_key}.{head_key}' is marked to not transform like a vector. "
f"It was treated like a vector by the adapter. Handle '{head_key}' estimates with care."
)

adapted = self.adapter(
{"inference_variables": estimate},
inverse=True,
@@ -180,8 +223,10 @@ def _squeeze_estimates(
}
return squeezed

def _squeeze_samples(self, samples: dict[str, np.ndarray]) -> np.ndarray or dict[str, np.ndarray]:
"""Squeezes the samples dictionary to just the value if there is only one key-value pair."""
def _squeeze_parametric_score_major_dict(
self, samples: dict[str, np.ndarray]
) -> np.ndarray | dict[str, np.ndarray]:
"""Squeezes the dictionary to just the value if there is only one key-value pair."""
if len(samples) == 1:
return next(iter(samples.values())) # Extract and return the only item's value
return samples
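The squeeze convention shared by `sample` and `log_prob` can be sketched in isolation (hypothetical score names; the helper unwraps the dict only when a single parametric score is present):

```python
import numpy as np

def squeeze_score_major_dict(values):
    # Mirrors _squeeze_parametric_score_major_dict: unwrap iff one score.
    if len(values) == 1:
        return next(iter(values.values()))
    return values

one_score = {"multivariate_normal": np.zeros((8,))}
two_scores = {"multivariate_normal": np.zeros((8,)), "normal": np.zeros((8,))}

assert isinstance(squeeze_score_major_dict(one_score), np.ndarray)  # bare array
assert isinstance(squeeze_score_major_dict(two_scores), dict)       # dict kept
```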
2 changes: 1 addition & 1 deletion bayesflow/links/__init__.py
@@ -2,7 +2,7 @@

from .ordered import Ordered
from .ordered_quantiles import OrderedQuantiles
from .positive_semi_definite import PositiveSemiDefinite
from .positive_definite import PositiveDefinite

from ..utils._docs import _add_imports_to_all

46 changes: 46 additions & 0 deletions bayesflow/links/positive_definite.py
@@ -0,0 +1,46 @@
import keras

from keras.saving import register_keras_serializable as serializable

from bayesflow.types import Tensor
from bayesflow.utils import keras_kwargs, fill_triangular_matrix


@serializable(package="bayesflow.links")
class PositiveDefinite(keras.Layer):
"""Activation function to link from flat elements of a lower triangular matrix to a positive definite matrix."""

def __init__(self, **kwargs):
super().__init__(**keras_kwargs(kwargs))
self.built = True

def call(self, inputs: Tensor) -> Tensor:
# Build the Cholesky factor from the flat inputs
L = fill_triangular_matrix(inputs, positive_diag=True)

# Calculate the positive definite matrix from the Cholesky factor
pd = keras.ops.matmul(
L,
keras.ops.moveaxis(L, -2, -1),  # L transposed
)
return pd

def compute_output_shape(self, input_shape):
m = input_shape[-1]
n = int((0.25 + 2.0 * m) ** 0.5 - 0.5)
return input_shape[:-1] + (n, n)

def compute_input_shape(self, output_shape):
"""
Returns the shape of the flat parameterization of a lower-triangular Cholesky factor.

A lower triangular n x n matrix has m = n * (n + 1) / 2 nonzero elements.

Example
-------
>>> PositiveDefinite().compute_input_shape((None, 3, 3))
(None, 6)
"""
n = output_shape[-1]
m = int(n * (n + 1) / 2)
return output_shape[:-2] + (m,)
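A quick self-check of the new link, assuming `fill_triangular_matrix` packs the m = n * (n + 1) / 2 flat inputs into a lower-triangular factor with positive diagonal as its name and arguments suggest: the output should be symmetric with strictly positive eigenvalues, and the two shape methods should invert each other.

```python
import keras
import numpy as np
from bayesflow.links import PositiveDefinite

link = PositiveDefinite()

# n = 3 implies m = 3 * 4 / 2 = 6 flat parameters per matrix.
flat = keras.random.normal((2, 6))
matrices = keras.ops.convert_to_numpy(link(flat))

assert matrices.shape == (2, 3, 3) == link.compute_output_shape((2, 6))
assert link.compute_input_shape((2, 3, 3)) == (2, 6)
assert np.allclose(matrices, np.swapaxes(matrices, -2, -1))  # symmetric
assert (np.linalg.eigvalsh(matrices) > 0).all()  # positive definite
```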
20 changes: 0 additions & 20 deletions bayesflow/links/positive_semi_definite.py

This file was deleted.

6 changes: 3 additions & 3 deletions bayesflow/networks/point_inference_network.py
@@ -132,7 +132,9 @@ def call(
if xz is None and not self.built:
raise ValueError("Cannot build inference network without inference variables.")
if conditions is None: # unconditional estimation uses a fixed input vector
conditions = keras.ops.convert_to_tensor([[1.0]], dtype=keras.ops.dtype(xz))
conditions = keras.ops.convert_to_tensor(
[[1.0]], dtype=keras.ops.dtype(xz) if xz is not None else "float32"
)

# pass conditions to the shared subnet
output = self.subnet(conditions, training=training)
@@ -165,7 +167,6 @@ def compute_metrics(

return metrics | {"loss": neg_score}

# WIP: untested draft of sample method
@allow_batch_size
def sample(self, batch_shape: Shape, conditions: Tensor = None) -> dict[str, Tensor]:
"""
@@ -199,7 +200,6 @@ def sample(self, batch_shape: Shape, conditions: Tensor = None) -> dict[str, Tensor]:

return samples

# WIP: untested draft of log_prob method
def log_prob(self, samples: Tensor, conditions: Tensor = None, **kwargs) -> dict[str, Tensor]:
output = self.subnet(conditions)
log_probs = {}
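The dtype fallback in `call` matters because `sample` and `log_prob` can run without inference variables once the network is built; a hedged sketch of just that branch (hypothetical helper name):

```python
import keras

def dummy_unconditional_conditions(xz=None):
    # Unconditional estimation feeds a fixed [[1.0]] vector to the shared
    # subnet; its dtype follows xz during training and defaults to float32
    # when xz is absent, e.g. while sampling.
    return keras.ops.convert_to_tensor(
        [[1.0]], dtype=keras.ops.dtype(xz) if xz is not None else "float32"
    )

print(keras.ops.dtype(dummy_unconditional_conditions()))  # float32
```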
26 changes: 18 additions & 8 deletions bayesflow/scores/multivariate_normal_score.py
@@ -4,8 +4,7 @@
from keras.saving import register_keras_serializable as serializable

from bayesflow.types import Shape, Tensor
from bayesflow.links import PositiveSemiDefinite
from bayesflow.utils import logging
from bayesflow.links import PositiveDefinite

from .parametric_distribution_score import ParametricDistributionScore

@@ -17,14 +16,23 @@ class MultivariateNormalScore(ParametricDistributionScore):
Scores a predicted mean and covariance matrix with the log-score of the probability of the materialized value.
"""

NOT_TRANSFORMING_LIKE_VECTOR_WARNING = ("covariance",)
"""
Marks the covariance head as an exception for adapter transformations.

This variable lists the prediction heads that should trigger a warning when the adapter is applied
to them in the inverse direction.

For more information, see :class:`ScoringRule`.
"""

def __init__(self, dim: int = None, links: dict = None, **kwargs):
super().__init__(links=links, **kwargs)

self.dim = dim
self.links = links or {"covariance": PositiveSemiDefinite()}
self.config = {"dim": dim}
self.links = links or {"covariance": PositiveDefinite()}

logging.warning("MultivariateNormalScore is unstable.")
self.config = {"dim": dim}

def get_config(self):
base_config = super().get_config()
@@ -60,12 +68,12 @@ def log_prob(self, x: Tensor, mean: Tensor, covariance: Tensor) -> Tensor:
A tensor containing the log probability densities for each sample in `x` under the
given Gaussian distribution.
"""
diff = x[:, None, :] - mean
inv_covariance = keras.ops.inv(covariance)
diff = x - mean
precision = keras.ops.inv(covariance)
log_det_covariance = keras.ops.slogdet(covariance)[1] # Only take the log of the determinant part

# Compute the quadratic term in the exponential of the multivariate Gaussian
quadratic_term = keras.ops.einsum("...i,...ij,...j->...", diff, inv_covariance, diff)
quadratic_term = keras.ops.einsum("...i,...ij,...j->...", diff, precision, diff)

# Compute the log probability density
log_prob = -0.5 * (self.dim * keras.ops.log(2 * math.pi) + log_det_covariance + quadratic_term)
@@ -97,6 +105,8 @@ def sample(self, batch_shape: Shape, mean: Tensor, covariance: Tensor) -> Tensor:
Tensor
A tensor of shape (batch_size, num_samples, D) containing the generated samples.
"""
if len(batch_shape) == 1:
batch_shape = (1,) + tuple(batch_shape)
batch_size, num_samples = batch_shape
dim = keras.ops.shape(mean)[-1]
if keras.ops.shape(mean) != (batch_size, dim):
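A sanity check of the corrected density with made-up numbers, cross-checked against SciPy: with `diff = x - mean`, the einsum evaluates the quadratic form (x - mean)^T Sigma^{-1} (x - mean) once per dataset.

```python
import math
import numpy as np
from scipy.stats import multivariate_normal

rng = np.random.default_rng(0)
dim = 3
mean = rng.normal(size=(2, dim))
A = rng.normal(size=(2, dim, dim))
covariance = A @ np.swapaxes(A, -2, -1) + dim * np.eye(dim)  # well-conditioned SPD
x = rng.normal(size=(2, dim))

diff = x - mean
precision = np.linalg.inv(covariance)
log_det = np.linalg.slogdet(covariance)[1]  # log of the determinant part
quadratic = np.einsum("...i,...ij,...j->...", diff, precision, diff)
log_prob = -0.5 * (dim * math.log(2 * math.pi) + log_det + quadratic)

expected = [multivariate_normal(mean[i], covariance[i]).logpdf(x[i]) for i in range(2)]
assert np.allclose(log_prob, expected)
```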
3 changes: 1 addition & 2 deletions bayesflow/scores/parametric_distribution_score.py
@@ -51,5 +51,4 @@ def score(self, estimates: dict[str, Tensor], targets: Tensor, weights: Tensor =
"""
scores = -self.log_prob(x=targets, **estimates)
score = self.aggregate(scores, weights)
# multipy to mitigate instability due to relatively high values of parametric score
return score * 0.01
return score
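With the ad-hoc `* 0.01` rescaling removed, the rule returns the plain log-score, i.e. the aggregated negative log-likelihood; a hypothetical check for a head predicting a standard normal:

```python
import numpy as np
from scipy.stats import norm

targets = np.array([0.3, -1.2, 0.7])
# The score is now exactly the mean negative log-density, not 1/100 of it.
score = -norm.logpdf(targets).mean()
print(round(score, 3))  # 1.256
```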