From 34c7f2a241ee860f8bd0dc0f2db46163bec05cf3 Mon Sep 17 00:00:00 2001 From: han-ol Date: Thu, 27 Mar 2025 16:31:12 +0100 Subject: [PATCH 01/20] Better parameterization of covariance matrices --- bayesflow/approximators/point_approximator.py | 9 ++- bayesflow/links/__init__.py | 2 +- bayesflow/links/positive_definite.py | 52 +++++++++++++ bayesflow/links/positive_semi_definite.py | 20 ----- bayesflow/scores/multivariate_normal_score.py | 14 ++-- .../scores/parametric_distribution_score.py | 3 +- bayesflow/scores/scoring_rule.py | 21 +++-- bayesflow/utils/__init__.py | 1 + bayesflow/utils/tensor_utils.py | 77 +++++++++++++++++++ tests/test_links/conftest.py | 16 ++-- tests/test_links/test_links.py | 22 ++---- tests/test_scores/test_scores.py | 16 ++-- 12 files changed, 192 insertions(+), 61 deletions(-) create mode 100644 bayesflow/links/positive_definite.py delete mode 100644 bayesflow/links/positive_semi_definite.py diff --git a/bayesflow/approximators/point_approximator.py b/bayesflow/approximators/point_approximator.py index 836dd060c..6fb5bdd14 100644 --- a/bayesflow/approximators/point_approximator.py +++ b/bayesflow/approximators/point_approximator.py @@ -5,7 +5,7 @@ ) from bayesflow.types import Tensor -from bayesflow.utils import filter_kwargs, split_arrays, squeeze_inner_estimates_dict +from bayesflow.utils import filter_kwargs, split_arrays, squeeze_inner_estimates_dict, logging from .continuous_approximator import ContinuousApproximator @@ -119,6 +119,7 @@ def sample( def _prepare_conditions(self, conditions: dict[str, np.ndarray], **kwargs) -> dict[str, Tensor]: """Adapts and converts the conditions to tensors.""" conditions = self.adapter(conditions, strict=False, stage="inference", **kwargs) + conditions.pop("inference_variables", None) return keras.tree.map_structure(keras.ops.convert_to_tensor, conditions) def _apply_inverse_adapter_to_estimates( @@ -130,6 +131,12 @@ def _apply_inverse_adapter_to_estimates( for score_key, score_val in estimates.items(): processed[score_key] = {} for head_key, estimate in score_val.items(): + if head_key in self.inference_network.scores[score_key].not_transforming_like_vector: + logging.warning( + f"Estimate '{score_key}.{head_key}' is marked to not transform like a vector. " + "It was treated like a vector by the adapter. Handle '{head_key}' estimates with care." + ) + adapted = self.adapter( {"inference_variables": estimate}, inverse=True, diff --git a/bayesflow/links/__init__.py b/bayesflow/links/__init__.py index a32fd6c21..77913f52b 100644 --- a/bayesflow/links/__init__.py +++ b/bayesflow/links/__init__.py @@ -2,7 +2,7 @@ from .ordered import Ordered from .ordered_quantiles import OrderedQuantiles -from .positive_semi_definite import PositiveSemiDefinite +from .positive_definite import PositiveDefinite from ..utils._docs import _add_imports_to_all diff --git a/bayesflow/links/positive_definite.py b/bayesflow/links/positive_definite.py new file mode 100644 index 000000000..d676ac665 --- /dev/null +++ b/bayesflow/links/positive_definite.py @@ -0,0 +1,52 @@ +import keras + +# import numpy as np +from keras.saving import register_keras_serializable as serializable + +from bayesflow.types import Tensor +from bayesflow.utils import keras_kwargs, fill_triangular_matrix + + +@serializable(package="bayesflow.links") +class PositiveDefinite(keras.Layer): + """Activation function to link from flat elements of a lower triangular matrix to a positive definite matrix.""" + + def __init__(self, **kwargs): + super().__init__(**keras_kwargs(kwargs)) + self.built = True + + def call(self, inputs: Tensor) -> Tensor: + # Build cholesky factor from inputs + L = fill_triangular_matrix(inputs, positive_diag=True) + + # diagonal_mask = keras.ops.identity(L.shape[-1]) > 0 + # L[..., diagonal_mask] = keras.activations.softplus(L[..., diagonal_mask]) + # L += keras.ops.identity(L.shape[-1]) * 2 + # L *= keras.ops.sign(keras.ops.diagonal(L, axis1=-1))[..., None] # ensure positive diagonal entries + + # calculate positive definite matrix from cholesky factors + psd = keras.ops.matmul( + L, + keras.ops.moveaxis(L, -2, -1), # L transposed + ) + return psd + + def compute_output_shape(self, input_shape): + m = input_shape[-1] + n = int((0.25 + 2.0 * m) ** 0.5 - 0.5) + return input_shape[:-1] + (n, n) + + def compute_input_shape(self, output_shape): + """ + Returns the shape of parameterization of a cholesky factor triangular matrix. + + There are m nonzero elements of a lower triangular nxn matrix with m = n * (n + 1) / 2. + + Example + ------- + >>> PositiveDefinite().compute_output_shape((None, 3, 3)) + 6 + """ + n = output_shape[-1] + m = int(n * (n + 1) / 2) + return output_shape[:-2] + (m,) diff --git a/bayesflow/links/positive_semi_definite.py b/bayesflow/links/positive_semi_definite.py deleted file mode 100644 index a056fc3c3..000000000 --- a/bayesflow/links/positive_semi_definite.py +++ /dev/null @@ -1,20 +0,0 @@ -import keras -from keras.saving import register_keras_serializable as serializable - -from bayesflow.types import Tensor -from bayesflow.utils import keras_kwargs - - -@serializable(package="bayesflow.links") -class PositiveSemiDefinite(keras.Layer): - """Activation function to link from any square matrix to a positive semidefinite matrix.""" - - def __init__(self, **kwargs): - super().__init__(**keras_kwargs(kwargs)) - - def call(self, inputs: Tensor) -> Tensor: - # multiply M * M^T to get symmetric matrix - return keras.ops.einsum("...ij,...kj->...ik", inputs, inputs) - - def compute_output_shape(self, input_shape): - return input_shape diff --git a/bayesflow/scores/multivariate_normal_score.py b/bayesflow/scores/multivariate_normal_score.py index 66153fd34..efe560388 100644 --- a/bayesflow/scores/multivariate_normal_score.py +++ b/bayesflow/scores/multivariate_normal_score.py @@ -4,7 +4,7 @@ from keras.saving import register_keras_serializable as serializable from bayesflow.types import Shape, Tensor -from bayesflow.links import PositiveSemiDefinite +from bayesflow.links import PositiveDefinite from bayesflow.utils import logging from .parametric_distribution_score import ParametricDistributionScore @@ -21,7 +21,11 @@ def __init__(self, dim: int = None, links: dict = None, **kwargs): super().__init__(links=links, **kwargs) self.dim = dim - self.links = links or {"covariance": PositiveSemiDefinite()} + self.links = links or {"covariance": PositiveDefinite()} + + # mark head for covariance matrix as an exception for adapter transformations + self.not_transforming_like_vector = ["covariance"] + self.config = {"dim": dim} logging.warning("MultivariateNormalScore is unstable.") @@ -60,12 +64,12 @@ def log_prob(self, x: Tensor, mean: Tensor, covariance: Tensor) -> Tensor: A tensor containing the log probability densities for each sample in `x` under the given Gaussian distribution. """ - diff = x[:, None, :] - mean - inv_covariance = keras.ops.inv(covariance) + diff = x - mean + precision = keras.ops.inv(covariance) log_det_covariance = keras.ops.slogdet(covariance)[1] # Only take the log of the determinant part # Compute the quadratic term in the exponential of the multivariate Gaussian - quadratic_term = keras.ops.einsum("...i,...ij,...j->...", diff, inv_covariance, diff) + quadratic_term = keras.ops.einsum("...i,...ij,...j->...", diff, precision, diff) # Compute the log probability density log_prob = -0.5 * (self.dim * keras.ops.log(2 * math.pi) + log_det_covariance + quadratic_term) diff --git a/bayesflow/scores/parametric_distribution_score.py b/bayesflow/scores/parametric_distribution_score.py index 51cef1776..17806ef16 100644 --- a/bayesflow/scores/parametric_distribution_score.py +++ b/bayesflow/scores/parametric_distribution_score.py @@ -51,5 +51,4 @@ def score(self, estimates: dict[str, Tensor], targets: Tensor, weights: Tensor = """ scores = -self.log_prob(x=targets, **estimates) score = self.aggregate(scores, weights) - # multipy to mitigate instability due to relatively high values of parametric score - return score * 0.01 + return score diff --git a/bayesflow/scores/scoring_rule.py b/bayesflow/scores/scoring_rule.py index ef0645cc1..b16d14ac3 100644 --- a/bayesflow/scores/scoring_rule.py +++ b/bayesflow/scores/scoring_rule.py @@ -29,6 +29,8 @@ def __init__( self.subnets_kwargs = subnets_kwargs or {} self.links = links or {} + self.not_transforming_like_vector = [] + self.config = {"subnets_kwargs": self.subnets_kwargs} def get_config(self): @@ -95,14 +97,14 @@ def get_link(self, key: str) -> keras.Layer: else: return self.links[key] - def get_head(self, key: str, shape: Shape) -> keras.Sequential: + def get_head(self, key: str, output_shape: Shape) -> keras.Sequential: """For a specified head key and shape, request corresponding head network. Parameters ---------- key : str Name of head for which to request a link. - shape: Shape + output_shape: Shape The necessary shape for the point estimators. Returns @@ -111,10 +113,19 @@ def get_head(self, key: str, shape: Shape) -> keras.Sequential: Head network consisting of a learnable projection, a reshape and a link operation to parameterize estimates. """ - subnet = self.get_subnet(key) - dense = keras.layers.Dense(units=math.prod(shape)) - reshape = keras.layers.Reshape(target_shape=shape) + # initialize head components back to front link = self.get_link(key) + + # link input shape can differ from output shape + if hasattr(link, "compute_input_shape"): + link_input_shape = link.compute_input_shape(output_shape) + else: + link_input_shape = output_shape + + reshape = keras.layers.Reshape(target_shape=link_input_shape) + dense = keras.layers.Dense(units=math.prod(link_input_shape)) + subnet = self.get_subnet(key) + return keras.Sequential([subnet, dense, reshape, link]) def score(self, estimates: dict[str, Tensor], targets: Tensor, weights: Tensor) -> Tensor: diff --git a/bayesflow/utils/__init__.py b/bayesflow/utils/__init__.py index ecb546eae..1eeb2d354 100644 --- a/bayesflow/utils/__init__.py +++ b/bayesflow/utils/__init__.py @@ -66,6 +66,7 @@ tile_axis, tree_concatenate, tree_stack, + fill_triangular_matrix, ) from .validators import check_lengths_same from .workflow_utils import find_inference_network, find_summary_network diff --git a/bayesflow/utils/tensor_utils.py b/bayesflow/utils/tensor_utils.py index b65df49a7..9287dd982 100644 --- a/bayesflow/utils/tensor_utils.py +++ b/bayesflow/utils/tensor_utils.py @@ -277,3 +277,80 @@ def stack(*items): return keras.ops.stack(items, axis=axis) return keras.tree.map_structure(stack, *structures) + + +def fill_triangular_matrix(x: Tensor, upper: bool = False, positive_diag: bool = False): + """ + Reshapes a batch of matrix entries into a triangular matrix (either upper or lower). + + Note: If final axis has length 1, this simply reshapes to (batch_size, 1, 1) and optionally applies softplus. + + Parameters + ---------- + x : Tensor of shape (batch_size, m) + Batch of flattened nonzero matrix elements for triangular matrix. + upper : bool + Return upper triangular matrix if True, else lower triangular matrix. Default is False. + positive_diag : bool + Whether to apply a softplus operation to diagonal elements. Default is False. + + Returns + ------- + Tensor of shape (batch_size, n, n) + Batch of triangular matrices with m = n * (n + 1) / 2 unique nonzero elements. + + Raises + ------ + ValueError + If provided nonzero elements do not correspond to possible triangular matrix shape + (n,n) with n = sqrt( 1/4 + 2 * m) - 1/2 due to m = n * (n + 1) / 2. + """ + batch_shape = x.shape[:-1] + m = x.shape[-1] + + if m == 1: + y = keras.ops.reshape(x, (-1, 1, 1)) + if positive_diag: + y = keras.activations.softplus(y) + return y + + # Calculate matrix shape + n = (0.25 + 2 * m) ** 0.5 - 0.5 + if not np.isclose(np.floor(n), n): + raise ValueError(f"Input right-most shape ({m}) does not correspond to a triangular matrix.") + else: + n = int(n) + + # Trick: Create triangular matrix by concatenating with a flipped version of its tail, then reshape. + x_tail = keras.ops.take(x, indices=list(range((m - (n**2 - m)), x.shape[-1])), axis=-1) + if not upper: + y = keras.ops.concatenate([x_tail, keras.ops.flip(x, axis=-1)], axis=len(batch_shape)) + y = keras.ops.reshape(y, (-1, n, n)) + y = keras.ops.tril(y) # TODO: fails with tensorflow + + if positive_diag: + y_offdiag = keras.ops.tril(y, k=-1) + y_diag = keras.ops.tril( + keras.ops.triu( # carve out diagonal, by setting upper and lower offdiagonals to zero + keras.activations.softplus(y) + ), # apply softplus to enforce positivity + ) + y = y_diag + y_offdiag + + else: + y = keras.ops.concatenate([x, keras.ops.flip(x_tail, axis=-1)], axis=len(batch_shape)) + y = keras.ops.reshape(y, (-1, n, n)) + y = keras.ops.triu( + y, + ) + + if positive_diag: + y_offdiag = keras.ops.triu(y, k=1) + y_diag = keras.ops.tril( + keras.ops.triu( # carve out diagonal, by setting upper and lower offdiagonals to zero + keras.activations.softplus(y) + ), # apply softplus to enforce positivity + ) + y = y_diag + y_offdiag + + return y diff --git a/tests/test_links/conftest.py b/tests/test_links/conftest.py index 8beb0bece..53e9eeac8 100644 --- a/tests/test_links/conftest.py +++ b/tests/test_links/conftest.py @@ -15,7 +15,7 @@ def num_variables(): @pytest.fixture() def generic_preactivation(batch_size): - return keras.ops.ones((batch_size, 4, 4)) + return keras.ops.ones((batch_size, 6)) @pytest.fixture() @@ -33,10 +33,10 @@ def ordered_quantiles(): @pytest.fixture() -def positive_semi_definite(): - from bayesflow.links import PositiveSemiDefinite +def positive_definite(): + from bayesflow.links import PositiveDefinite - return PositiveSemiDefinite() + return PositiveDefinite() @pytest.fixture() @@ -44,7 +44,7 @@ def linear(): return keras.layers.Activation("linear") -@pytest.fixture(params=["ordered", "ordered_quantiles", "positive_semi_definite", "linear"], scope="function") +@pytest.fixture(params=["ordered", "ordered_quantiles", "positive_definite", "linear"], scope="function") def link(request): return request.getfixturevalue(request.param) @@ -84,6 +84,6 @@ def unordered(batch_size, num_quantiles, num_variables): return keras.random.normal((batch_size, num_quantiles, num_variables)) -@pytest.fixture() -def random_matrix_batch(batch_size, num_variables): - return keras.random.normal((batch_size, num_variables, num_variables)) +# @pytest.fixture() +# def random_matrix_batch(batch_size, num_variables): +# return keras.random.normal((batch_size, num_variables, num_variables)) diff --git a/tests/test_links/test_links.py b/tests/test_links/test_links.py index b0ea22242..aed79b988 100644 --- a/tests/test_links/test_links.py +++ b/tests/test_links/test_links.py @@ -3,13 +3,6 @@ import pytest -def test_link_output(link, generic_preactivation): - output_shape = link.compute_output_shape(generic_preactivation.shape) - output = link(generic_preactivation) - - assert output_shape == output.shape - - def test_invalid_shape_for_ordered_quantiles(ordered_quantiles, batch_size, num_quantiles, num_variables): with pytest.raises(AssertionError) as excinfo: ordered_quantiles.build((batch_size, batch_size, num_quantiles, num_variables)) @@ -59,16 +52,17 @@ def test_quantile_ordering(quantiles, unordered): check_ordering(output, axis) -def test_positive_semi_definite(random_matrix_batch): - from bayesflow.links import PositiveSemiDefinite - - activation = PositiveSemiDefinite() - - output = activation(random_matrix_batch) +def test_positive_definite(positive_definite, batch_size, num_variables): + psd = positive_definite + input_shape = psd.compute_input_shape((batch_size, num_variables, num_variables)) + print(input_shape) + random_preactivation = keras.random.normal(input_shape, seed=12) + output = psd(random_preactivation) output = keras.ops.convert_to_numpy(output) eigenvalues = np.linalg.eig(output).eigenvalues assert np.all(eigenvalues.real > 0) and np.all(np.isclose(eigenvalues.imag, 0)), ( - f"output is not positive semi-definite: real={eigenvalues.real}, imag={eigenvalues.imag}" + f"output is not positive definite: min(real)={np.min(eigenvalues.real)}, " + f"max(abs(imag))={np.max(np.abs(eigenvalues.imag))}" ) diff --git a/tests/test_scores/test_scores.py b/tests/test_scores/test_scores.py index 24765688a..73305961d 100644 --- a/tests/test_scores/test_scores.py +++ b/tests/test_scores/test_scores.py @@ -13,15 +13,21 @@ def test_require_argument_k(): def test_score_output(scoring_rule, random_conditions): if random_conditions is None: - random_conditions = keras.ops.convert_to_tensor([[1.0]]) + random_conditions = keras.ops.convert_to_tensor([[1.0, 1.0]]) # Using random random_conditions also as targets for the purpose of this test. head_shapes = scoring_rule.get_head_shapes_from_target_shape(random_conditions.shape) print(scoring_rule.get_config()) - estimates = { - k: scoring_rule.get_link(k)(keras.random.normal((random_conditions.shape[0],) + head_shape)) - for k, head_shape in head_shapes.items() - } + estimates = {} + for key, output_shape in head_shapes.items(): + link = scoring_rule.get_link(key) + if hasattr(link, "compute_input_shape"): + link_input_shape = link.compute_input_shape(output_shape) + else: + link_input_shape = output_shape + dummy_input = keras.random.normal((random_conditions.shape[0],) + link_input_shape) + estimates[key] = link(dummy_input) + score = scoring_rule.score(estimates, random_conditions) assert score.ndim == 0 From 84ed002a24861e20f8d40fe464f9f4de7566cd69 Mon Sep 17 00:00:00 2001 From: han-ol Date: Thu, 27 Mar 2025 16:44:53 +0100 Subject: [PATCH 02/20] Fix format string --- bayesflow/approximators/point_approximator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bayesflow/approximators/point_approximator.py b/bayesflow/approximators/point_approximator.py index 6fb5bdd14..0eeb84112 100644 --- a/bayesflow/approximators/point_approximator.py +++ b/bayesflow/approximators/point_approximator.py @@ -134,7 +134,7 @@ def _apply_inverse_adapter_to_estimates( if head_key in self.inference_network.scores[score_key].not_transforming_like_vector: logging.warning( f"Estimate '{score_key}.{head_key}' is marked to not transform like a vector. " - "It was treated like a vector by the adapter. Handle '{head_key}' estimates with care." + f"It was treated like a vector by the adapter. Handle '{head_key}' estimates with care." ) adapted = self.adapter( From fbc01f573431d6104e6029d0393edd81db43ef90 Mon Sep 17 00:00:00 2001 From: han-ol Date: Fri, 28 Mar 2025 16:45:25 +0100 Subject: [PATCH 03/20] Test for invertibility of positive definite link output --- tests/test_links/test_links.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/tests/test_links/test_links.py b/tests/test_links/test_links.py index aed79b988..411ed2cc2 100644 --- a/tests/test_links/test_links.py +++ b/tests/test_links/test_links.py @@ -53,12 +53,16 @@ def test_quantile_ordering(quantiles, unordered): def test_positive_definite(positive_definite, batch_size, num_variables): - psd = positive_definite - input_shape = psd.compute_input_shape((batch_size, num_variables, num_variables)) - print(input_shape) - random_preactivation = keras.random.normal(input_shape, seed=12) - output = psd(random_preactivation) + input_shape = positive_definite.compute_input_shape((batch_size, num_variables, num_variables)) + # Too strongly negative values lead to numerical instabilities -> reduce scale + random_preactivation = keras.random.normal(input_shape) * 0.1 + output = positive_definite(random_preactivation) + + # Check if output is invertible + np.linalg.inv(output) + + # Calculated eigenvalues to test for positive definiteness output = keras.ops.convert_to_numpy(output) eigenvalues = np.linalg.eig(output).eigenvalues From eebf9508e508dd5016375b01e1392458761767d4 Mon Sep 17 00:00:00 2001 From: han-ol Date: Fri, 28 Mar 2025 16:48:01 +0100 Subject: [PATCH 04/20] Allow estimation of univariate MVN --- bayesflow/networks/point_inference_network.py | 2 +- bayesflow/scores/multivariate_normal_score.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/bayesflow/networks/point_inference_network.py b/bayesflow/networks/point_inference_network.py index a5447c501..0c083c8c7 100644 --- a/bayesflow/networks/point_inference_network.py +++ b/bayesflow/networks/point_inference_network.py @@ -132,7 +132,7 @@ def call( if xz is None and not self.built: raise ValueError("Cannot build inference network without inference variables.") if conditions is None: # unconditional estimation uses a fixed input vector - conditions = keras.ops.convert_to_tensor([[1.0]], dtype=keras.ops.dtype(xz)) + conditions = keras.ops.convert_to_tensor([[1.0]]) # pass conditions to the shared subnet output = self.subnet(conditions, training=training) diff --git a/bayesflow/scores/multivariate_normal_score.py b/bayesflow/scores/multivariate_normal_score.py index efe560388..b1f6ff707 100644 --- a/bayesflow/scores/multivariate_normal_score.py +++ b/bayesflow/scores/multivariate_normal_score.py @@ -101,6 +101,8 @@ def sample(self, batch_shape: Shape, mean: Tensor, covariance: Tensor) -> Tensor Tensor A tensor of shape (batch_size, num_samples, D) containing the generated samples. """ + if len(batch_shape) == 1: + batch_shape = (1,) + batch_shape batch_size, num_samples = batch_shape dim = keras.ops.shape(mean)[-1] if keras.ops.shape(mean) != (batch_size, dim): From 42c6806b75770a1e93278e2018cfc3094a8acc97 Mon Sep 17 00:00:00 2001 From: han-ol Date: Fri, 28 Mar 2025 16:57:03 +0100 Subject: [PATCH 05/20] Remove commented lines --- bayesflow/links/positive_definite.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/bayesflow/links/positive_definite.py b/bayesflow/links/positive_definite.py index d676ac665..cdcb27034 100644 --- a/bayesflow/links/positive_definite.py +++ b/bayesflow/links/positive_definite.py @@ -19,11 +19,6 @@ def call(self, inputs: Tensor) -> Tensor: # Build cholesky factor from inputs L = fill_triangular_matrix(inputs, positive_diag=True) - # diagonal_mask = keras.ops.identity(L.shape[-1]) > 0 - # L[..., diagonal_mask] = keras.activations.softplus(L[..., diagonal_mask]) - # L += keras.ops.identity(L.shape[-1]) * 2 - # L *= keras.ops.sign(keras.ops.diagonal(L, axis1=-1))[..., None] # ensure positive diagonal entries - # calculate positive definite matrix from cholesky factors psd = keras.ops.matmul( L, From d57970ac901caba2c390ae3de5112ea56e44b953 Mon Sep 17 00:00:00 2001 From: han-ol Date: Fri, 28 Mar 2025 17:00:53 +0100 Subject: [PATCH 06/20] Minor changes to comments and docstring for fill_triangular_matrix --- bayesflow/utils/tensor_utils.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/bayesflow/utils/tensor_utils.py b/bayesflow/utils/tensor_utils.py index 9287dd982..0dd73b67e 100644 --- a/bayesflow/utils/tensor_utils.py +++ b/bayesflow/utils/tensor_utils.py @@ -281,7 +281,7 @@ def stack(*items): def fill_triangular_matrix(x: Tensor, upper: bool = False, positive_diag: bool = False): """ - Reshapes a batch of matrix entries into a triangular matrix (either upper or lower). + Reshapes a batch of matrix elements into a triangular matrix (either upper or lower). Note: If final axis has length 1, this simply reshapes to (batch_size, 1, 1) and optionally applies softplus. @@ -326,14 +326,13 @@ def fill_triangular_matrix(x: Tensor, upper: bool = False, positive_diag: bool = if not upper: y = keras.ops.concatenate([x_tail, keras.ops.flip(x, axis=-1)], axis=len(batch_shape)) y = keras.ops.reshape(y, (-1, n, n)) - y = keras.ops.tril(y) # TODO: fails with tensorflow + y = keras.ops.tril(y) if positive_diag: y_offdiag = keras.ops.tril(y, k=-1) + # carve out diagonal, by setting upper and lower offdiagonals to zero y_diag = keras.ops.tril( - keras.ops.triu( # carve out diagonal, by setting upper and lower offdiagonals to zero - keras.activations.softplus(y) - ), # apply softplus to enforce positivity + keras.ops.triu(keras.activations.softplus(y)), # apply softplus to enforce positivity ) y = y_diag + y_offdiag @@ -346,10 +345,9 @@ def fill_triangular_matrix(x: Tensor, upper: bool = False, positive_diag: bool = if positive_diag: y_offdiag = keras.ops.triu(y, k=1) + # carve out diagonal, by setting upper and lower offdiagonals to zero y_diag = keras.ops.tril( - keras.ops.triu( # carve out diagonal, by setting upper and lower offdiagonals to zero - keras.activations.softplus(y) - ), # apply softplus to enforce positivity + keras.ops.triu(keras.activations.softplus(y)), # apply softplus to enforce positivity ) y = y_diag + y_offdiag From ddfdbdce1a21960acd28ce2602e1b08730ed20ff Mon Sep 17 00:00:00 2001 From: han-ol Date: Fri, 28 Mar 2025 17:21:53 +0100 Subject: [PATCH 07/20] Test coverage for unconditional MVNScore.sample --- tests/test_scores/test_scores.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/test_scores/test_scores.py b/tests/test_scores/test_scores.py index 73305961d..4e44c2ef7 100644 --- a/tests/test_scores/test_scores.py +++ b/tests/test_scores/test_scores.py @@ -46,3 +46,17 @@ def test_mean_score_optimality(mean_score, random_conditions): assert suboptimal_score > optimal_score assert keras.ops.isclose(optimal_score, 0) + + +def test_unconditional_mvn(multivariate_normal_score): + mean = keras.ops.convert_to_tensor([[0.0, 1.0]]) + covariance = keras.ops.convert_to_tensor([[[1.0, 0.0], [0.0, 1.0]]]) + multivariate_normal_score.sample((10,), mean, covariance) + + +def test_unconditional_mvn_value_error(multivariate_normal_score): + mean = keras.ops.convert_to_tensor([0.0, 1.0]) + covariance = keras.ops.convert_to_tensor([[1.0, 0.0], [0.0, 1.0]]) + + with pytest.raises(ValueError): + multivariate_normal_score.sample((10,), mean, covariance) From 2b38c211221359b7e061e557475107b0fe81a9ba Mon Sep 17 00:00:00 2001 From: han-ol Date: Fri, 28 Mar 2025 18:39:56 +0100 Subject: [PATCH 08/20] Remove instability warning MultivariateNormalScore --- bayesflow/scores/multivariate_normal_score.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/bayesflow/scores/multivariate_normal_score.py b/bayesflow/scores/multivariate_normal_score.py index b1f6ff707..e49114081 100644 --- a/bayesflow/scores/multivariate_normal_score.py +++ b/bayesflow/scores/multivariate_normal_score.py @@ -5,7 +5,6 @@ from bayesflow.types import Shape, Tensor from bayesflow.links import PositiveDefinite -from bayesflow.utils import logging from .parametric_distribution_score import ParametricDistributionScore @@ -28,8 +27,6 @@ def __init__(self, dim: int = None, links: dict = None, **kwargs): self.config = {"dim": dim} - logging.warning("MultivariateNormalScore is unstable.") - def get_config(self): base_config = super().get_config() return base_config | self.config From 1405ee58811056fb40846a7c45fe1dd8d6ad7791 Mon Sep 17 00:00:00 2001 From: han-ol Date: Mon, 31 Mar 2025 16:32:47 +0200 Subject: [PATCH 09/20] Remove commented numpy import --- bayesflow/links/positive_definite.py | 1 - 1 file changed, 1 deletion(-) diff --git a/bayesflow/links/positive_definite.py b/bayesflow/links/positive_definite.py index cdcb27034..616f9080d 100644 --- a/bayesflow/links/positive_definite.py +++ b/bayesflow/links/positive_definite.py @@ -1,6 +1,5 @@ import keras -# import numpy as np from keras.saving import register_keras_serializable as serializable from bayesflow.types import Tensor From f1e1ba1834a0f1f9cbfcbad4c56a4f870ced9756 Mon Sep 17 00:00:00 2001 From: han-ol Date: Mon, 31 Mar 2025 16:34:49 +0200 Subject: [PATCH 10/20] Fix dtype of dummy conditions if inference variables are available --- bayesflow/networks/point_inference_network.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bayesflow/networks/point_inference_network.py b/bayesflow/networks/point_inference_network.py index 0c083c8c7..05ad467fe 100644 --- a/bayesflow/networks/point_inference_network.py +++ b/bayesflow/networks/point_inference_network.py @@ -132,7 +132,9 @@ def call( if xz is None and not self.built: raise ValueError("Cannot build inference network without inference variables.") if conditions is None: # unconditional estimation uses a fixed input vector - conditions = keras.ops.convert_to_tensor([[1.0]]) + conditions = keras.ops.convert_to_tensor( + [[1.0]], dtype=keras.ops.dtype(xz) if xz is not None else "float32" + ) # pass conditions to the shared subnet output = self.subnet(conditions, training=training) From 9d8765631b8037108b26d21efdbd139bed0b148a Mon Sep 17 00:00:00 2001 From: han-ol Date: Mon, 31 Mar 2025 16:47:48 +0200 Subject: [PATCH 11/20] Tuple conversion in case batch_shape is a list --- bayesflow/scores/multivariate_normal_score.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bayesflow/scores/multivariate_normal_score.py b/bayesflow/scores/multivariate_normal_score.py index e49114081..318fbec0a 100644 --- a/bayesflow/scores/multivariate_normal_score.py +++ b/bayesflow/scores/multivariate_normal_score.py @@ -99,7 +99,7 @@ def sample(self, batch_shape: Shape, mean: Tensor, covariance: Tensor) -> Tensor A tensor of shape (batch_size, num_samples, D) containing the generated samples. """ if len(batch_shape) == 1: - batch_shape = (1,) + batch_shape + batch_shape = (1,) + tuple(batch_shape) batch_size, num_samples = batch_shape dim = keras.ops.shape(mean)[-1] if keras.ops.shape(mean) != (batch_size, dim): From 4bbbffafc47ca3b506d971be376223f4ba66aa43 Mon Sep 17 00:00:00 2001 From: han-ol Date: Mon, 31 Mar 2025 17:05:48 +0200 Subject: [PATCH 12/20] Conversion to numpy before calling numpy operations --- tests/test_links/test_links.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_links/test_links.py b/tests/test_links/test_links.py index 411ed2cc2..ad1be5753 100644 --- a/tests/test_links/test_links.py +++ b/tests/test_links/test_links.py @@ -58,12 +58,12 @@ def test_positive_definite(positive_definite, batch_size, num_variables): # Too strongly negative values lead to numerical instabilities -> reduce scale random_preactivation = keras.random.normal(input_shape) * 0.1 output = positive_definite(random_preactivation) + output = keras.ops.convert_to_numpy(output) # Check if output is invertible np.linalg.inv(output) # Calculated eigenvalues to test for positive definiteness - output = keras.ops.convert_to_numpy(output) eigenvalues = np.linalg.eig(output).eigenvalues assert np.all(eigenvalues.real > 0) and np.all(np.isclose(eigenvalues.imag, 0)), ( From fe201aa15d675bbf1cd3df68c4171737f2e1088e Mon Sep 17 00:00:00 2001 From: han-ol Date: Mon, 31 Mar 2025 17:37:29 +0200 Subject: [PATCH 13/20] More detailed docs and renamed the transformation warning attribute --- bayesflow/approximators/point_approximator.py | 2 +- bayesflow/scores/scoring_rule.py | 32 ++++++++++++++++--- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/bayesflow/approximators/point_approximator.py b/bayesflow/approximators/point_approximator.py index 0eeb84112..9c940241e 100644 --- a/bayesflow/approximators/point_approximator.py +++ b/bayesflow/approximators/point_approximator.py @@ -131,7 +131,7 @@ def _apply_inverse_adapter_to_estimates( for score_key, score_val in estimates.items(): processed[score_key] = {} for head_key, estimate in score_val.items(): - if head_key in self.inference_network.scores[score_key].not_transforming_like_vector: + if head_key in self.inference_network.scores[score_key].not_transforming_like_vector_warning: logging.warning( f"Estimate '{score_key}.{head_key}' is marked to not transform like a vector. " f"It was treated like a vector by the adapter. Handle '{head_key}' estimates with care." diff --git a/bayesflow/scores/scoring_rule.py b/bayesflow/scores/scoring_rule.py index b16d14ac3..384955e72 100644 --- a/bayesflow/scores/scoring_rule.py +++ b/bayesflow/scores/scoring_rule.py @@ -17,6 +17,12 @@ class ScoringRule: To define a custom ``ScoringRule``, inherit from this class and overwrite the score method. For proper serialization, any new constructor arguments must be taken care of in a `get_config` method. + + Estimates are typically parameterized by projection heads consisting of a neural network component + and a link to project into the correct output space. + + `ScoringRule`s can score estimates consisting of multiple parts. See `MultivariateNormalScore` for an example + of a `ParametricDistributionScore`. The score evaluates an estimated mean and covariance simultaneously. """ def __init__( @@ -29,7 +35,12 @@ def __init__( self.subnets_kwargs = subnets_kwargs or {} self.links = links or {} - self.not_transforming_like_vector = [] + # Prediction heads can output estimates in spaces other than the target distribution space. + # To such estimates the adapter cannot be straightforwardly applied in inverse direction, + # because the adapter is built to map vectors. When subclassing `ScoringRule`, add the names + # of such heads to the following list to warn users about difficulties with a type of estimate + # whenever the adapter is applied to them in inverse direction. + self.not_transforming_like_vector_warning = [] self.config = {"subnets_kwargs": self.subnets_kwargs} @@ -60,12 +71,15 @@ def get_head_shapes_from_target_shape(self, target_shape: Shape) -> dict[str, Sh def get_subnet(self, key: str) -> keras.Layer: """For a specified key, request a subnet to be used for projecting the shared condition embedding - before reshaping to the heads output shape. + before further projection and reshaping to the heads output shape. + + If no subnet was specified for the key (e.g. upon initialization), + return just an instance of keras.layers.Identity. Parameters ---------- key : str - Name of head for which to request a link. + Name of head for which to request a subnet. Returns ------- @@ -80,6 +94,8 @@ def get_subnet(self, key: str) -> keras.Layer: def get_link(self, key: str) -> keras.Layer: """For a specified key, request a link from network output to estimation target. + If no link was specified for the key (e.g. upon initialization), return a linear activation. + Parameters ---------- key : str @@ -98,7 +114,15 @@ def get_link(self, key: str) -> keras.Layer: return self.links[key] def get_head(self, key: str, output_shape: Shape) -> keras.Sequential: - """For a specified head key and shape, request corresponding head network. + """For a specified head key and output shape, request corresponding head network. + + A head network has the following components that are called sequentially: + 1. subnet: A keras.Layer. + 2. dense: A trainable linear projection with as many units as are required by the next component. + 3. reshape: Changes shape of output of projection to match requirements of next component. + 4. link: Transforms unconstrained values into a constrained space for the final estimator. + + This method initializes the components in reverse order to meet all requirements and returns them. Parameters ---------- From 02ea22ce528292567cf9588801ca0388383a96a7 Mon Sep 17 00:00:00 2001 From: han-ol Date: Tue, 1 Apr 2025 11:59:39 +0200 Subject: [PATCH 14/20] Doc string detail --- bayesflow/scores/scoring_rule.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/bayesflow/scores/scoring_rule.py b/bayesflow/scores/scoring_rule.py index 384955e72..9154675c1 100644 --- a/bayesflow/scores/scoring_rule.py +++ b/bayesflow/scores/scoring_rule.py @@ -129,7 +129,8 @@ def get_head(self, key: str, output_shape: Shape) -> keras.Sequential: key : str Name of head for which to request a link. output_shape: Shape - The necessary shape for the point estimators. + The necessary shape of estimated values for the given key as returned by + `scoring_rule.get_head_shapes_from_target_shape()`. Returns ------- From 9b466016595d44cb22a6e821740bb02dd8101fba Mon Sep 17 00:00:00 2001 From: han-ol Date: Tue, 1 Apr 2025 14:13:46 +0200 Subject: [PATCH 15/20] Remove untested comment for PointInferenceNetwork.sample() --- bayesflow/networks/point_inference_network.py | 1 - 1 file changed, 1 deletion(-) diff --git a/bayesflow/networks/point_inference_network.py b/bayesflow/networks/point_inference_network.py index 05ad467fe..06a10d288 100644 --- a/bayesflow/networks/point_inference_network.py +++ b/bayesflow/networks/point_inference_network.py @@ -167,7 +167,6 @@ def compute_metrics( return metrics | {"loss": neg_score} - # WIP: untested draft of sample method @allow_batch_size def sample(self, batch_shape: Shape, conditions: Tensor = None) -> dict[str, Tensor]: """ From 5cb8995387325359752ba1ab94b415fca2c30f64 Mon Sep 17 00:00:00 2001 From: han-ol Date: Tue, 1 Apr 2025 14:39:17 +0200 Subject: [PATCH 16/20] Relax type hints for ContinuousApproximator.log_prob --- bayesflow/approximators/continuous_approximator.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bayesflow/approximators/continuous_approximator.py b/bayesflow/approximators/continuous_approximator.py index d1d57bb90..eb45f3fc7 100644 --- a/bayesflow/approximators/continuous_approximator.py +++ b/bayesflow/approximators/continuous_approximator.py @@ -338,7 +338,7 @@ def _sample( **filter_kwargs(kwargs, self.inference_network.sample), ) - def log_prob(self, data: dict[str, np.ndarray], **kwargs) -> np.ndarray: + def log_prob(self, data: dict[str, np.ndarray], **kwargs) -> np.ndarray | dict(str, np.ndarray): """ Computes the log-probability of given data under the model. The `data` dictionary is preprocessed using the `adapter`. Log-probabilities are returned as NumPy arrays. @@ -358,7 +358,7 @@ def log_prob(self, data: dict[str, np.ndarray], **kwargs) -> np.ndarray: data = self.adapter(data, strict=False, stage="inference", **kwargs) data = keras.tree.map_structure(keras.ops.convert_to_tensor, data) log_prob = self._log_prob(**data, **kwargs) - log_prob = keras.ops.convert_to_numpy(log_prob) + log_prob = keras.tree.map_structure(keras.ops.convert_to_numpy, log_prob) return log_prob @@ -368,7 +368,7 @@ def _log_prob( inference_conditions: Tensor = None, summary_variables: Tensor = None, **kwargs, - ) -> Tensor: + ) -> Tensor | dict(str, Tensor): if self.summary_network is None: if summary_variables is not None: raise ValueError("Cannot use summary variables without a summary network.") From 303127d2399faf39d324255bfd544113c9ad6342 Mon Sep 17 00:00:00 2001 From: han-ol Date: Tue, 1 Apr 2025 14:50:40 +0200 Subject: [PATCH 17/20] Support log-prob in PointApproximator --- bayesflow/approximators/point_approximator.py | 46 +++++++++++++++++-- 1 file changed, 42 insertions(+), 4 deletions(-) diff --git a/bayesflow/approximators/point_approximator.py b/bayesflow/approximators/point_approximator.py index 9c940241e..7e35153fb 100644 --- a/bayesflow/approximators/point_approximator.py +++ b/bayesflow/approximators/point_approximator.py @@ -111,11 +111,47 @@ def sample( if split: raise NotImplementedError("split=True is currently not supported for `PointApproximator`.") samples = split_arrays(samples, axis=-1) - # Squeeze samples if there's only one key-value pair. - samples = self._squeeze_samples(samples) + # Squeeze sample dictionary if there's only one key-value pair. + samples = self._squeeze_parametric_score_major_dict(samples) return samples + def log_prob( + self, + *, + data: dict[str, np.ndarray], + **kwargs, + ) -> np.ndarray | dict[str, np.ndarray]: + """ + Computes the log-probability of given data under the parametric distribution(s) for given input conditions. + + Parameters + ---------- + data : dict[str, np.ndarray] + A dictionary mapping variable names to arrays representing the inference conditions and variables. + **kwargs + Additional keyword arguments passed to underlying processing functions. + + Returns + ------- + log_prob : np.ndarray or dict[str, np.ndarray] + Log-probabilities of the distribution `p(inference_variables | inference_conditions, h(summary_conditions))` + for all parametric scoring rules. + + If only one parametric score is available, output is an array of log-probabilities. + + Output is a dictionary if multiple parametric scores are available. + Then, each key is the name of a score and values are corresponding log-probabilities. + + + Log-probabilities have shape (num_datasets,). + """ + log_prob = super().log_prob(data=data, **kwargs) + # Squeeze log probabilities dictionary if there's only one key-value pair. + log_prob = self._squeeze_parametric_score_major_dict(log_prob) + + return log_prob + def _prepare_conditions(self, conditions: dict[str, np.ndarray], **kwargs) -> dict[str, Tensor]: """Adapts and converts the conditions to tensors.""" conditions = self.adapter(conditions, strict=False, stage="inference", **kwargs) @@ -187,8 +223,10 @@ def _squeeze_estimates( } return squeezed - def _squeeze_samples(self, samples: dict[str, np.ndarray]) -> np.ndarray or dict[str, np.ndarray]: - """Squeezes the samples dictionary to just the value if there is only one key-value pair.""" + def _squeeze_parametric_score_major_dict( + self, samples: dict[str, np.ndarray] + ) -> np.ndarray or dict[str, np.ndarray]: + """Squeezes the dictionary to just the value if there is only one key-value pair.""" if len(samples) == 1: return next(iter(samples.values())) # Extract and return the only item's value return samples From 93e88332f7e1334f0520cdd3618dbb9219de3bfd Mon Sep 17 00:00:00 2001 From: han-ol Date: Tue, 1 Apr 2025 14:51:34 +0200 Subject: [PATCH 18/20] Remove comment stating log prob was untested --- bayesflow/networks/point_inference_network.py | 1 - 1 file changed, 1 deletion(-) diff --git a/bayesflow/networks/point_inference_network.py b/bayesflow/networks/point_inference_network.py index 06a10d288..7a3ed1628 100644 --- a/bayesflow/networks/point_inference_network.py +++ b/bayesflow/networks/point_inference_network.py @@ -200,7 +200,6 @@ def sample(self, batch_shape: Shape, conditions: Tensor = None) -> dict[str, Ten return samples - # WIP: untested draft of log_prob method def log_prob(self, samples: Tensor, conditions: Tensor = None, **kwargs) -> dict[str, Tensor]: output = self.subnet(conditions) log_probs = {} From 7bfacff5baae4c8c7b32eedc0118285b93dffdb9 Mon Sep 17 00:00:00 2001 From: han-ol Date: Tue, 1 Apr 2025 14:57:43 +0200 Subject: [PATCH 19/20] Fix typo --- bayesflow/approximators/continuous_approximator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bayesflow/approximators/continuous_approximator.py b/bayesflow/approximators/continuous_approximator.py index eb45f3fc7..ed92795a7 100644 --- a/bayesflow/approximators/continuous_approximator.py +++ b/bayesflow/approximators/continuous_approximator.py @@ -338,7 +338,7 @@ def _sample( **filter_kwargs(kwargs, self.inference_network.sample), ) - def log_prob(self, data: dict[str, np.ndarray], **kwargs) -> np.ndarray | dict(str, np.ndarray): + def log_prob(self, data: dict[str, np.ndarray], **kwargs) -> np.ndarray | dict[str, np.ndarray]: """ Computes the log-probability of given data under the model. The `data` dictionary is preprocessed using the `adapter`. Log-probabilities are returned as NumPy arrays. @@ -368,7 +368,7 @@ def _log_prob( inference_conditions: Tensor = None, summary_variables: Tensor = None, **kwargs, - ) -> Tensor | dict(str, Tensor): + ) -> Tensor | dict[str, Tensor]: if self.summary_network is None: if summary_variables is not None: raise ValueError("Cannot use summary variables without a summary network.") From d87b0b911b6dbab8790dd390ce237738c48c6b1f Mon Sep 17 00:00:00 2001 From: han-ol Date: Tue, 1 Apr 2025 17:48:17 +0200 Subject: [PATCH 20/20] Transformation warning using a class variable; docstring links --- bayesflow/approximators/point_approximator.py | 14 ++++---- bayesflow/scores/multivariate_normal_score.py | 13 +++++-- bayesflow/scores/scoring_rule.py | 36 +++++++++++-------- 3 files changed, 39 insertions(+), 24 deletions(-) diff --git a/bayesflow/approximators/point_approximator.py b/bayesflow/approximators/point_approximator.py index 7e35153fb..5f1acf193 100644 --- a/bayesflow/approximators/point_approximator.py +++ b/bayesflow/approximators/point_approximator.py @@ -14,8 +14,9 @@ class PointApproximator(ContinuousApproximator): """ A workflow for fast amortized point estimation of a conditional distribution. - The distribution is approximated by point estimators, parameterized by a feed-forward `PointInferenceNetwork`. - Conditions can be compressed by an optional `SummaryNetwork` or used directly as input to the inference network. + The distribution is approximated by point estimators, parameterized by a feed-forward + :class:`bayesflow.networks.PointInferenceNetwork`. Conditions can be compressed by an optional summary network + (inheriting from :class:`bayesflow.networks.SummaryNetwork`) or used directly as input to the inference network. """ def estimate( @@ -89,7 +90,7 @@ def sample( for the sampling process. split : bool, optional If True, the sampled arrays are split along the last axis, by default False. - Currently not supported for `PointApproximator`. + Currently not supported for :class:`PointApproximator` . **kwargs Additional keyword arguments passed to underlying processing functions. @@ -135,15 +136,14 @@ def log_prob( Returns ------- log_prob : np.ndarray or dict[str, np.ndarray] - Log-probabilities of the distribution `p(inference_variables | inference_conditions, h(summary_conditions))` - for all parametric scoring rules. + Log-probabilities of the distribution + `p(inference_variables | inference_conditions, h(summary_conditions))` for all parametric scoring rules. If only one parametric score is available, output is an array of log-probabilities. Output is a dictionary if multiple parametric scores are available. Then, each key is the name of a score and values are corresponding log-probabilities. - Log-probabilities have shape (num_datasets,). """ log_prob = super().log_prob(data=data, **kwargs) @@ -167,7 +167,7 @@ def _apply_inverse_adapter_to_estimates( for score_key, score_val in estimates.items(): processed[score_key] = {} for head_key, estimate in score_val.items(): - if head_key in self.inference_network.scores[score_key].not_transforming_like_vector_warning: + if head_key in self.inference_network.scores[score_key].NOT_TRANSFORMING_LIKE_VECTOR_WARNING: logging.warning( f"Estimate '{score_key}.{head_key}' is marked to not transform like a vector. " f"It was treated like a vector by the adapter. Handle '{head_key}' estimates with care." diff --git a/bayesflow/scores/multivariate_normal_score.py b/bayesflow/scores/multivariate_normal_score.py index 318fbec0a..90ccfbbf6 100644 --- a/bayesflow/scores/multivariate_normal_score.py +++ b/bayesflow/scores/multivariate_normal_score.py @@ -16,15 +16,22 @@ class MultivariateNormalScore(ParametricDistributionScore): Scores a predicted mean and covariance matrix with the log-score of the probability of the materialized value. """ + NOT_TRANSFORMING_LIKE_VECTOR_WARNING = ("covariance",) + """ + Marks head for covariance matrix as an exception for adapter transformations. + + This variable contains names of prediction heads that should lead to a warning when the adapter is applied + in inverse direction to them. + + For more information see :class:`ScoringRule`. + """ + def __init__(self, dim: int = None, links: dict = None, **kwargs): super().__init__(links=links, **kwargs) self.dim = dim self.links = links or {"covariance": PositiveDefinite()} - # mark head for covariance matrix as an exception for adapter transformations - self.not_transforming_like_vector = ["covariance"] - self.config = {"dim": dim} def get_config(self): diff --git a/bayesflow/scores/scoring_rule.py b/bayesflow/scores/scoring_rule.py index 9154675c1..dd671189c 100644 --- a/bayesflow/scores/scoring_rule.py +++ b/bayesflow/scores/scoring_rule.py @@ -15,14 +15,27 @@ class ScoringRule: when sampling from the true distribution. By minimizing an expected score, estimates with different properties can be obtained. - To define a custom ``ScoringRule``, inherit from this class and overwrite the score method. + To define a custom :class:`ScoringRule`, inherit from this class and overwrite the score method. For proper serialization, any new constructor arguments must be taken care of in a `get_config` method. Estimates are typically parameterized by projection heads consisting of a neural network component and a link to project into the correct output space. - `ScoringRule`s can score estimates consisting of multiple parts. See `MultivariateNormalScore` for an example - of a `ParametricDistributionScore`. The score evaluates an estimated mean and covariance simultaneously. + A :class:`ScoringRule` can score estimates consisting of multiple parts. See :class:`MultivariateNormalScore` + for an example of a :class:`ParametricDistributionScore`. That score evaluates an estimated mean + and covariance simultaneously. + """ + + NOT_TRANSFORMING_LIKE_VECTOR_WARNING = tuple() + """ + This variable contains names of prediction heads that should lead to a warning when the adapter is applied + in inverse direction to them. + + Prediction heads can output estimates in spaces other than the target distribution space. + To such estimates the adapter cannot be straightforwardly applied in inverse direction, + because the adapter is built to map vectors from the inference variable space. When subclassing + :class:`ScoringRule`, add the names of such heads to the following list to warn users about difficulties + with a type of estimate whenever the adapter is applied to them in inverse direction. """ def __init__( @@ -35,13 +48,6 @@ def __init__( self.subnets_kwargs = subnets_kwargs or {} self.links = links or {} - # Prediction heads can output estimates in spaces other than the target distribution space. - # To such estimates the adapter cannot be straightforwardly applied in inverse direction, - # because the adapter is built to map vectors. When subclassing `ScoringRule`, add the names - # of such heads to the following list to warn users about difficulties with a type of estimate - # whenever the adapter is applied to them in inverse direction. - self.not_transforming_like_vector_warning = [] - self.config = {"subnets_kwargs": self.subnets_kwargs} def get_config(self): @@ -117,10 +123,12 @@ def get_head(self, key: str, output_shape: Shape) -> keras.Sequential: """For a specified head key and output shape, request corresponding head network. A head network has the following components that are called sequentially: + 1. subnet: A keras.Layer. 2. dense: A trainable linear projection with as many units as are required by the next component. 3. reshape: Changes shape of output of projection to match requirements of next component. 4. link: Transforms unconstrained values into a constrained space for the final estimator. + See :mod:`bayesflow.links` for examples. This method initializes the components in reverse order to meet all requirements and returns them. @@ -130,7 +138,7 @@ def get_head(self, key: str, output_shape: Shape) -> keras.Sequential: Name of head for which to request a link. output_shape: Shape The necessary shape of estimated values for the given key as returned by - `scoring_rule.get_head_shapes_from_target_shape()`. + :func:`get_head_shapes_from_target_shape()`. Returns ------- @@ -173,11 +181,11 @@ def score(self, estimates: dict[str, Tensor], targets: Tensor, weights: Tensor) Examples -------- - The following shows how to score estimates with a ``MeanScore``. All ``ScoringRule`` s follow this pattern, - only differing in the structure of the estimates dictionary. + The following shows how to score estimates with a :class:`MeanScore`. All :class:`ScoringRule` s + follow this pattern, only differing in the structure of the estimates dictionary. >>> import keras - ... from bayesflow.scores import MeanScore + >>> from bayesflow.scores import MeanScore >>> >>> # batch of samples from a normal distribution >>> samples = keras.random.normal(shape=(100,))