Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions bayesflow/approximators/continuous_approximator.py
Original file line number Diff line number Diff line change
Expand Up @@ -578,9 +578,9 @@
def summaries(self, data: Mapping[str, np.ndarray], **kwargs) -> np.ndarray:
"""
.. deprecated:: 2.0.4
`summaries` will be removed in version 2.0.5, it was renamed to `summarize` which should be used instead.
`summaries` will be removed in version 2.0.6, it was renamed to `summarize` which should be used instead.
"""
warnings.warn("`summaries` was renamed to `summarize` and will be removed in version 2.0.5.", FutureWarning)
warnings.warn("`summaries` was renamed to `summarize` and will be removed in version 2.0.6.", FutureWarning)

Check warning on line 583 in bayesflow/approximators/continuous_approximator.py

View check run for this annotation

Codecov / codecov/patch

bayesflow/approximators/continuous_approximator.py#L583

Added line #L583 was not covered by tests
return self.summarize(data=data, **kwargs)

def log_prob(self, data: Mapping[str, np.ndarray], **kwargs) -> np.ndarray:
Expand Down
4 changes: 2 additions & 2 deletions bayesflow/approximators/model_comparison_approximator.py
Original file line number Diff line number Diff line change
Expand Up @@ -442,9 +442,9 @@
def summaries(self, data: Mapping[str, np.ndarray], **kwargs) -> np.ndarray:
"""
.. deprecated:: 2.0.4
`summaries` will be removed in version 2.0.5, it was renamed to `summarize` which should be used instead.
`summaries` will be removed in version 2.0.6, it was renamed to `summarize` which should be used instead.
"""
warnings.warn("`summaries` was renamed to `summarize` and will be removed in version 2.0.5.", FutureWarning)
warnings.warn("`summaries` was renamed to `summarize` and will be removed in version 2.0.6.", FutureWarning)

Check warning on line 447 in bayesflow/approximators/model_comparison_approximator.py

View check run for this annotation

Codecov / codecov/patch

bayesflow/approximators/model_comparison_approximator.py#L447

Added line #L447 was not covered by tests
return self.summarize(data=data, **kwargs)

def _compute_logits(self, classifier_conditions: Tensor) -> Tensor:
Expand Down
1 change: 1 addition & 0 deletions bayesflow/diagnostics/metrics/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@
from .expected_calibration_error import expected_calibration_error
from .classifier_two_sample_test import classifier_two_sample_test
from .model_misspecification import bootstrap_comparison, summary_space_comparison
from .sbc import log_gamma
162 changes: 162 additions & 0 deletions bayesflow/diagnostics/metrics/sbc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
from collections.abc import Mapping, Sequence

import numpy as np
from scipy.stats import binom

from ...utils.dict_utils import dicts_to_arrays


def log_gamma(
estimates: Mapping[str, np.ndarray] | np.ndarray,
targets: Mapping[str, np.ndarray] | np.ndarray,
variable_keys: Sequence[str] = None,
variable_names: Sequence[str] = None,
num_null_draws: int = 1000,
quantile: float = 0.05,
):
"""
Compute the log gamma discrepancy statistic, see [1] for additional information.
Log gamma is log(gamma/gamma_null), where gamma_null is the 5th percentile of the
null distribution under uniformity of ranks.
That is, if adopting a hypothesis testing framework,then log_gamma < 0 implies
a rejection of the hypothesis of uniform ranks at the 5% level.
This diagnostic is typically more sensitive than the Kolmogorov-Smirnoff test or
ChiSq test.

[1] Martin Modrák. Angie H. Moon. Shinyoung Kim. Paul Bürkner. Niko Huurre.
Kateřina Faltejsková. Andrew Gelman. Aki Vehtari.
"Simulation-Based Calibration Checking for Bayesian Computation:
The Choice of Test Quantities Shapes Sensitivity."
Bayesian Anal. 20 (2) 461 - 488, June 2025. https://doi.org/10.1214/23-BA1404

Parameters
----------
estimates : np.ndarray of shape (num_datasets, num_draws, num_variables)
The random draws from the approximate posteriors over ``num_datasets``
targets : np.ndarray of shape (num_datasets, num_variables)
The corresponding ground-truth values sampled from the prior
variable_keys : Sequence[str], optional (default = None)
Select keys from the dictionaries provided in estimates and targets.
By default, select all keys.
variable_names : Sequence[str], optional (default = None)
Optional variable names to show in the output.
quantile : float in (0, 1), optional, default 0.05
The quantile from the null distribution to be used as a threshold.
A lower quantile increases sensitivity to deviations from uniformity.

Returns
-------
result : dict
Dictionary containing:

- "values" : float or np.ndarray
The log gamma values per variable
- "metric_name" : str
The name of the metric ("Log Gamma").
- "variable_names" : str
The (inferred) variable names.
"""
samples = dicts_to_arrays(
estimates=estimates,
targets=targets,
variable_keys=variable_keys,
variable_names=variable_names,
)

num_ranks = samples["estimates"].shape[0]
num_post_draws = samples["estimates"].shape[1]

# rank statistics
ranks = np.sum(samples["estimates"] < samples["targets"][:, None], axis=1)

# null distribution and threshold
null_distribution = gamma_null_distribution(num_ranks, num_post_draws, num_null_draws)
null_quantile = np.quantile(null_distribution, quantile)

# compute log gamma for each parameter
log_gammas = np.empty(ranks.shape[-1])

for i in range(ranks.shape[-1]):
gamma = gamma_discrepancy(ranks[:, i], num_post_draws=num_post_draws)
log_gammas[i] = np.log(gamma / null_quantile)

output = {
"values": log_gammas,
"metric_name": "Log Gamma",
"variable_names": samples["estimates"].variable_names,
}

return output


def gamma_null_distribution(num_ranks: int, num_post_draws: int = 1000, num_null_draws: int = 1000) -> np.ndarray:
"""
Computes the distribution of expected gamma values under uniformity of ranks.

Parameters
----------
num_ranks : int
Number of ranks to use for each gamma.
num_post_draws : int, optional, default 1000
Number of posterior draws that were used to calculate the rank distribution.
num_null_draws : int, optional, default 1000
Number of returned gamma values under uniformity of ranks.

Returns
-------
result : np.ndarray
Array of shape (num_null_draws,) containing gamma values under uniformity of ranks.
"""
z_i = np.arange(1, num_post_draws + 2) / (num_post_draws + 1)
gamma = np.empty(num_null_draws)

# loop non-vectorized to reduce memory footprint
for i in range(num_null_draws):
u = np.random.uniform(size=num_ranks)
F_z = np.mean(u[:, None] < z_i, axis=0)
bin_1 = binom.cdf(num_ranks * F_z, num_ranks, z_i)
bin_2 = 1 - binom.cdf(num_ranks * F_z - 1, num_ranks, z_i)

gamma[i] = 2 * np.min(np.minimum(bin_1, bin_2))

return gamma


def gamma_discrepancy(ranks: np.ndarray, num_post_draws: int = 100) -> float:
"""
Quantifies deviation from uniformity by the likelihood of observing the
most extreme point on the empirical CDF of the given rank distribution
according to [1] (equation 7).

[1] Martin Modrák. Angie H. Moon. Shinyoung Kim. Paul Bürkner. Niko Huurre.
Kateřina Faltejsková. Andrew Gelman. Aki Vehtari.
"Simulation-Based Calibration Checking for Bayesian Computation:
The Choice of Test Quantities Shapes Sensitivity."
Bayesian Anal. 20 (2) 461 - 488, June 2025. https://doi.org/10.1214/23-BA1404

Parameters
----------
ranks : array of shape (num_ranks,)
Empirical rank distribution
num_post_draws : int, optional, default 100
Number of posterior draws used to generate ranks.

Returns
-------
result : float
Gamma discrepancy values for each parameter.
"""
num_ranks = len(ranks)

# observed count of ranks smaller than i
R_i = np.array([sum(ranks < i) for i in range(1, num_post_draws + 2)])

# expected proportion of ranks smaller than i
z_i = np.arange(1, num_post_draws + 2) / (num_post_draws + 1)

bin_1 = binom.cdf(R_i, num_ranks, z_i)
bin_2 = 1 - binom.cdf(R_i - 1, num_ranks, z_i)

# likelihood of obtaining the most extreme point on the empirical CDF
# if the rank distribution was indeed uniform
return float(2 * np.min(np.minimum(bin_1, bin_2)))
13 changes: 7 additions & 6 deletions bayesflow/distributions/diagonal_normal.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@ def __init__(
self.seed_generator = seed_generator or keras.random.SeedGenerator()

self.dim = None
self.log_normalization_constant = None
self._mean = None
self._std = None

Expand All @@ -71,17 +70,18 @@ def build(self, input_shape: Shape) -> None:
self.mean = ops.cast(ops.broadcast_to(self.mean, (self.dim,)), "float32")
self.std = ops.cast(ops.broadcast_to(self.std, (self.dim,)), "float32")

self.log_normalization_constant = -0.5 * self.dim * math.log(2.0 * math.pi) - ops.sum(ops.log(self.std))

if self.trainable_parameters:
self._mean = self.add_weight(
shape=ops.shape(self.mean),
initializer=keras.initializers.get(self.mean),
initializer=keras.initializers.get(keras.ops.copy(self.mean)),
dtype="float32",
trainable=True,
)
self._std = self.add_weight(
shape=ops.shape(self.std), initializer=keras.initializers.get(self.std), dtype="float32", trainable=True
shape=ops.shape(self.std),
initializer=keras.initializers.get(keras.ops.copy(self.std)),
dtype="float32",
trainable=True,
)
else:
self._mean = self.mean
Expand All @@ -91,7 +91,8 @@ def log_prob(self, samples: Tensor, *, normalize: bool = True) -> Tensor:
result = -0.5 * ops.sum((samples - self._mean) ** 2 / self._std**2, axis=-1)

if normalize:
result += self.log_normalization_constant
log_normalization_constant = -0.5 * self.dim * math.log(2.0 * math.pi) - ops.sum(ops.log(self._std))
result += log_normalization_constant

return result

Expand Down
25 changes: 13 additions & 12 deletions bayesflow/distributions/diagonal_student_t.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,6 @@ def __init__(

self.seed_generator = seed_generator or keras.random.SeedGenerator()

self.log_normalization_constant = None
self.dim = None
self._loc = None
self._scale = None
Expand All @@ -78,21 +77,16 @@ def build(self, input_shape: Shape) -> None:
self.loc = ops.cast(ops.broadcast_to(self.loc, (self.dim,)), "float32")
self.scale = ops.cast(ops.broadcast_to(self.scale, (self.dim,)), "float32")

self.log_normalization_constant = (
-0.5 * self.dim * math.log(self.df)
- 0.5 * self.dim * math.log(math.pi)
- math.lgamma(0.5 * self.df)
+ math.lgamma(0.5 * (self.df + self.dim))
- ops.sum(keras.ops.log(self.scale))
)

if self.trainable_parameters:
self._loc = self.add_weight(
shape=ops.shape(self.loc), initializer=keras.initializers.get(self.loc), dtype="float32", trainable=True
shape=ops.shape(self.loc),
initializer=keras.initializers.get(keras.ops.copy(self.loc)),
dtype="float32",
trainable=True,
)
self._scale = self.add_weight(
shape=ops.shape(self.scale),
initializer=keras.initializers.get(self.scale),
initializer=keras.initializers.get(keras.ops.copy(self.scale)),
dtype="float32",
trainable=True,
)
Expand All @@ -105,7 +99,14 @@ def log_prob(self, samples: Tensor, *, normalize: bool = True) -> Tensor:
result = -0.5 * (self.df + self.dim) * ops.log1p(mahalanobis_term / self.df)

if normalize:
result += self.log_normalization_constant
log_normalization_constant = (
-0.5 * self.dim * math.log(self.df)
- 0.5 * self.dim * math.log(math.pi)
- math.lgamma(0.5 * self.df)
+ math.lgamma(0.5 * (self.df + self.dim))
- ops.sum(keras.ops.log(self._scale))
)
result += log_normalization_constant

return result

Expand Down
2 changes: 1 addition & 1 deletion bayesflow/distributions/mixture.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ def build(self, input_shape: Shape) -> None:

self._mixture_logits = self.add_weight(
shape=(len(self.distributions),),
initializer=keras.initializers.get(self.mixture_logits),
initializer=keras.initializers.get(keras.ops.copy(self.mixture_logits)),
dtype="float32",
trainable=self.trainable_mixture,
)
Expand Down
10 changes: 5 additions & 5 deletions bayesflow/networks/standardization/standardization.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def moving_std(self, index: int) -> Tensor:
"""
return keras.ops.where(
self.moving_m2[index] > 0,
keras.ops.sqrt(self.moving_m2[index] / self.count),
keras.ops.sqrt(self.moving_m2[index] / self.count[index]),
1.0,
)

Expand All @@ -53,7 +53,7 @@ def build(self, input_shape: Shape):
self.moving_m2 = [
self.add_weight(shape=(shape[-1],), initializer="zeros", trainable=False) for shape in flattened_shapes
]
self.count = self.add_weight(shape=(), initializer="zeros", trainable=False)
self.count = [self.add_weight(shape=(), initializer="zeros", trainable=False) for _ in flattened_shapes]

def call(
self,
Expand Down Expand Up @@ -150,7 +150,7 @@ def _update_moments(self, x: Tensor, index: int):
"""

reduce_axes = tuple(range(x.ndim - 1))
batch_count = keras.ops.cast(keras.ops.shape(x)[0], self.count.dtype)
batch_count = keras.ops.cast(keras.ops.prod(keras.ops.shape(x)[:-1]), self.count[index].dtype)

# Compute batch mean and M2 per feature
batch_mean = keras.ops.mean(x, axis=reduce_axes)
Expand All @@ -159,7 +159,7 @@ def _update_moments(self, x: Tensor, index: int):
# Read current totals
mean = self.moving_mean[index]
m2 = self.moving_m2[index]
count = self.count
count = self.count[index]

total_count = count + batch_count
delta = batch_mean - mean
Expand All @@ -169,4 +169,4 @@ def _update_moments(self, x: Tensor, index: int):

self.moving_mean[index].assign(new_mean)
self.moving_m2[index].assign(new_m2)
self.count.assign(total_count)
self.count[index].assign(total_count)
14 changes: 8 additions & 6 deletions bayesflow/scores/multivariate_normal_score.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,13 +82,15 @@ def log_prob(self, x: Tensor, mean: Tensor, cov_chol: Tensor) -> Tensor:
"""
diff = x - mean

# Calculate covariance from Cholesky factors
covariance = keras.ops.matmul(
cov_chol,
keras.ops.swapaxes(cov_chol, -2, -1),
# Calculate precision from Cholesky factors of covariance matrix
cov_chol_inv = keras.ops.inv(cov_chol)
precision = keras.ops.matmul(
keras.ops.swapaxes(cov_chol_inv, -2, -1),
cov_chol_inv,
)
precision = keras.ops.inv(covariance)
log_det_covariance = keras.ops.slogdet(covariance)[1] # Only take the log of the determinant part

# Compute log determinant, exploiting Cholesky factors
log_det_covariance = keras.ops.log(keras.ops.prod(keras.ops.diagonal(cov_chol, axis1=1, axis2=2), axis=1)) * 2

# Compute the quadratic term in the exponential of the multivariate Gaussian
quadratic_term = keras.ops.einsum("...i,...ij,...j->...", diff, precision, diff)
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "bayesflow"
version = "2.0.4"
version = "2.0.5"
authors = [{ name = "The BayesFlow Team" }]
classifiers = [
"Development Status :: 5 - Production/Stable",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ def test_save_and_load(tmp_path, approximator, train_dataset, validation_dataset
approximator.build(data_shapes)
for layer in approximator.standardize_layers.values():
assert layer.built
assert layer.count == 0
for count in layer.count:
assert count == 0.0
approximator.compute_metrics(**train_dataset[0])

keras.saving.save_model(approximator, tmp_path / "model.keras")
Expand Down
3 changes: 2 additions & 1 deletion tests/test_approximators/test_build.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,5 @@ def test_build(approximator, simulator, batch_size, adapter):
approximator.build(batch_shapes)
for layer in approximator.standardize_layers.values():
assert layer.built
assert layer.count == 0
for count in layer.count:
assert count == 0.0
Loading