Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions bayesflow/approximators/continuous_approximator.py
Original file line number Diff line number Diff line change
Expand Up @@ -578,9 +578,9 @@
def summaries(self, data: Mapping[str, np.ndarray], **kwargs) -> np.ndarray:
"""
.. deprecated:: 2.0.4
`summaries` will be removed in version 2.0.5, it was renamed to `summarize` which should be used instead.
`summaries` will be removed in version 2.0.6, it was renamed to `summarize` which should be used instead.
"""
warnings.warn("`summaries` was renamed to `summarize` and will be removed in version 2.0.5.", FutureWarning)
warnings.warn("`summaries` was renamed to `summarize` and will be removed in version 2.0.6.", FutureWarning)

Check warning on line 583 in bayesflow/approximators/continuous_approximator.py

View check run for this annotation

Codecov / codecov/patch

bayesflow/approximators/continuous_approximator.py#L583

Added line #L583 was not covered by tests
return self.summarize(data=data, **kwargs)

def log_prob(self, data: Mapping[str, np.ndarray], **kwargs) -> np.ndarray:
Expand Down
4 changes: 2 additions & 2 deletions bayesflow/approximators/model_comparison_approximator.py
Original file line number Diff line number Diff line change
Expand Up @@ -442,9 +442,9 @@
def summaries(self, data: Mapping[str, np.ndarray], **kwargs) -> np.ndarray:
"""
.. deprecated:: 2.0.4
`summaries` will be removed in version 2.0.5, it was renamed to `summarize` which should be used instead.
`summaries` will be removed in version 2.0.6, it was renamed to `summarize` which should be used instead.
"""
warnings.warn("`summaries` was renamed to `summarize` and will be removed in version 2.0.5.", FutureWarning)
warnings.warn("`summaries` was renamed to `summarize` and will be removed in version 2.0.6.", FutureWarning)

Check warning on line 447 in bayesflow/approximators/model_comparison_approximator.py

View check run for this annotation

Codecov / codecov/patch

bayesflow/approximators/model_comparison_approximator.py#L447

Added line #L447 was not covered by tests
return self.summarize(data=data, **kwargs)

def _compute_logits(self, classifier_conditions: Tensor) -> Tensor:
Expand Down
1 change: 1 addition & 0 deletions bayesflow/diagnostics/metrics/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@
from .expected_calibration_error import expected_calibration_error
from .classifier_two_sample_test import classifier_two_sample_test
from .model_misspecification import bootstrap_comparison, summary_space_comparison
from .calibration_log_gamma import calibration_log_gamma, gamma_null_distribution, gamma_discrepancy
163 changes: 163 additions & 0 deletions bayesflow/diagnostics/metrics/calibration_log_gamma.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
from collections.abc import Mapping, Sequence

import numpy as np
from scipy.stats import binom

from ...utils.dict_utils import dicts_to_arrays


def calibration_log_gamma(
estimates: Mapping[str, np.ndarray] | np.ndarray,
targets: Mapping[str, np.ndarray] | np.ndarray,
variable_keys: Sequence[str] = None,
variable_names: Sequence[str] = None,
num_null_draws: int = 1000,
quantile: float = 0.05,
):
"""
Compute the log gamma discrepancy statistic to test posterior calibration,
see [1] for additional information.
Log gamma is log(gamma/gamma_null), where gamma_null is the 5th percentile of the
null distribution under uniformity of ranks.
That is, if adopting a hypothesis testing framework,then log_gamma < 0 implies
a rejection of the hypothesis of uniform ranks at the 5% level.
This diagnostic is typically more sensitive than the Kolmogorov-Smirnoff test or
ChiSq test.

[1] Martin Modrák. Angie H. Moon. Shinyoung Kim. Paul Bürkner. Niko Huurre.
Kateřina Faltejsková. Andrew Gelman. Aki Vehtari.
"Simulation-Based Calibration Checking for Bayesian Computation:
The Choice of Test Quantities Shapes Sensitivity."
Bayesian Anal. 20 (2) 461 - 488, June 2025. https://doi.org/10.1214/23-BA1404

Parameters
----------
estimates : np.ndarray of shape (num_datasets, num_draws, num_variables)
The random draws from the approximate posteriors over ``num_datasets``
targets : np.ndarray of shape (num_datasets, num_variables)
The corresponding ground-truth values sampled from the prior
variable_keys : Sequence[str], optional (default = None)
Select keys from the dictionaries provided in estimates and targets.
By default, select all keys.
variable_names : Sequence[str], optional (default = None)
Optional variable names to show in the output.
quantile : float in (0, 1), optional, default 0.05
The quantile from the null distribution to be used as a threshold.
A lower quantile increases sensitivity to deviations from uniformity.

Returns
-------
result : dict
Dictionary containing:

- "values" : float or np.ndarray
The log gamma values per variable
- "metric_name" : str
The name of the metric ("Log Gamma").
- "variable_names" : str
The (inferred) variable names.
"""
samples = dicts_to_arrays(
estimates=estimates,
targets=targets,
variable_keys=variable_keys,
variable_names=variable_names,
)

num_ranks = samples["estimates"].shape[0]
num_post_draws = samples["estimates"].shape[1]

# rank statistics
ranks = np.sum(samples["estimates"] < samples["targets"][:, None], axis=1)

# null distribution and threshold
null_distribution = gamma_null_distribution(num_ranks, num_post_draws, num_null_draws)
null_quantile = np.quantile(null_distribution, quantile)

# compute log gamma for each parameter
log_gammas = np.empty(ranks.shape[-1])

for i in range(ranks.shape[-1]):
gamma = gamma_discrepancy(ranks[:, i], num_post_draws=num_post_draws)
log_gammas[i] = np.log(gamma / null_quantile)

output = {
"values": log_gammas,
"metric_name": "Log Gamma",
"variable_names": samples["estimates"].variable_names,
}

return output


def gamma_null_distribution(num_ranks: int, num_post_draws: int = 1000, num_null_draws: int = 1000) -> np.ndarray:
"""
Computes the distribution of expected gamma values under uniformity of ranks.

Parameters
----------
num_ranks : int
Number of ranks to use for each gamma.
num_post_draws : int, optional, default 1000
Number of posterior draws that were used to calculate the rank distribution.
num_null_draws : int, optional, default 1000
Number of returned gamma values under uniformity of ranks.

Returns
-------
result : np.ndarray
Array of shape (num_null_draws,) containing gamma values under uniformity of ranks.
"""
z_i = np.arange(1, num_post_draws + 2) / (num_post_draws + 1)
gamma = np.empty(num_null_draws)

# loop non-vectorized to reduce memory footprint
for i in range(num_null_draws):
u = np.random.uniform(size=num_ranks)
F_z = np.mean(u[:, None] < z_i, axis=0)
bin_1 = binom.cdf(num_ranks * F_z, num_ranks, z_i)
bin_2 = 1 - binom.cdf(num_ranks * F_z - 1, num_ranks, z_i)

gamma[i] = 2 * np.min(np.minimum(bin_1, bin_2))

return gamma


def gamma_discrepancy(ranks: np.ndarray, num_post_draws: int = 100) -> float:
"""
Quantifies deviation from uniformity by the likelihood of observing the
most extreme point on the empirical CDF of the given rank distribution
according to [1] (equation 7).

[1] Martin Modrák. Angie H. Moon. Shinyoung Kim. Paul Bürkner. Niko Huurre.
Kateřina Faltejsková. Andrew Gelman. Aki Vehtari.
"Simulation-Based Calibration Checking for Bayesian Computation:
The Choice of Test Quantities Shapes Sensitivity."
Bayesian Anal. 20 (2) 461 - 488, June 2025. https://doi.org/10.1214/23-BA1404

Parameters
----------
ranks : array of shape (num_ranks,)
Empirical rank distribution
num_post_draws : int, optional, default 100
Number of posterior draws used to generate ranks.

Returns
-------
result : float
Gamma discrepancy values for each parameter.
"""
num_ranks = len(ranks)

# observed count of ranks smaller than i
R_i = np.array([sum(ranks < i) for i in range(1, num_post_draws + 2)])

# expected proportion of ranks smaller than i
z_i = np.arange(1, num_post_draws + 2) / (num_post_draws + 1)

bin_1 = binom.cdf(R_i, num_ranks, z_i)
bin_2 = 1 - binom.cdf(R_i - 1, num_ranks, z_i)

# likelihood of obtaining the most extreme point on the empirical CDF
# if the rank distribution was indeed uniform
return float(2 * np.min(np.minimum(bin_1, bin_2)))
13 changes: 7 additions & 6 deletions bayesflow/distributions/diagonal_normal.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@ def __init__(
self.seed_generator = seed_generator or keras.random.SeedGenerator()

self.dim = None
self.log_normalization_constant = None
self._mean = None
self._std = None

Expand All @@ -71,17 +70,18 @@ def build(self, input_shape: Shape) -> None:
self.mean = ops.cast(ops.broadcast_to(self.mean, (self.dim,)), "float32")
self.std = ops.cast(ops.broadcast_to(self.std, (self.dim,)), "float32")

self.log_normalization_constant = -0.5 * self.dim * math.log(2.0 * math.pi) - ops.sum(ops.log(self.std))

if self.trainable_parameters:
self._mean = self.add_weight(
shape=ops.shape(self.mean),
initializer=keras.initializers.get(self.mean),
initializer=keras.initializers.get(keras.ops.copy(self.mean)),
dtype="float32",
trainable=True,
)
self._std = self.add_weight(
shape=ops.shape(self.std), initializer=keras.initializers.get(self.std), dtype="float32", trainable=True
shape=ops.shape(self.std),
initializer=keras.initializers.get(keras.ops.copy(self.std)),
dtype="float32",
trainable=True,
)
else:
self._mean = self.mean
Expand All @@ -91,7 +91,8 @@ def log_prob(self, samples: Tensor, *, normalize: bool = True) -> Tensor:
result = -0.5 * ops.sum((samples - self._mean) ** 2 / self._std**2, axis=-1)

if normalize:
result += self.log_normalization_constant
log_normalization_constant = -0.5 * self.dim * math.log(2.0 * math.pi) - ops.sum(ops.log(self._std))
result += log_normalization_constant

return result

Expand Down
25 changes: 13 additions & 12 deletions bayesflow/distributions/diagonal_student_t.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,6 @@ def __init__(

self.seed_generator = seed_generator or keras.random.SeedGenerator()

self.log_normalization_constant = None
self.dim = None
self._loc = None
self._scale = None
Expand All @@ -78,21 +77,16 @@ def build(self, input_shape: Shape) -> None:
self.loc = ops.cast(ops.broadcast_to(self.loc, (self.dim,)), "float32")
self.scale = ops.cast(ops.broadcast_to(self.scale, (self.dim,)), "float32")

self.log_normalization_constant = (
-0.5 * self.dim * math.log(self.df)
- 0.5 * self.dim * math.log(math.pi)
- math.lgamma(0.5 * self.df)
+ math.lgamma(0.5 * (self.df + self.dim))
- ops.sum(keras.ops.log(self.scale))
)

if self.trainable_parameters:
self._loc = self.add_weight(
shape=ops.shape(self.loc), initializer=keras.initializers.get(self.loc), dtype="float32", trainable=True
shape=ops.shape(self.loc),
initializer=keras.initializers.get(keras.ops.copy(self.loc)),
dtype="float32",
trainable=True,
)
self._scale = self.add_weight(
shape=ops.shape(self.scale),
initializer=keras.initializers.get(self.scale),
initializer=keras.initializers.get(keras.ops.copy(self.scale)),
dtype="float32",
trainable=True,
)
Expand All @@ -105,7 +99,14 @@ def log_prob(self, samples: Tensor, *, normalize: bool = True) -> Tensor:
result = -0.5 * (self.df + self.dim) * ops.log1p(mahalanobis_term / self.df)

if normalize:
result += self.log_normalization_constant
log_normalization_constant = (
-0.5 * self.dim * math.log(self.df)
- 0.5 * self.dim * math.log(math.pi)
- math.lgamma(0.5 * self.df)
+ math.lgamma(0.5 * (self.df + self.dim))
- ops.sum(keras.ops.log(self._scale))
)
result += log_normalization_constant

return result

Expand Down
2 changes: 1 addition & 1 deletion bayesflow/distributions/mixture.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ def build(self, input_shape: Shape) -> None:

self._mixture_logits = self.add_weight(
shape=(len(self.distributions),),
initializer=keras.initializers.get(self.mixture_logits),
initializer=keras.initializers.get(keras.ops.copy(self.mixture_logits)),
dtype="float32",
trainable=self.trainable_mixture,
)
Expand Down
10 changes: 5 additions & 5 deletions bayesflow/networks/standardization/standardization.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def moving_std(self, index: int) -> Tensor:
"""
return keras.ops.where(
self.moving_m2[index] > 0,
keras.ops.sqrt(self.moving_m2[index] / self.count),
keras.ops.sqrt(self.moving_m2[index] / self.count[index]),
1.0,
)

Expand All @@ -53,7 +53,7 @@ def build(self, input_shape: Shape):
self.moving_m2 = [
self.add_weight(shape=(shape[-1],), initializer="zeros", trainable=False) for shape in flattened_shapes
]
self.count = self.add_weight(shape=(), initializer="zeros", trainable=False)
self.count = [self.add_weight(shape=(), initializer="zeros", trainable=False) for _ in flattened_shapes]

def call(
self,
Expand Down Expand Up @@ -150,7 +150,7 @@ def _update_moments(self, x: Tensor, index: int):
"""

reduce_axes = tuple(range(x.ndim - 1))
batch_count = keras.ops.cast(keras.ops.shape(x)[0], self.count.dtype)
batch_count = keras.ops.cast(keras.ops.prod(keras.ops.shape(x)[:-1]), self.count[index].dtype)

# Compute batch mean and M2 per feature
batch_mean = keras.ops.mean(x, axis=reduce_axes)
Expand All @@ -159,7 +159,7 @@ def _update_moments(self, x: Tensor, index: int):
# Read current totals
mean = self.moving_mean[index]
m2 = self.moving_m2[index]
count = self.count
count = self.count[index]

total_count = count + batch_count
delta = batch_mean - mean
Expand All @@ -169,4 +169,4 @@ def _update_moments(self, x: Tensor, index: int):

self.moving_mean[index].assign(new_mean)
self.moving_m2[index].assign(new_m2)
self.count.assign(total_count)
self.count[index].assign(total_count)
14 changes: 8 additions & 6 deletions bayesflow/scores/multivariate_normal_score.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,13 +82,15 @@ def log_prob(self, x: Tensor, mean: Tensor, cov_chol: Tensor) -> Tensor:
"""
diff = x - mean

# Calculate covariance from Cholesky factors
covariance = keras.ops.matmul(
cov_chol,
keras.ops.swapaxes(cov_chol, -2, -1),
# Calculate precision from Cholesky factors of covariance matrix
cov_chol_inv = keras.ops.inv(cov_chol)
precision = keras.ops.matmul(
keras.ops.swapaxes(cov_chol_inv, -2, -1),
cov_chol_inv,
)
precision = keras.ops.inv(covariance)
log_det_covariance = keras.ops.slogdet(covariance)[1] # Only take the log of the determinant part

# Compute log determinant, exploiting Cholesky factors
log_det_covariance = keras.ops.log(keras.ops.prod(keras.ops.diagonal(cov_chol, axis1=1, axis2=2), axis=1)) * 2

# Compute the quadratic term in the exponential of the multivariate Gaussian
quadratic_term = keras.ops.einsum("...i,...ij,...j->...", diff, precision, diff)
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "bayesflow"
version = "2.0.4"
version = "2.0.5"
authors = [{ name = "The BayesFlow Team" }]
classifiers = [
"Development Status :: 5 - Production/Stable",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ def test_save_and_load(tmp_path, approximator, train_dataset, validation_dataset
approximator.build(data_shapes)
for layer in approximator.standardize_layers.values():
assert layer.built
assert layer.count == 0
for count in layer.count:
assert count == 0.0
approximator.compute_metrics(**train_dataset[0])

keras.saving.save_model(approximator, tmp_path / "model.keras")
Expand Down
3 changes: 2 additions & 1 deletion tests/test_approximators/test_build.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,5 @@ def test_build(approximator, simulator, batch_size, adapter):
approximator.build(batch_shapes)
for layer in approximator.standardize_layers.values():
assert layer.built
assert layer.count == 0
for count in layer.count:
assert count == 0.0
Loading