Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 27 additions & 1 deletion bayesflow/diagnostics/metrics/calibration_error.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,15 @@

import numpy as np

from ...utils.dict_utils import dicts_to_arrays
from ...utils.dict_utils import dicts_to_arrays, compute_test_quantities


def calibration_error(
estimates: Mapping[str, np.ndarray] | np.ndarray,
targets: Mapping[str, np.ndarray] | np.ndarray,
variable_keys: Sequence[str] = None,
variable_names: Sequence[str] = None,
test_quantities: dict[str, Callable] = None,
resolution: int = 20,
aggregation: Callable = np.median,
min_quantile: float = 0.005,
Expand All @@ -32,6 +33,18 @@ def calibration_error(
By default, select all keys.
variable_names : Sequence[str], optional (default = None)
Optional variable names to show in the output.
test_quantities : dict or None, optional, default: None
A dict that maps test quantity names to functions that compute
test quantities based on estimate/target draws.

The dict keys are automatically added to ``variable_keys``
and ``variable_names``.
Test quantity functions are expected to accept a dict of draws with
shape ``(batch_size, ...)`` as the first (typically only)
positional argument and return a NumPy array of shape
``(batch_size,)``.
The functions do not have to deal with an additional
sample dimension, as appropriate reshaping is done internally.
resolution : int, optional, default: 20
The number of credibility intervals (CIs) to consider
aggregation : callable or None, optional, default: np.median
Expand All @@ -55,6 +68,19 @@ def calibration_error(
The (inferred) variable names.
"""

if test_quantities is not None:
updated_data = compute_test_quantities(
targets=targets,
estimates=estimates,
variable_keys=variable_keys,
variable_names=variable_names,
test_quantities=test_quantities,
)
variable_names = updated_data["variable_names"]
variable_keys = updated_data["variable_keys"]
estimates = updated_data["estimates"]
targets = updated_data["targets"]

samples = dicts_to_arrays(
estimates=estimates,
targets=targets,
Expand Down
32 changes: 30 additions & 2 deletions bayesflow/diagnostics/metrics/calibration_log_gamma.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
from collections.abc import Mapping, Sequence
from collections.abc import Callable, Mapping, Sequence

import numpy as np
from scipy.stats import binom

from ...utils.dict_utils import dicts_to_arrays
from ...utils.dict_utils import dicts_to_arrays, compute_test_quantities


def calibration_log_gamma(
estimates: Mapping[str, np.ndarray] | np.ndarray,
targets: Mapping[str, np.ndarray] | np.ndarray,
variable_keys: Sequence[str] = None,
variable_names: Sequence[str] = None,
test_quantities: dict[str, Callable] = None,
num_null_draws: int = 1000,
quantile: float = 0.05,
):
Expand Down Expand Up @@ -41,6 +42,18 @@ def calibration_log_gamma(
By default, select all keys.
variable_names : Sequence[str], optional (default = None)
Optional variable names to show in the output.
test_quantities : dict or None, optional, default: None
A dict that maps test quantity names to functions that compute
test quantities based on estimate/target draws.

The dict keys are automatically added to ``variable_keys``
and ``variable_names``.
Test quantity functions are expected to accept a dict of draws with
shape ``(batch_size, ...)`` as the first (typically only)
positional argument and return a NumPy array of shape
``(batch_size,)``.
The functions do not have to deal with an additional
sample dimension, as appropriate reshaping is done internally.
quantile : float in (0, 1), optional, default 0.05
The quantile from the null distribution to be used as a threshold.
A lower quantile increases sensitivity to deviations from uniformity.
Expand All @@ -57,6 +70,21 @@ def calibration_log_gamma(
- "variable_names" : str
The (inferred) variable names.
"""

# Optionally, compute and prepend test quantities from draws
if test_quantities is not None:
updated_data = compute_test_quantities(
targets=targets,
estimates=estimates,
variable_keys=variable_keys,
variable_names=variable_names,
test_quantities=test_quantities,
)
variable_names = updated_data["variable_names"]
variable_keys = updated_data["variable_keys"]
estimates = updated_data["estimates"]
targets = updated_data["targets"]

samples = dicts_to_arrays(
estimates=estimates,
targets=targets,
Expand Down
29 changes: 28 additions & 1 deletion bayesflow/diagnostics/metrics/posterior_contraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,15 @@

import numpy as np

from ...utils.dict_utils import dicts_to_arrays
from ...utils.dict_utils import dicts_to_arrays, compute_test_quantities


def posterior_contraction(
estimates: Mapping[str, np.ndarray] | np.ndarray,
targets: Mapping[str, np.ndarray] | np.ndarray,
variable_keys: Sequence[str] = None,
variable_names: Sequence[str] = None,
test_quantities: dict[str, Callable] = None,
aggregation: Callable | None = np.median,
) -> dict[str, any]:
"""
Expand All @@ -27,6 +28,18 @@ def posterior_contraction(
By default, select all keys.
variable_names : Sequence[str], optional (default = None)
Optional variable names to show in the output.
test_quantities : dict or None, optional, default: None
A dict that maps test quantity names to functions that compute
test quantities based on estimate/target draws.

The dict keys are automatically added to ``variable_keys``
and ``variable_names``.
Test quantity functions are expected to accept a dict of draws with
shape ``(batch_size, ...)`` as the first (typically only)
positional argument and return a NumPy array of shape
``(batch_size,)``.
The functions do not have to deal with an additional
sample dimension, as appropriate reshaping is done internally.
aggregation : callable or None, optional (default = np.median)
Function to aggregate the PC across draws. Typically `np.mean` or `np.median`.
If None is provided, the individual values are returned.
Expand All @@ -50,6 +63,20 @@ def posterior_contraction(
indicate low contraction.
"""

# Optionally, compute and prepend test quantities from draws
if test_quantities is not None:
updated_data = compute_test_quantities(
targets=targets,
estimates=estimates,
variable_keys=variable_keys,
variable_names=variable_names,
test_quantities=test_quantities,
)
variable_names = updated_data["variable_names"]
variable_keys = updated_data["variable_keys"]
estimates = updated_data["estimates"]
targets = updated_data["targets"]

samples = dicts_to_arrays(
estimates=estimates,
targets=targets,
Expand Down
29 changes: 28 additions & 1 deletion bayesflow/diagnostics/metrics/root_mean_squared_error.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,15 @@

import numpy as np

from ...utils.dict_utils import dicts_to_arrays
from ...utils.dict_utils import dicts_to_arrays, compute_test_quantities


def root_mean_squared_error(
estimates: Mapping[str, np.ndarray] | np.ndarray,
targets: Mapping[str, np.ndarray] | np.ndarray,
variable_keys: Sequence[str] = None,
variable_names: Sequence[str] = None,
test_quantities: dict[str, Callable] = None,
normalize: str | None = "range",
aggregation: Callable = np.median,
) -> dict[str, any]:
Expand All @@ -28,6 +29,18 @@ def root_mean_squared_error(
By default, select all keys.
variable_names : Sequence[str], optional (default = None)
Optional variable names to show in the output.
test_quantities : dict or None, optional, default: None
A dict that maps test quantity names to functions that compute
test quantities based on estimate/target draws.

The dict keys are automatically added to ``variable_keys``
and ``variable_names``.
Test quantity functions are expected to accept a dict of draws with
shape ``(batch_size, ...)`` as the first (typically only)
positional argument and return a NumPy array of shape
``(batch_size,)``.
The functions do not have to deal with an additional
sample dimension, as appropriate reshaping is done internally.
normalize : str or None, optional (default = "range")
Whether to normalize the RMSE using statistics of the prior samples.
Possible options are ("mean", "range", "median", "iqr", "std", None)
Expand All @@ -52,6 +65,20 @@ def root_mean_squared_error(
The (inferred) variable names.
"""

# Optionally, compute and prepend test quantities from draws
if test_quantities is not None:
updated_data = compute_test_quantities(
targets=targets,
estimates=estimates,
variable_keys=variable_keys,
variable_names=variable_names,
test_quantities=test_quantities,
)
variable_names = updated_data["variable_names"]
variable_keys = updated_data["variable_keys"]
estimates = updated_data["estimates"]
targets = updated_data["targets"]

samples = dicts_to_arrays(
estimates=estimates,
targets=targets,
Expand Down
30 changes: 29 additions & 1 deletion bayesflow/diagnostics/plots/calibration_histogram.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from collections.abc import Sequence, Mapping
from collections.abc import Callable, Mapping, Sequence

import matplotlib.pyplot as plt
import numpy as np
Expand All @@ -8,13 +8,15 @@

from bayesflow.utils import logging
from bayesflow.utils import prepare_plot_data, add_titles_and_labels, prettify_subplots
from bayesflow.utils.dict_utils import compute_test_quantities


def calibration_histogram(
estimates: Mapping[str, np.ndarray] | np.ndarray,
targets: Mapping[str, np.ndarray] | np.ndarray,
variable_keys: Sequence[str] = None,
variable_names: Sequence[str] = None,
test_quantities: dict[str, Callable] = None,
figsize: Sequence[float] = None,
num_bins: int = 10,
binomial_interval: float = 0.99,
Expand Down Expand Up @@ -46,6 +48,18 @@ def calibration_histogram(
By default, select all keys.
variable_names : list or None, optional, default: None
The parameter names for nice plot titles. Inferred if None
test_quantities : dict or None, optional, default: None
A dict that maps plot titles to functions that compute
test quantities based on estimate/target draws.

The dict keys are automatically added to ``variable_keys``
and ``variable_names``.
Test quantity functions are expected to accept a dict of draws with
shape ``(batch_size, ...)`` as the first (typically only)
positional argument and return a NumPy array of shape
``(batch_size,)``.
The functions do not have to deal with an additional
sample dimension, as appropriate reshaping is done internally.
figsize : tuple or None, optional, default : None
The figure size passed to the matplotlib constructor. Inferred if None
num_bins : int, optional, default: 10
Expand Down Expand Up @@ -75,6 +89,20 @@ def calibration_histogram(
If there is a deviation from the expected shapes of `estimates` and `targets`.
"""

# Optionally, compute and prepend test quantities from draws
if test_quantities is not None:
updated_data = compute_test_quantities(
targets=targets,
estimates=estimates,
variable_keys=variable_keys,
variable_names=variable_names,
test_quantities=test_quantities,
)
variable_names = updated_data["variable_names"]
variable_keys = updated_data["variable_keys"]
estimates = updated_data["estimates"]
targets = updated_data["targets"]

plot_data = prepare_plot_data(
estimates=estimates,
targets=targets,
Expand Down
30 changes: 29 additions & 1 deletion bayesflow/diagnostics/plots/coverage.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from collections.abc import Sequence, Mapping
from collections.abc import Callable, Sequence, Mapping

import matplotlib.pyplot as plt
import numpy as np

from bayesflow.utils import prepare_plot_data, add_titles_and_labels, prettify_subplots, compute_empirical_coverage
from bayesflow.utils.dict_utils import compute_test_quantities


def coverage(
Expand All @@ -12,6 +13,7 @@ def coverage(
difference: bool = False,
variable_keys: Sequence[str] = None,
variable_names: Sequence[str] = None,
test_quantities: dict[str, Callable] = None,
figsize: Sequence[int] = None,
label_fontsize: int = 16,
legend_fontsize: int = 14,
Expand Down Expand Up @@ -50,6 +52,18 @@ def coverage(
By default, select all keys.
variable_names : list or None, optional, default: None
The parameter names for nice plot titles. Inferred if None
test_quantities : dict or None, optional, default: None
A dict that maps plot titles to functions that compute
test quantities based on estimate/target draws.

The dict keys are automatically added to ``variable_keys``
and ``variable_names``.
Test quantity functions are expected to accept a dict of draws with
shape ``(batch_size, ...)`` as the first (typically only)
positional argument and return a NumPy array of shape
``(batch_size,)``.
The functions do not have to deal with an additional
sample dimension, as appropriate reshaping is done internally.
figsize : tuple or None, optional, default: None
The figure size passed to the matplotlib constructor. Inferred if None.
label_fontsize : int, optional, default: 16
Expand Down Expand Up @@ -80,6 +94,20 @@ def coverage(

"""

# Optionally, compute and prepend test quantities from draws
if test_quantities is not None:
updated_data = compute_test_quantities(
targets=targets,
estimates=estimates,
variable_keys=variable_keys,
variable_names=variable_names,
test_quantities=test_quantities,
)
variable_names = updated_data["variable_names"]
variable_keys = updated_data["variable_keys"]
estimates = updated_data["estimates"]
targets = updated_data["targets"]

# Gather plot data and metadata into a dictionary
plot_data = prepare_plot_data(
estimates=estimates,
Expand Down
Loading