diff --git a/bayesflow/diagnostics/metrics/calibration_error.py b/bayesflow/diagnostics/metrics/calibration_error.py index 1d298370c..0b54c257b 100644 --- a/bayesflow/diagnostics/metrics/calibration_error.py +++ b/bayesflow/diagnostics/metrics/calibration_error.py @@ -2,7 +2,7 @@ import numpy as np -from ...utils.dict_utils import dicts_to_arrays +from ...utils.dict_utils import dicts_to_arrays, compute_test_quantities def calibration_error( @@ -10,6 +10,7 @@ def calibration_error( targets: Mapping[str, np.ndarray] | np.ndarray, variable_keys: Sequence[str] = None, variable_names: Sequence[str] = None, + test_quantities: dict[str, Callable] = None, resolution: int = 20, aggregation: Callable = np.median, min_quantile: float = 0.005, @@ -32,6 +33,18 @@ def calibration_error( By default, select all keys. variable_names : Sequence[str], optional (default = None) Optional variable names to show in the output. + test_quantities : dict or None, optional, default: None + A dict that maps plot titles to functions that compute + test quantities based on estimate/target draws. + + The dict keys are automatically added to ``variable_keys`` + and ``variable_names``. + Test quantity functions are expected to accept a dict of draws with + shape ``(batch_size, ...)`` as the first (typically only) + positional argument and return a NumPy array of shape + ``(batch_size,)``. + The functions do not have to deal with an additional + sample dimension, as appropriate reshaping is done internally. resolution : int, optional, default: 20 The number of credibility intervals (CIs) to consider aggregation : callable or None, optional, default: np.median @@ -55,6 +68,19 @@ def calibration_error( The (inferred) variable names.
""" + if test_quantities is not None: + updated_data = compute_test_quantities( + targets=targets, + estimates=estimates, + variable_keys=variable_keys, + variable_names=variable_names, + test_quantities=test_quantities, + ) + variable_names = updated_data["variable_names"] + variable_keys = updated_data["variable_keys"] + estimates = updated_data["estimates"] + targets = updated_data["targets"] + samples = dicts_to_arrays( estimates=estimates, targets=targets, diff --git a/bayesflow/diagnostics/metrics/calibration_log_gamma.py b/bayesflow/diagnostics/metrics/calibration_log_gamma.py index 54551c857..a25cf900f 100644 --- a/bayesflow/diagnostics/metrics/calibration_log_gamma.py +++ b/bayesflow/diagnostics/metrics/calibration_log_gamma.py @@ -1,9 +1,9 @@ -from collections.abc import Mapping, Sequence +from collections.abc import Callable, Mapping, Sequence import numpy as np from scipy.stats import binom -from ...utils.dict_utils import dicts_to_arrays +from ...utils.dict_utils import dicts_to_arrays, compute_test_quantities def calibration_log_gamma( @@ -11,6 +11,7 @@ def calibration_log_gamma( targets: Mapping[str, np.ndarray] | np.ndarray, variable_keys: Sequence[str] = None, variable_names: Sequence[str] = None, + test_quantities: dict[str, Callable] = None, num_null_draws: int = 1000, quantile: float = 0.05, ): @@ -41,6 +42,18 @@ def calibration_log_gamma( By default, select all keys. variable_names : Sequence[str], optional (default = None) Optional variable names to show in the output. + test_quantities : dict or None, optional, default: None + A dict that maps plot titles to functions that compute + test quantities based on estimate/target draws. + + The dict keys are automatically added to ``variable_keys`` + and ``variable_names``. + Test quantity functions are expected to accept a dict of draws with + shape ``(batch_size, ...)`` as the first (typically only) + positional argument and return a NumPy array of shape + ``(batch_size,)``.
+ The functions do not have to deal with an additional + sample dimension, as appropriate reshaping is done internally. quantile : float in (0, 1), optional, default 0.05 The quantile from the null distribution to be used as a threshold. A lower quantile increases sensitivity to deviations from uniformity. @@ -57,6 +70,21 @@ def calibration_log_gamma( - "variable_names" : str The (inferred) variable names. """ + + # Optionally, compute and prepend test quantities from draws + if test_quantities is not None: + updated_data = compute_test_quantities( + targets=targets, + estimates=estimates, + variable_keys=variable_keys, + variable_names=variable_names, + test_quantities=test_quantities, + ) + variable_names = updated_data["variable_names"] + variable_keys = updated_data["variable_keys"] + estimates = updated_data["estimates"] + targets = updated_data["targets"] + samples = dicts_to_arrays( estimates=estimates, targets=targets, diff --git a/bayesflow/diagnostics/metrics/posterior_contraction.py b/bayesflow/diagnostics/metrics/posterior_contraction.py index a8dffb922..94749c7bc 100644 --- a/bayesflow/diagnostics/metrics/posterior_contraction.py +++ b/bayesflow/diagnostics/metrics/posterior_contraction.py @@ -2,7 +2,7 @@ import numpy as np -from ...utils.dict_utils import dicts_to_arrays +from ...utils.dict_utils import dicts_to_arrays, compute_test_quantities def posterior_contraction( @@ -10,6 +10,7 @@ def posterior_contraction( targets: Mapping[str, np.ndarray] | np.ndarray, variable_keys: Sequence[str] = None, variable_names: Sequence[str] = None, + test_quantities: dict[str, Callable] = None, aggregation: Callable | None = np.median, ) -> dict[str, any]: """ @@ -27,6 +28,18 @@ def posterior_contraction( By default, select all keys. variable_names : Sequence[str], optional (default = None) Optional variable names to show in the output. 
+ test_quantities : dict or None, optional, default: None + A dict that maps plot titles to functions that compute + test quantities based on estimate/target draws. + + The dict keys are automatically added to ``variable_keys`` + and ``variable_names``. + Test quantity functions are expected to accept a dict of draws with + shape ``(batch_size, ...)`` as the first (typically only) + positional argument and return a NumPy array of shape + ``(batch_size,)``. + The functions do not have to deal with an additional + sample dimension, as appropriate reshaping is done internally. aggregation : callable or None, optional (default = np.median) Function to aggregate the PC across draws. Typically `np.mean` or `np.median`. If None is provided, the individual values are returned. @@ -50,6 +63,20 @@ def posterior_contraction( indicate low contraction. """ + # Optionally, compute and prepend test quantities from draws + if test_quantities is not None: + updated_data = compute_test_quantities( + targets=targets, + estimates=estimates, + variable_keys=variable_keys, + variable_names=variable_names, + test_quantities=test_quantities, + ) + variable_names = updated_data["variable_names"] + variable_keys = updated_data["variable_keys"] + estimates = updated_data["estimates"] + targets = updated_data["targets"] + samples = dicts_to_arrays( estimates=estimates, targets=targets, diff --git a/bayesflow/diagnostics/metrics/root_mean_squared_error.py b/bayesflow/diagnostics/metrics/root_mean_squared_error.py index 7c3c6305a..d8209ffac 100644 --- a/bayesflow/diagnostics/metrics/root_mean_squared_error.py +++ b/bayesflow/diagnostics/metrics/root_mean_squared_error.py @@ -2,7 +2,7 @@ import numpy as np -from ...utils.dict_utils import dicts_to_arrays +from ...utils.dict_utils import dicts_to_arrays, compute_test_quantities def root_mean_squared_error( @@ -10,6 +10,7 @@ def root_mean_squared_error( targets: Mapping[str, np.ndarray] | np.ndarray, variable_keys: Sequence[str] = None,
variable_names: Sequence[str] = None, + test_quantities: dict[str, Callable] = None, normalize: str | None = "range", aggregation: Callable = np.median, ) -> dict[str, any]: @@ -28,6 +29,18 @@ def root_mean_squared_error( By default, select all keys. variable_names : Sequence[str], optional (default = None) Optional variable names to show in the output. + test_quantities : dict or None, optional, default: None + A dict that maps plot titles to functions that compute + test quantities based on estimate/target draws. + + The dict keys are automatically added to ``variable_keys`` + and ``variable_names``. + Test quantity functions are expected to accept a dict of draws with + shape ``(batch_size, ...)`` as the first (typically only) + positional argument and return a NumPy array of shape + ``(batch_size,)``. + The functions do not have to deal with an additional + sample dimension, as appropriate reshaping is done internally. normalize : str or None, optional (default = "range") Whether to normalize the RMSE using statistics of the prior samples. Possible options are ("mean", "range", "median", "iqr", "std", None) @@ -52,6 +65,20 @@ def root_mean_squared_error( The (inferred) variable names.
""" + # Optionally, compute and prepend test quantities from draws + if test_quantities is not None: + updated_data = compute_test_quantities( + targets=targets, + estimates=estimates, + variable_keys=variable_keys, + variable_names=variable_names, + test_quantities=test_quantities, + ) + variable_names = updated_data["variable_names"] + variable_keys = updated_data["variable_keys"] + estimates = updated_data["estimates"] + targets = updated_data["targets"] + samples = dicts_to_arrays( estimates=estimates, targets=targets, diff --git a/bayesflow/diagnostics/plots/calibration_histogram.py b/bayesflow/diagnostics/plots/calibration_histogram.py index 7bd4ce90b..1420f7a5f 100644 --- a/bayesflow/diagnostics/plots/calibration_histogram.py +++ b/bayesflow/diagnostics/plots/calibration_histogram.py @@ -1,4 +1,4 @@ -from collections.abc import Sequence, Mapping +from collections.abc import Callable, Mapping, Sequence import matplotlib.pyplot as plt import numpy as np @@ -8,6 +8,7 @@ from bayesflow.utils import logging from bayesflow.utils import prepare_plot_data, add_titles_and_labels, prettify_subplots +from bayesflow.utils.dict_utils import compute_test_quantities def calibration_histogram( @@ -15,6 +16,7 @@ def calibration_histogram( targets: Mapping[str, np.ndarray] | np.ndarray, variable_keys: Sequence[str] = None, variable_names: Sequence[str] = None, + test_quantities: dict[str, Callable] = None, figsize: Sequence[float] = None, num_bins: int = 10, binomial_interval: float = 0.99, @@ -46,6 +48,18 @@ def calibration_histogram( By default, select all keys. variable_names : list or None, optional, default: None The parameter names for nice plot titles. Inferred if None + test_quantities : dict or None, optional, default: None + A dict that maps plot titles to functions that compute + test quantities based on estimate/target draws. + + The dict keys are automatically added to ``variable_keys`` + and ``variable_names``. 
+ Test quantity functions are expected to accept a dict of draws with + shape ``(batch_size, ...)`` as the first (typically only) + positional argument and return a NumPy array of shape + ``(batch_size,)``. + The functions do not have to deal with an additional + sample dimension, as appropriate reshaping is done internally. figsize : tuple or None, optional, default : None The figure size passed to the matplotlib constructor. Inferred if None num_bins : int, optional, default: 10 @@ -75,6 +89,20 @@ def calibration_histogram( If there is a deviation form the expected shapes of `estimates` and `targets`. """ + # Optionally, compute and prepend test quantities from draws + if test_quantities is not None: + updated_data = compute_test_quantities( + targets=targets, + estimates=estimates, + variable_keys=variable_keys, + variable_names=variable_names, + test_quantities=test_quantities, + ) + variable_names = updated_data["variable_names"] + variable_keys = updated_data["variable_keys"] + estimates = updated_data["estimates"] + targets = updated_data["targets"] + plot_data = prepare_plot_data( estimates=estimates, targets=targets, diff --git a/bayesflow/diagnostics/plots/coverage.py b/bayesflow/diagnostics/plots/coverage.py index c632b394f..65f370cbe 100644 --- a/bayesflow/diagnostics/plots/coverage.py +++ b/bayesflow/diagnostics/plots/coverage.py @@ -1,9 +1,10 @@ -from collections.abc import Sequence, Mapping +from collections.abc import Callable, Sequence, Mapping import matplotlib.pyplot as plt import numpy as np from bayesflow.utils import prepare_plot_data, add_titles_and_labels, prettify_subplots, compute_empirical_coverage +from bayesflow.utils.dict_utils import compute_test_quantities def coverage( @@ -12,6 +13,7 @@ def coverage( difference: bool = False, variable_keys: Sequence[str] = None, variable_names: Sequence[str] = None, + test_quantities: dict[str, Callable] = None, figsize: Sequence[int] = None, label_fontsize: int = 16, legend_fontsize: int = 14, @@
-50,6 +52,18 @@ def coverage( By default, select all keys. variable_names : list or None, optional, default: None The parameter names for nice plot titles. Inferred if None + test_quantities : dict or None, optional, default: None + A dict that maps plot titles to functions that compute + test quantities based on estimate/target draws. + + The dict keys are automatically added to ``variable_keys`` + and ``variable_names``. + Test quantity functions are expected to accept a dict of draws with + shape ``(batch_size, ...)`` as the first (typically only) + positional argument and return a NumPy array of shape + ``(batch_size,)``. + The functions do not have to deal with an additional + sample dimension, as appropriate reshaping is done internally. figsize : tuple or None, optional, default: None The figure size passed to the matplotlib constructor. Inferred if None. label_fontsize : int, optional, default: 16 @@ -80,6 +94,20 @@ def coverage( """ + # Optionally, compute and prepend test quantities from draws + if test_quantities is not None: + updated_data = compute_test_quantities( + targets=targets, + estimates=estimates, + variable_keys=variable_keys, + variable_names=variable_names, + test_quantities=test_quantities, + ) + variable_names = updated_data["variable_names"] + variable_keys = updated_data["variable_keys"] + estimates = updated_data["estimates"] + targets = updated_data["targets"] + # Gather plot data and metadata into a dictionary plot_data = prepare_plot_data( estimates=estimates, diff --git a/bayesflow/diagnostics/plots/recovery.py b/bayesflow/diagnostics/plots/recovery.py index f591a4284..a98d3b12c 100644 --- a/bayesflow/diagnostics/plots/recovery.py +++ b/bayesflow/diagnostics/plots/recovery.py @@ -5,6 +5,7 @@ from bayesflow.utils import prepare_plot_data, prettify_subplots, make_quadratic, add_titles_and_labels, add_metric from bayesflow.utils.numpy_utils import credible_interval +from bayesflow.utils.dict_utils import compute_test_quantities def
recovery( @@ -12,6 +13,7 @@ def recovery( targets: Mapping[str, np.ndarray] | np.ndarray, variable_keys: Sequence[str] = None, variable_names: Sequence[str] = None, + test_quantities: dict[str, Callable] = None, point_agg: Callable = np.median, uncertainty_agg: Callable = credible_interval, point_agg_kwargs: dict = None, @@ -58,6 +60,18 @@ def recovery( By default, select all keys. variable_names : list or None, optional, default: None The individual parameter names for nice plot titles. Inferred if None + test_quantities : dict or None, optional, default: None + A dict that maps plot titles to functions that compute + test quantities based on estimate/target draws. + + The dict keys are automatically added to ``variable_keys`` + and ``variable_names``. + Test quantity functions are expected to accept a dict of draws with + shape ``(batch_size, ...)`` as the first (typically only) + positional argument and return a NumPy array of shape + ``(batch_size,)``. + The functions do not have to deal with an additional + sample dimension, as appropriate reshaping is done internally. point_agg : callable, optional, default: median Function to compute point estimates. uncertainty_agg : callable, optional, default: credible_interval with coverage probability 95% @@ -104,6 +118,20 @@ def recovery( If there is a deviation from the expected shapes of ``estimates`` and ``targets``.
""" + # Optionally, compute and prepend test quantities from draws + if test_quantities is not None: + updated_data = compute_test_quantities( + targets=targets, + estimates=estimates, + variable_keys=variable_keys, + variable_names=variable_names, + test_quantities=test_quantities, + ) + variable_names = updated_data["variable_names"] + variable_keys = updated_data["variable_keys"] + estimates = updated_data["estimates"] + targets = updated_data["targets"] + # Gather plot data and metadata into a dictionary plot_data = prepare_plot_data( estimates=estimates, diff --git a/bayesflow/diagnostics/plots/z_score_contraction.py b/bayesflow/diagnostics/plots/z_score_contraction.py index c013889a0..190a0a608 100644 --- a/bayesflow/diagnostics/plots/z_score_contraction.py +++ b/bayesflow/diagnostics/plots/z_score_contraction.py @@ -1,9 +1,10 @@ -from collections.abc import Sequence, Mapping +from collections.abc import Callable, Sequence, Mapping import matplotlib.pyplot as plt import numpy as np from bayesflow.utils import prepare_plot_data, add_titles_and_labels, prettify_subplots +from bayesflow.utils.dict_utils import compute_test_quantities def z_score_contraction( @@ -11,6 +12,7 @@ def z_score_contraction( targets: Mapping[str, np.ndarray] | np.ndarray, variable_keys: Sequence[str] = None, variable_names: Sequence[str] = None, + test_quantities: dict[str, Callable] = None, figsize: Sequence[int] = None, label_fontsize: int = 16, title_fontsize: int = 18, @@ -63,6 +65,18 @@ def z_score_contraction( By default, select all keys. variable_names : list or None, optional, default: None The parameter names for nice plot titles. Inferred if None + test_quantities : dict or None, optional, default: None + A dict that maps plot titles to functions that compute + test quantities based on estimate/target draws. + + The dict keys are automatically added to ``variable_keys`` + and ``variable_names``. 
+ Test quantity functions are expected to accept a dict of draws with + shape ``(batch_size, ...)`` as the first (typically only) + positional argument and return a NumPy array of shape + ``(batch_size,)``. + The functions do not have to deal with an additional + sample dimension, as appropriate reshaping is done internally. figsize : tuple or None, optional, default : None The figure size passed to the matplotlib constructor. Inferred if None. label_fontsize : int, optional, default: 16 @@ -90,6 +104,20 @@ def z_score_contraction( If there is a deviation from the expected shapes of ``estimates`` and ``targets``. """ + # Optionally, compute and prepend test quantities from draws + if test_quantities is not None: + updated_data = compute_test_quantities( + targets=targets, + estimates=estimates, + variable_keys=variable_keys, + variable_names=variable_names, + test_quantities=test_quantities, + ) + variable_names = updated_data["variable_names"] + variable_keys = updated_data["variable_keys"] + estimates = updated_data["estimates"] + targets = updated_data["targets"] + # Gather plot data and metadata into a dictionary plot_data = prepare_plot_data( estimates=estimates, diff --git a/tests/test_diagnostics/test_diagnostics_metrics.py b/tests/test_diagnostics/test_diagnostics_metrics.py index 5945412c7..daad874d0 100644 --- a/tests/test_diagnostics/test_diagnostics_metrics.py +++ b/tests/test_diagnostics/test_diagnostics_metrics.py @@ -35,6 +35,14 @@ def test_metric_calibration_error(random_estimates, random_targets, var_names): assert out["values"].shape == (random_estimates["sigma"].shape[-1],) assert out["variable_names"] == ["sigma"] + # test quantities + test_quantities = { + r"$\beta_1 + \beta_2$": lambda data: np.sum(data["beta"], axis=-1), + r"$\beta_1 \cdot \beta_2$": lambda data: np.prod(data["beta"], axis=-1), + } + out = bf.diagnostics.metrics.calibration_error(random_estimates, random_targets, test_quantities=test_quantities) + assert
out["values"].shape[0] == len(test_quantities) + num_variables(random_estimates) + def test_posterior_contraction(random_estimates, random_targets): # basic functionality: automatic variable names @@ -47,6 +55,16 @@ def test_posterior_contraction(random_estimates, random_targets): out = bf.diagnostics.metrics.posterior_contraction(random_estimates, random_targets, aggregation=None) assert out["values"].shape == (random_estimates["sigma"].shape[0], num_variables(random_estimates)) + # test quantities + test_quantities = { + r"$\beta_1 + \beta_2$": lambda data: np.sum(data["beta"], axis=-1), + r"$\beta_1 \cdot \beta_2$": lambda data: np.prod(data["beta"], axis=-1), + } + out = bf.diagnostics.metrics.posterior_contraction( + random_estimates, random_targets, test_quantities=test_quantities + ) + assert out["values"].shape[0] == len(test_quantities) + num_variables(random_estimates) + def test_root_mean_squared_error(random_estimates, random_targets): # basic functionality: automatic variable names @@ -56,6 +74,16 @@ def test_root_mean_squared_error(random_estimates, random_targets): assert out["metric_name"] == "NRMSE" assert out["variable_names"] == ["beta_0", "beta_1", "sigma"] + # test quantities + test_quantities = { + r"$\beta_1 + \beta_2$": lambda data: np.sum(data["beta"], axis=-1), + r"$\beta_1 \cdot \beta_2$": lambda data: np.prod(data["beta"], axis=-1), + } + out = bf.diagnostics.metrics.root_mean_squared_error( + random_estimates, random_targets, test_quantities=test_quantities + ) + assert out["values"].shape[0] == len(test_quantities) + num_variables(random_estimates) + def test_classifier_two_sample_test(random_samples_a, random_samples_b): metric = bf.diagnostics.metrics.classifier_two_sample_test(estimates=random_samples_a, targets=random_samples_a) @@ -95,6 +123,16 @@ def test_calibration_log_gamma(random_estimates, random_targets): assert out["metric_name"] == "Log Gamma" assert out["variable_names"] == ["beta_0", "beta_1", "sigma"] + # test 
quantities + test_quantities = { + r"$\beta_1 + \beta_2$": lambda data: np.sum(data["beta"], axis=-1), + r"$\beta_1 \cdot \beta_2$": lambda data: np.prod(data["beta"], axis=-1), + } + out = bf.diagnostics.metrics.calibration_log_gamma( + random_estimates, random_targets, test_quantities=test_quantities + ) + assert out["values"].shape[0] == len(test_quantities) + num_variables(random_estimates) + def test_calibration_log_gamma_end_to_end(): # This is a function test for simulation-based calibration. diff --git a/tests/test_diagnostics/test_diagnostics_plots.py b/tests/test_diagnostics/test_diagnostics_plots.py index 5d4758558..f825c36cc 100644 --- a/tests/test_diagnostics/test_diagnostics_plots.py +++ b/tests/test_diagnostics/test_diagnostics_plots.py @@ -85,6 +85,16 @@ def test_calibration_histogram(random_estimates, random_targets): assert len(out.axes) == num_variables(random_estimates) assert out.axes[0].title._text == "beta_0" + # test quantities + test_quantities = { + r"$\beta_1 + \beta_2$": lambda data: np.sum(data["beta"], axis=-1), + r"$\beta_1 \cdot \beta_2$": lambda data: np.prod(data["beta"], axis=-1), + } + out = bf.diagnostics.plots.calibration_histogram(random_estimates, random_targets, test_quantities=test_quantities) + assert len(out.axes) == len(test_quantities) + num_variables(random_estimates) + assert out.axes[1].title._text == r"$\beta_1 \cdot \beta_2$" + assert out.axes[-1].title._text == r"sigma" + def test_loss(history): out = bf.diagnostics.loss(history) @@ -102,6 +112,16 @@ def test_recovery_bounds(random_estimates, random_targets): assert len(out.axes) == num_variables(random_estimates) assert out.axes[2].title._text == "sigma" + # test quantities + test_quantities = { + r"$\beta_1 + \beta_2$": lambda data: np.sum(data["beta"], axis=-1), + r"$\beta_1 \cdot \beta_2$": lambda data: np.prod(data["beta"], axis=-1), + } + out = bf.diagnostics.plots.recovery(random_estimates, random_targets, test_quantities=test_quantities) +
assert len(out.axes) == len(test_quantities) + num_variables(random_estimates) + assert out.axes[1].title._text == r"$\beta_1 \cdot \beta_2$" + assert out.axes[-1].title._text == r"sigma" + def test_recovery_symmetric(random_estimates, random_targets): # basic functionality: automatic variable names @@ -127,6 +147,16 @@ def test_z_score_contraction(random_estimates, random_targets): assert len(out.axes) == num_variables(random_estimates) assert out.axes[1].title._text == "beta_1" + # test quantities + test_quantities = { + r"$\beta_1 + \beta_2$": lambda data: np.sum(data["beta"], axis=-1), + r"$\beta_1 \cdot \beta_2$": lambda data: np.prod(data["beta"], axis=-1), + } + out = bf.diagnostics.plots.z_score_contraction(random_estimates, random_targets, test_quantities=test_quantities) + assert len(out.axes) == len(test_quantities) + num_variables(random_estimates) + assert out.axes[1].title._text == r"$\beta_1 \cdot \beta_2$" + assert out.axes[-1].title._text == r"sigma" + def test_pairs_samples(random_priors): out = bf.diagnostics.plots.pairs_samples( @@ -291,6 +321,16 @@ def test_coverage(random_estimates, random_targets): assert out.axes[0].get_xlabel() == "Central interval width" assert out.axes[0].get_ylabel() == "Empirical coverage" + # test quantities + test_quantities = { + r"$\beta_1 + \beta_2$": lambda data: np.sum(data["beta"], axis=-1), + r"$\beta_1 \cdot \beta_2$": lambda data: np.prod(data["beta"], axis=-1), + } + out = bf.diagnostics.plots.coverage(random_estimates, random_targets, test_quantities=test_quantities) + assert len(out.axes) == len(test_quantities) + num_variables(random_estimates) + assert out.axes[1].title._text == r"$\beta_1 \cdot \beta_2$" + assert out.axes[-1].title._text == r"sigma" + def test_coverage_diff(random_estimates, random_targets): # basic functionality: automatic variable names