diff --git a/tests/test_diagnostics/test_diagnostics_metrics.py b/tests/test_diagnostics/test_diagnostics_metrics.py index f2c4c73c4..92de891c4 100644 --- a/tests/test_diagnostics/test_diagnostics_metrics.py +++ b/tests/test_diagnostics/test_diagnostics_metrics.py @@ -116,8 +116,8 @@ def run_sbc(N=N, S=S, D=D, bias=0): ranks = np.sum(posterior_draws < prior_draws, axis=0) # this is the distribution of gamma under uniform ranks - gamma_null = bf.diagnostics.metrics.gamma_null_distribution(D, S, num_null_draws=100) - lower, upper = np.quantile(gamma_null, (0.05, 0.995)) + gamma_null = bf.diagnostics.metrics.gamma_null_distribution(D, S, num_null_draws=200) + lower, upper = np.quantile(gamma_null, (0.025, 0.975)) # this is the empirical gamma observed_gamma = bf.diagnostics.metrics.gamma_discrepancy(ranks, num_post_draws=S) @@ -132,7 +132,7 @@ def run_sbc(N=N, S=S, D=D, bias=0): # this test should fail with a probability of 0.1% assert lower_expected <= np.sum(sbc_calibration) <= upper_expected - # sbc should almost always fial for slightly biased posterior draws + # sbc should almost always fail for slightly biased posterior draws sbc_calibration = [run_sbc(N=N, S=S, D=D, bias=1) for _ in range(100)] assert not lower_expected <= np.sum(sbc_calibration) <= upper_expected