Skip to content

Commit 98b9fc5

Browse files
relax test_calibration_log_gamma_end_to_end() by introducing a larger posterior bias and decreasing the alpha error rate to 0.01% (#582)
1 parent 0efcc92 commit 98b9fc5

File tree

1 file changed

+6
-5
lines changed

1 file changed

+6
-5
lines changed

tests/test_diagnostics/test_diagnostics_metrics.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -106,20 +106,21 @@ def test_calibration_log_gamma_end_to_end():
106106
S = 1000 # number of posterior draws
107107
D = 1000 # number of datasets
108108

109+
gamma_null = bf.diagnostics.metrics.gamma_null_distribution(D, S, num_null_draws=10000)
110+
109111
def run_sbc(N=N, S=S, D=D, bias=0):
110112
rng = np.random.default_rng()
111113
prior_draws = rng.beta(2, 2, size=D)
112114
successes = rng.binomial(N, prior_draws)
113115

114116
# Analytical posterior:
115117
# if theta ~ Beta(2, 2), then p(theta|successes) is Beta(2 + successes, 2 + N - successes).
116-
posterior_draws = rng.beta(2 + successes + bias, 2 + N - successes + bias, size=(S, D))
118+
posterior_draws = rng.beta(2 + successes + bias, 2 + N - successes, size=(S, D))
117119

118120
# these ranks are uniform if bias=0
119121
ranks = np.sum(posterior_draws < prior_draws, axis=0)
120122

121123
# this is the distribution of gamma under uniform ranks
122-
gamma_null = bf.diagnostics.metrics.gamma_null_distribution(D, S, num_null_draws=200)
123124
lower, upper = np.quantile(gamma_null, (0.025, 0.975))
124125

125126
# this is the empirical gamma
@@ -130,13 +131,13 @@ def run_sbc(N=N, S=S, D=D, bias=0):
130131
return in_interval
131132

132133
sbc_calibration = [run_sbc(N=N, S=S, D=D) for _ in range(100)]
133-
lower_expected, upper_expected = binom.ppf((0.0005, 0.9995), 100, 0.95)
134+
lower_expected, upper_expected = binom.ppf((0.00005, 0.99995), 100, 0.95)
134135

135-
# this test should fail with a probability of 0.1%
136+
# this test should fail with a probability of 0.01%
136137
assert lower_expected <= np.sum(sbc_calibration) <= upper_expected
137138

138139
# sbc should almost always fail for slightly biased posterior draws
139-
sbc_calibration = [run_sbc(N=N, S=S, D=D, bias=1) for _ in range(100)]
140+
sbc_calibration = [run_sbc(N=N, S=S, D=D, bias=2) for _ in range(100)]
140141
assert not lower_expected <= np.sum(sbc_calibration) <= upper_expected
141142

142143

0 commit comments

Comments
 (0)