diff --git a/tests/test_diagnostics/test_diagnostics_metrics.py b/tests/test_diagnostics/test_diagnostics_metrics.py
index f2c4c73c4..92de891c4 100644
--- a/tests/test_diagnostics/test_diagnostics_metrics.py
+++ b/tests/test_diagnostics/test_diagnostics_metrics.py
@@ -116,8 +116,8 @@ def run_sbc(N=N, S=S, D=D, bias=0):
         ranks = np.sum(posterior_draws < prior_draws, axis=0)
 
         # this is the distribution of gamma under uniform ranks
-        gamma_null = bf.diagnostics.metrics.gamma_null_distribution(D, S, num_null_draws=100)
-        lower, upper = np.quantile(gamma_null, (0.05, 0.995))
+        gamma_null = bf.diagnostics.metrics.gamma_null_distribution(D, S, num_null_draws=200)
+        lower, upper = np.quantile(gamma_null, (0.025, 0.975))
 
         # this is the empirical gamma
         observed_gamma = bf.diagnostics.metrics.gamma_discrepancy(ranks, num_post_draws=S)
@@ -132,7 +132,7 @@ def run_sbc(N=N, S=S, D=D, bias=0):
     # this test should fail with a probability of 0.1%
     assert lower_expected <= np.sum(sbc_calibration) <= upper_expected
 
-    # sbc should almost always fial for slightly biased posterior draws
+    # SBC should almost always fail for slightly biased posterior draws
     sbc_calibration = [run_sbc(N=N, S=S, D=D, bias=1) for _ in range(100)]
     assert not lower_expected <= np.sum(sbc_calibration) <= upper_expected