Skip to content

Commit 08d6004

Browse files
authored
Merge pull request numpy#27034 from WarrenWeckesser/fix-beta-small-params
BUG: random: Fix edge case of Johnk's algorithm for the beta distribution.
2 parents d5f3776 + d2c2837 commit 08d6004

File tree

2 files changed

+38
-8
lines changed

2 files changed

+38
-8
lines changed

numpy/random/src/distributions/distributions.c

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -436,16 +436,23 @@ double random_beta(bitgen_t *bitgen_state, double a, double b) {
436436
XpY = X + Y;
437437
/* Reject if both U and V are 0.0, which is approx 1 in 10^106 */
438438
if ((XpY <= 1.0) && (U + V > 0.0)) {
439-
if (XpY > 0) {
439+
if ((X > 0) && (Y > 0)) {
440440
return X / XpY;
441441
} else {
442-
double logX = log(U) / a;
443-
double logY = log(V) / b;
444-
double logM = logX > logY ? logX : logY;
445-
logX -= logM;
446-
logY -= logM;
447-
448-
return exp(logX - log(exp(logX) + exp(logY)));
442+
/*
443+
* Either X or Y underflowed to 0, so we lost information in
444+
* U**(1/a) or V**(1/b). We still compute X/(X+Y) here, but we
445+
* work with logarithms as much as we can to avoid the underflow.
446+
*/
447+
double logX = log(U)/a;
448+
double logY = log(V)/b;
449+
double delta = logX - logY;
450+
if (delta > 0) {
451+
return exp(-log1p(exp(-delta)));
452+
}
453+
else {
454+
return exp(delta - log1p(exp(delta)));
455+
}
449456
}
450457
}
451458
}

numpy/random/tests/test_generator_mt19937_regressions.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,29 @@ def test_beta_ridiculously_small_parameters(self):
8686
x = self.mt19937.beta(tiny/32, tiny/40, size=50)
8787
assert not np.any(np.isnan(x))
8888

89+
def test_beta_expected_zero_frequency(self):
90+
# gh-24475: For small a and b (e.g. a=0.0025, b=0.0025), beta
91+
# would generate too many zeros.
92+
a = 0.0025
93+
b = 0.0025
94+
n = 1000000
95+
x = self.mt19937.beta(a, b, size=n)
96+
nzeros = np.count_nonzero(x == 0)
97+
# beta CDF at x = np.finfo(np.double).smallest_subnormal/2
98+
# is p = 0.0776169083131899, e.g,
99+
#
100+
# import numpy as np
101+
# from mpmath import mp
102+
# mp.dps = 160
103+
# x = mp.mpf(np.finfo(np.float64).smallest_subnormal)/2
104+
# # CDF of the beta distribution at x:
105+
# p = mp.betainc(a, b, x1=0, x2=x, regularized=True)
106+
# n = 1000000
107+
# exprected_freq = float(n*p)
108+
#
109+
expected_freq = 77616.90831318991
110+
assert 0.95*expected_freq < nzeros < 1.05*expected_freq
111+
89112
def test_choice_sum_of_probs_tolerance(self):
90113
# The sum of probs should be 1.0 with some tolerance.
91114
# For low precision dtypes the tolerance was too tight.

0 commit comments

Comments
 (0)