Skip to content

Commit c977ec5

Browse files
Rewritten mkl_random.multinomial to use MKL's own generator
``` In [1]: import mkl_random, numpy as np In [2]: %time mkl_random.multinomial(527, np.array([1/16, 3/16, 5/16, 7/16], 'd'), size=10**6) CPU times: user 820 ms, sys: 0 ns, total: 820 ms Wall time: 819 ms Out[2]: array([[ 29, 90, 176, 232], [ 32, 91, 185, 219], [ 35, 90, 169, 233], ..., [ 42, 93, 157, 235], [ 28, 105, 152, 242], [ 32, 79, 179, 237]], dtype=int32) In [3]: %timeit mkl_random.multinomial(527, np.array([1/16, 3/16, 5/16, 7/16], 'd'), size=10**6) 803 ms ± 9.77 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) In [4]: %timeit mkl_random.multinomial(527, np.array([1/16, 3/16, 5/16, 7/16], 'd'), size=10**5) 80.5 ms ± 874 µs per loop (mean ± std. dev. of 7 runs, 10 loops each) ``` Previously, ``` In [1]: import mkl_random, numpy as np In [2]: %timeit mkl_random.multinomial(527, np.array([1/16, 3/16, 5/16, 7/16], 'd'), size=10**6) 1.35 s ± 2.35 ms per loop (mean ± std. dev. of 7 runs, 1 loop each) In [3]: %timeit mkl_random.multinomial(527, np.array([1/16, 3/16, 5/16, 7/16], 'd'), size=10**5) 135 ms ± 613 µs per loop (mean ± std. dev. of 7 runs, 10 loops each) ```
1 parent 4bb8055 commit c977ec5

File tree

3 files changed

+32
-198
lines changed

3 files changed

+32
-198
lines changed

mkl_random/mklrand.pyx

Lines changed: 4 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ cdef extern from "mkl_distributions.h":
131131
void irk_geometric_vec(irk_state *state, npy_intp len, int *res, double p) nogil
132132
void irk_negbinomial_vec(irk_state *state, npy_intp len, int *res, double a, double p) nogil
133133
void irk_binomial_vec(irk_state *state, npy_intp len, int *res, int n, double p) nogil
134+
void irk_multinomial_vec(irk_state *state, npy_intp len, int *res, int n, int d, double *pvec) nogil
134135
void irk_hypergeometric_vec(irk_state *state, npy_intp len, int *res, int ls, int ss, int ms) nogil
135136

136137
void irk_poisson_vec_PTPE(irk_state *state, npy_intp len, int *res, double lam) nogil
@@ -5544,27 +5545,13 @@ cdef class RandomState:
55445545
raise ValueError("sum(pvals[:-1]) > 1.0")
55455546

55465547
shape = _shape_from_size(size, d)
5547-
55485548
multin = np.zeros(shape, np.int32)
5549+
55495550
mnarr = <ndarray>multin
55505551
mnix = <int*>PyArray_DATA(mnarr)
55515552
sz = PyArray_SIZE(mnarr)
5552-
with self.lock, nogil, cython.cdivision(True):
5553-
i = 0
5554-
while i < sz:
5555-
Sum = 1.0
5556-
dn = n
5557-
for j from 0 <= j < d-1:
5558-
# mnix[i+j] = irk_binomial(self.internal_state, dn, pix[j]/Sum)
5559-
irk_binomial_vec(self.internal_state, 1, mnix + (i+j), dn, pix[j]/Sum)
5560-
dn = dn - mnix[i+j]
5561-
if dn <= 0:
5562-
break
5563-
Sum = Sum - pix[j]
5564-
if dn > 0:
5565-
mnix[i+d-1] = dn
5566-
5567-
i = i + d
5553+
5554+
irk_multinomial_vec(self.internal_state, sz // d, mnix, n, d, pix)
55685555

55695556
return multin
55705557

mkl_random/src/mkl_distributions.cpp

Lines changed: 25 additions & 181 deletions
Original file line numberDiff line numberDiff line change
@@ -1136,6 +1136,31 @@ irk_binomial_vec(irk_state *state, npy_intp len, int *res, const int n, const do
11361136
}
11371137
}
11381138

1139+
void
1140+
irk_multinomial_vec(irk_state *state, npy_intp len, int *res, const int n, const int k, const double *pvec)
1141+
{
1142+
int err;
1143+
1144+
if(len < 1)
1145+
return;
1146+
1147+
if (n==0) {
1148+
memset(res, 0, len*k*sizeof(int));
1149+
}
1150+
else {
1151+
while (len > MKL_INT_MAX) {
1152+
err = viRngMultinomial(VSL_RNG_METHOD_MULTINOMIAL_MULTPOISSON, state->stream, MKL_INT_MAX, res, n, k, pvec);
1153+
assert(err == VSL_STATUS_OK);
1154+
res += k*MKL_INT_MAX;
1155+
len -= k*MKL_INT_MAX;
1156+
}
1157+
1158+
err = viRngMultinomial(VSL_RNG_METHOD_MULTINOMIAL_MULTPOISSON, state->stream, len, res, n, k, pvec);
1159+
assert(err == VSL_STATUS_OK);
1160+
}
1161+
}
1162+
1163+
11391164
void
11401165
irk_geometric_vec(irk_state *state, npy_intp len, int *res, const double p)
11411166
{
@@ -1953,184 +1978,3 @@ static double irk_double(irk_state *state) {
19531978

19541979
return res;
19551980
}
1956-
1957-
#ifdef COMPILE_UNUSED_CODE
1958-
1959-
#ifndef min
1960-
#define min(x,y) (((x)<(y))?(x):(y))
1961-
#endif
1962-
1963-
static long irk_binomial_btpe(irk_state *state, long n, double p)
1964-
{
1965-
double r,q,fm,p1,xm,xl,xr,c,laml,lamr,p2,p3,p4;
1966-
double a,u,v,s,F,rho,t,A,nrq,x1,x2,f1,f2,z,z2,w,w2,x;
1967-
long m,y,k,i;
1968-
1969-
r = min(p, 1.0-p);
1970-
q = 1.0 - r;
1971-
fm = n*r+r;
1972-
m = (long)floor(fm);
1973-
p1 = floor(2.195*sqrt(n*r*q)-4.6*q) + 0.5;
1974-
xm = m + 0.5;
1975-
xl = xm - p1;
1976-
xr = xm + p1;
1977-
c = 0.134 + 20.5/(15.3 + m);
1978-
a = (fm - xl)/(fm-xl*r);
1979-
laml = a*(1.0 + a/2.0);
1980-
a = (xr - fm)/(xr*q);
1981-
lamr = a*(1.0 + a/2.0);
1982-
p2 = p1*(1.0 + 2.0*c);
1983-
p3 = p2 + c/laml;
1984-
p4 = p3 + c/lamr;
1985-
1986-
/* sigh ... */
1987-
Step10:
1988-
nrq = n*r*q;
1989-
u = irk_double(state)*p4;
1990-
v = irk_double(state);
1991-
if (u > p1) goto Step20;
1992-
y = (long)floor(xm - p1*v + u);
1993-
goto Step60;
1994-
1995-
Step20:
1996-
if (u > p2) goto Step30;
1997-
x = xl + (u - p1)/c;
1998-
v = v*c + 1.0 - fabs(m - x + 0.5)/p1;
1999-
if (v > 1.0) goto Step10;
2000-
y = (long)floor(x);
2001-
goto Step50;
2002-
2003-
Step30:
2004-
if (u > p3) goto Step40;
2005-
y = (long)floor(xl + log(v)/laml);
2006-
if (y < 0) goto Step10;
2007-
v = v*(u-p2)*laml;
2008-
goto Step50;
2009-
2010-
Step40:
2011-
y = (long)floor(xr - log(v)/lamr);
2012-
if (y > n) goto Step10;
2013-
v = v*(u-p3)*lamr;
2014-
2015-
Step50:
2016-
k = labs(y - m);
2017-
if ((k > 20) && (k < ((nrq)/2.0 - 1))) goto Step52;
2018-
2019-
s = r/q;
2020-
a = s*(n+1);
2021-
F = 1.0;
2022-
if (m < y)
2023-
{
2024-
for (i=m+1; i<=y; i++)
2025-
{
2026-
F *= (a/i - s);
2027-
}
2028-
}
2029-
else if (m > y)
2030-
{
2031-
for (i=y+1; i<=m; i++)
2032-
{
2033-
F /= (a/i - s);
2034-
}
2035-
}
2036-
if (v > F) goto Step10;
2037-
goto Step60;
2038-
2039-
Step52:
2040-
rho = (k/(nrq))*((k*(k/3.0 + 0.625) + 0.16666666666666666)/nrq + 0.5);
2041-
t = -k*k/(2*nrq);
2042-
A = log(v);
2043-
if (A < (t - rho)) goto Step60;
2044-
if (A > (t + rho)) goto Step10;
2045-
2046-
x1 = y+1;
2047-
f1 = m+1;
2048-
z = n+1-m;
2049-
w = n-y+1;
2050-
x2 = x1*x1;
2051-
f2 = f1*f1;
2052-
z2 = z*z;
2053-
w2 = w*w;
2054-
if (A > (xm*log(f1/x1)
2055-
+ (n-m+0.5)*log(z/w)
2056-
+ (y-m)*log(w*r/(x1*q))
2057-
+ (13680.-(462.-(132.-(99.-140./f2)/f2)/f2)/f2)/f1/166320.
2058-
+ (13680.-(462.-(132.-(99.-140./z2)/z2)/z2)/z2)/z/166320.
2059-
+ (13680.-(462.-(132.-(99.-140./x2)/x2)/x2)/x2)/x1/166320.
2060-
+ (13680.-(462.-(132.-(99.-140./w2)/w2)/w2)/w2)/w/166320.))
2061-
{
2062-
goto Step10;
2063-
}
2064-
2065-
Step60:
2066-
if (p > 0.5)
2067-
{
2068-
y = n - y;
2069-
}
2070-
2071-
return y;
2072-
}
2073-
2074-
static long irk_binomial_inversion(irk_state *state, long n, double p)
2075-
{
2076-
double q, qn, np, px, U;
2077-
long X, bound;
2078-
2079-
q = 1.0 - p;
2080-
qn = exp(n * log(q));
2081-
np = n*p;
2082-
bound = min(n, np + 10.0*sqrt(np*q + 1));
2083-
2084-
X = 0;
2085-
px = qn;
2086-
U = irk_double(state);
2087-
while (U > px)
2088-
{
2089-
X++;
2090-
if (X > bound)
2091-
{
2092-
X = 0;
2093-
px = qn;
2094-
U = irk_double(state);
2095-
} else
2096-
{
2097-
U -= px;
2098-
px = ((n-X+1) * p * px)/(X*q);
2099-
}
2100-
}
2101-
return X;
2102-
}
2103-
2104-
#undef min
2105-
2106-
static long irk_binomial(irk_state *state, long n, double p)
2107-
{
2108-
double q;
2109-
2110-
if (p <= 0.5)
2111-
{
2112-
if (p*n <= 30.0)
2113-
{
2114-
return irk_binomial_inversion(state, n, p);
2115-
}
2116-
else
2117-
{
2118-
return irk_binomial_btpe(state, n, p);
2119-
}
2120-
}
2121-
else
2122-
{
2123-
q = 1.0-p;
2124-
if (q*n <= 30.0)
2125-
{
2126-
return n - irk_binomial_inversion(state, n, q);
2127-
}
2128-
else
2129-
{
2130-
return n - irk_binomial_btpe(state, n, q);
2131-
}
2132-
}
2133-
2134-
}
2135-
2136-
#endif

mkl_random/src/mkl_distributions.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,9 @@ extern void irk_noncentral_f_vec(irk_state *state, npy_intp len, double *res, do
8888
extern void irk_triangular_vec(irk_state *state, npy_intp len, double *res, double left, double mode, double right);
8989

9090
extern void irk_binomial_vec(irk_state *state, npy_intp len, int *res, const int n, const double p);
91+
92+
extern void irk_multinomial_vec(irk_state *state, npy_intp len, int *res, const int n, const int k, const double* pvec);
93+
9194
extern void irk_geometric_vec(irk_state *state, npy_intp len, int *res, const double p);
9295
extern void irk_negbinomial_vec(irk_state *state, npy_intp len, int *res, const double a, const double p);
9396
extern void irk_hypergeometric_vec(irk_state *state, npy_intp len, int *res, const int ls, const int ss, const int ms);

0 commit comments

Comments
 (0)