MAINT Merge bound_elemwise into bound: add a broadcast_conditions kwarg (default True) and have the multivariate distributions pass broadcast_conditions=False.

twiecki · twiecki · commit 1b01b531be04 · 2016-12-19T19:51:28.000+01:00
diff --git a/pymc3/distributions/continuous.py b/pymc3/distributions/continuous.py
@@ -13,7 +13,7 @@
 import warnings
 
 from . import transforms
-from .dist_math import bound, bound_elemwise, logpow, gammaln, betaln, std_cdf, i0, i1
+from .dist_math import bound, logpow, gammaln, betaln, std_cdf, i0, i1
 from .distribution import Continuous, draw_values, generate_samples
 
 __all__ = ['Uniform', 'Flat', 'Normal', 'Beta', 'Exponential', 'Laplace',
@@ -146,7 +146,7 @@ def random(self, point=None, size=None, repeat=None):
     def logp(self, value):
         lower = self.lower
         upper = self.upper
-        return bound_elemwise(-tt.log(upper - lower),
+        return bound(-tt.log(upper - lower),
                               value >= lower, value <= upper)
 
 
@@ -243,7 +243,7 @@ def logp(self, value):
         sd = self.sd
         tau = self.tau
         mu = self.mu
-        return bound_elemwise((-tau * (value - mu)**2 + tt.log(tau / np.pi / 2.)) / 2.,
+        return bound((-tau * (value - mu)**2 + tt.log(tau / np.pi / 2.)) / 2.,
                      sd > 0)
 
 
@@ -289,7 +289,7 @@ def random(self, point=None, size=None, repeat=None):
     def logp(self, value):
         tau = self.tau
         sd = self.sd
-        return bound_elemwise(-0.5 * tau * value**2 + 0.5 * tt.log(tau * 2. / np.pi),
+        return bound(-0.5 * tau * value**2 + 0.5 * tt.log(tau * 2. / np.pi),
                      value >= 0,
                      tau > 0, sd > 0)
 
@@ -402,7 +402,7 @@ def logp(self, value):
         lam = self.lam
         alpha = self.alpha
         # value *must* be iid. Otherwise this is wrong.
-        return bound_elemwise(logpow(lam / (2. * np.pi), 0.5)
+        return bound(logpow(lam / (2. * np.pi), 0.5)
                      - logpow(value - alpha, 1.5)
                      - (0.5 * lam / (value - alpha)
                         * ((value - alpha - mu) / mu)**2),
@@ -492,7 +492,7 @@ def logp(self, value):
         alpha = self.alpha
         beta = self.beta
 
-        return bound_elemwise(logpow(value, alpha - 1) + logpow(1 - value, beta - 1)
+        return bound(logpow(value, alpha - 1) + logpow(1 - value, beta - 1)
                      - betaln(alpha, beta),
                      value >= 0, value <= 1,
                      alpha > 0, beta > 0)
@@ -537,7 +537,7 @@ def random(self, point=None, size=None, repeat=None):
 
     def logp(self, value):
         lam = self.lam
-        return bound_elemwise(tt.log(lam) - lam * value, value > 0, lam > 0)
+        return bound(tt.log(lam) - lam * value, value > 0, lam > 0)
 
 
 class Laplace(Continuous):
@@ -641,7 +641,7 @@ def random(self, point=None, size=None, repeat=None):
     def logp(self, value):
         mu = self.mu
         tau = self.tau
-        return bound_elemwise(-0.5 * tau * (tt.log(value) - mu)**2
+        return bound(-0.5 * tau * (tt.log(value) - mu)**2
                      + 0.5 * tt.log(tau / (2. * np.pi))
                      - tt.log(value),
                      tau > 0)
@@ -702,7 +702,7 @@ def logp(self, value):
         lam = self.lam
         sd = self.sd
 
-        return bound_elemwise(gammaln((nu + 1.0) / 2.0)
+        return bound(gammaln((nu + 1.0) / 2.0)
                      + .5 * tt.log(lam / (nu * np.pi))
                      - gammaln(nu / 2.0)
                      - (nu + 1.0) / 2.0 * tt.log1p(lam * (value - mu)**2 / nu),
@@ -765,7 +765,7 @@ def random(self, point=None, size=None, repeat=None):
     def logp(self, value):
         alpha = self.alpha
         m = self.m
-        return bound_elemwise(tt.log(alpha) + logpow(m, alpha)
+        return bound(tt.log(alpha) + logpow(m, alpha)
                      - logpow(value, alpha + 1),
                      value >= m, alpha > 0, m > 0)
 
@@ -817,7 +817,7 @@ def random(self, point=None, size=None, repeat=None):
     def logp(self, value):
         alpha = self.alpha
         beta = self.beta
-        return bound_elemwise(- tt.log(np.pi) - tt.log(beta)
+        return bound(- tt.log(np.pi) - tt.log(beta)
                      - tt.log1p(((value - alpha) / beta)**2),
                      beta > 0)
 
@@ -863,7 +863,7 @@ def random(self, point=None, size=None, repeat=None):
 
     def logp(self, value):
         beta = self.beta
-        return bound_elemwise(tt.log(2) - tt.log(np.pi) - tt.log(beta)
+        return bound(tt.log(2) - tt.log(np.pi) - tt.log(beta)
                      - tt.log1p((value / beta)**2),
                      value >= 0, beta > 0)
 
@@ -943,7 +943,7 @@ def random(self, point=None, size=None, repeat=None):
     def logp(self, value):
         alpha = self.alpha
         beta = self.beta
-        return bound_elemwise(
+        return bound(
             -gammaln(alpha) + logpow(
                 beta, alpha) - beta * value + logpow(value, alpha - 1),
 
@@ -1007,7 +1007,7 @@ def random(self, point=None, size=None, repeat=None):
     def logp(self, value):
         alpha = self.alpha
         beta = self.beta
-        return bound_elemwise(logpow(beta, alpha) - gammaln(alpha) - beta / value
+        return bound(logpow(beta, alpha) - gammaln(alpha) - beta / value
                      + logpow(value, -alpha - 1),
                      value > 0, alpha > 0, beta > 0)
 
@@ -1088,7 +1088,7 @@ def _random(a, b, size=None):
     def logp(self, value):
         alpha = self.alpha
         beta = self.beta
-        return bound_elemwise(tt.log(alpha) - tt.log(beta)
+        return bound(tt.log(alpha) - tt.log(beta)
                      + (alpha - 1) * tt.log(value / beta)
                      - (value / beta)**alpha,
                      value >= 0, alpha > 0, beta > 0)
@@ -1161,7 +1161,7 @@ def random(self, point=None, size=None, repeat=None):
                                 size=size)
 
     def logp(self, value):
-        return bound_elemwise(self.dist.logp(value),
+        return bound(self.dist.logp(value),
                      value >= self.lower, value <= self.upper)
 
 
@@ -1286,7 +1286,7 @@ def logp(self, value):
                        + logpow(std_cdf((value - mu) / sigma - sigma / nu), 1.),
                        - tt.log(sigma * tt.sqrt(2 * np.pi))
                        - 0.5 * ((value - mu) / sigma)**2)
-        return bound_elemwise(lp, sigma > 0., nu > 0.)
+        return bound(lp, sigma > 0., nu > 0.)
 
 
 class VonMises(Continuous):
@@ -1335,7 +1335,7 @@ def random(self, point=None, size=None, repeat=None):
     def logp(self, value):
         mu = self.mu
         kappa = self.kappa
-        return bound_elemwise(kappa * tt.cos(mu - value) - tt.log(2 * np.pi * i0(kappa)), value >= -np.pi, value <= np.pi, kappa >= 0)
+        return bound(kappa * tt.cos(mu - value) - tt.log(2 * np.pi * i0(kappa)), value >= -np.pi, value <= np.pi, kappa >= 0)
 
 
 class SkewNormal(Continuous):
@@ -1401,7 +1401,7 @@ def logp(self, value):
         sd = self.sd
         mu = self.mu
         alpha = self.alpha
-        return bound_elemwise(
+        return bound(
             tt.log(1 +
             tt.erf(((value - mu) * tt.sqrt(tau) * alpha) / tt.sqrt(2)))
             + (-tau * (value - mu)**2
diff --git a/pymc3/distributions/discrete.py b/pymc3/distributions/discrete.py
@@ -5,7 +5,7 @@
 import theano.tensor as tt
 from scipy import stats
 
-from .dist_math import bound, bound_elemwise, factln, binomln, betaln, logpow
+from .dist_math import bound, factln, binomln, betaln, logpow
 from .distribution import Discrete, draw_values, generate_samples
 
 __all__ = ['Binomial',  'BetaBinomial',  'Bernoulli',  'Poisson',
@@ -54,7 +54,7 @@ def logp(self, value):
         n = self.n
         p = self.p
 
-        return bound_elemwise(
+        return bound(
             binomln(n, value) + logpow(p, value) + logpow(1 - p, n - value),
             0 <= value, value <= n,
             0 <= p, p <= 1)
@@ -118,7 +118,7 @@ def random(self, point=None, size=None, repeat=None):
     def logp(self, value):
         alpha = self.alpha
         beta = self.beta
-        return bound_elemwise(binomln(self.n, value)
+        return bound(binomln(self.n, value)
                      + betaln(value + alpha, self.n - value + beta)
                      - betaln(alpha, beta),
                      value >= 0, value <= self.n,
@@ -158,7 +158,7 @@ def random(self, point=None, size=None, repeat=None):
 
     def logp(self, value):
         p = self.p
-        return bound_elemwise(
+        return bound(
             tt.switch(value, tt.log(p), tt.log(1 - p)),
             value >= 0, value <= 1,
             p >= 0, p <= 1)
@@ -204,7 +204,7 @@ def random(self, point=None, size=None, repeat=None):
 
     def logp(self, value):
         mu = self.mu
-        log_prob = bound_elemwise(
+        log_prob = bound(
             logpow(mu, value) - factln(value) - mu,
             mu >= 0, value >= 0)
         # Return zero when mu and value are both zero
@@ -255,7 +255,7 @@ def random(self, point=None, size=None, repeat=None):
     def logp(self, value):
         mu = self.mu
         alpha = self.alpha
-        negbinom = bound_elemwise(binomln(value + alpha - 1, value)
+        negbinom = bound(binomln(value + alpha - 1, value)
                          + logpow(mu / (mu + alpha), value)
                          + logpow(alpha / (mu + alpha), alpha),
                          value >= 0, mu > 0, alpha > 0)
@@ -300,7 +300,7 @@ def random(self, point=None, size=None, repeat=None):
 
     def logp(self, value):
         p = self.p
-        return bound_elemwise(tt.log(p) + logpow(1 - p, value - 1),
+        return bound(tt.log(p) + logpow(1 - p, value - 1),
                      0 <= p, p <= 1, value >= 1)
 
 
@@ -348,8 +348,8 @@ def random(self, point=None, size=None, repeat=None):
     def logp(self, value):
         upper = self.upper
         lower = self.lower
-        return bound_elemwise(-tt.log(upper - lower + 1),
-                              lower <= value, value <= upper)
+        return bound(-tt.log(upper - lower + 1),
+                     lower <= value, value <= upper)
 
 
 class Categorical(Discrete):
@@ -408,7 +408,7 @@ def logp(self, value):
             a = tt.log(p[tt.arange(p.shape[0]), value])
         else:
             a = tt.log(p[value])
-        return bound_elemwise(a,
+        return bound(a,
                      value >= 0, value <= (k - 1),
                      sumto1)
 
@@ -439,7 +439,7 @@ def _random(c, dtype=dtype, size=None):
 
     def logp(self, value):
         c = self.c
-        return bound_elemwise(0, tt.eq(value, c))
+        return bound(0, tt.eq(value, c))
 
 def ConstantDist(*args, **kwargs):
     warnings.warn("ConstantDist has been deprecated. In future, use Constant instead.",
diff --git a/pymc3/distributions/dist_math.py b/pymc3/distributions/dist_math.py
@@ -10,24 +10,33 @@
 
 from .special import gammaln, multigammaln
 
-
-def bound_elemwise(logp, *conditions):
+def bound(logp, *conditions, **kwargs):
     """
     Bounds a log probability density with several conditions.
 
-    Respects shape of logp and performs broadcasting when
-    conditions.shape > logp.shape.
-
     Parameters
     ----------
     logp : float
     *conditions : booleans
+    broadcast_conditions : bool (optional, default=True)
+        If True, broadcasts logp to match the largest shape of the conditions.
+        This is used e.g. in DiscreteUniform where logp is a scalar constant and the shape
+        is specified via the conditions.
+        If False, will return the same shape as logp.
+        This is used e.g. in Multinomial where broadcasting can lead to differences in the logp.
 
     Returns
     -------
     logp with elements set to -inf where any condition is False
     """
-    return tt.switch(alltrue_elemwise(conditions), logp, -np.inf)
+    broadcast_conditions = kwargs.get('broadcast_conditions', True)
+
+    if broadcast_conditions:
+        alltrue = alltrue_elemwise
+    else:
+        alltrue = alltrue_scalar
+
+    return tt.switch(alltrue(conditions), logp, -np.inf)
 
 
 def alltrue_elemwise(vals):
@@ -37,23 +46,7 @@ def alltrue_elemwise(vals):
     return ret
 
 
-def bound(logp, *conditions):
-    """
-    Bounds a log probability density with several conditions
-
-    Parameters
-    ----------
-    logp : float
-    *conditions : booleans
-
-    Returns
-    -------
-    logp if all conditions are true
-    -inf if some are false
-    """
-    return tt.switch(alltrue(conditions), logp, -np.inf)
-
-def alltrue(vals):
+def alltrue_scalar(vals):
     return tt.all([tt.all(1 * val) for val in vals])
 
 
diff --git a/pymc3/distributions/multivariate.py b/pymc3/distributions/multivariate.py
@@ -245,7 +245,8 @@ def logp(self, value):
         return bound(tt.sum(logpow(value, a - 1) - gammaln(a), axis=-1)
                      + gammaln(tt.sum(a, axis=-1)),
                      tt.all(value >= 0), tt.all(value <= 1),
-                     k > 1, tt.all(a > 0))
+                     k > 1, tt.all(a > 0),
+                     broadcast_conditions=False)
 
 
 class Multinomial(Discrete):
@@ -323,7 +324,9 @@ def logp(self, x):
             tt.all(tt.eq(tt.sum(x, axis=-1, keepdims=True), n)),
             tt.all(p <= 1),
             tt.all(tt.eq(tt.sum(p, axis=-1), 1)),
-            tt.all(tt.ge(n, 0)))
+            tt.all(tt.ge(n, 0)),
+            broadcast_conditions=False
+        )
 
 
 def posdef(AA):
@@ -443,7 +446,9 @@ def logp(self, X):
                       - 2 * multigammaln(n / 2., p)) / 2,
                      matrix_pos_def(X),
                      tt.eq(X, X.T),
-                     n > (p - 1))
+                     n > (p - 1),
+                     broadcast_conditions=False
+        )
 
 
 def WishartBartlett(name, S, nu, is_cholesky=False, return_cholesky=False, testval=None):
@@ -605,4 +610,6 @@ def logp(self, x):
         return bound(result,
                      tt.all(X <= 1), tt.all(X >= -1),
                      matrix_pos_def(X),
-                     n > 0)
+                     n > 0,
+                     broadcast_conditions=False
+        )
diff --git a/pymc3/tests/test_dist_math.py b/pymc3/tests/test_dist_math.py