Commit 7d327b3

Junpeng Lao and hwassner authored
Mixture of mixtures (#2904)
* Mixture of mixtures

  Following the discussion on Discourse (https://discourse.pymc.io/t/how-can-we-build-a-mixture-of-mixtures/910/), I made some small fixes so that it is easier to create multivariate mixtures and mixtures of mixtures.

* fix test
* add test for multivariate mixture
* fix float32 test

Co-authored-by: Junpeng Lao <[email protected]>
Co-authored-by: Hubert Wassner <[email protected]>
Parent: 6dd0e81
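The pattern this commit enables, distilled from the `test_mixture_of_mixture` test added below: build the inner mixtures with `Mixture.dist(...)` (unnamed, so they can be used as component distributions) and pass them as `comp_dists` to an outer `Mixture`. A minimal sketch; the data, shapes, and priors here are illustrative, not part of the commit:

```python
import numpy as np
import pymc3 as pm

data = np.random.lognormal(mean=0.5, sigma=1.0, size=1000)  # illustrative data

with pm.Model():
    # inner mixtures: built with .dist() so they are unnamed and usable as components
    g_w = pm.Dirichlet('g_w', a=np.ones(4))
    g_comp = pm.Normal.dist(mu=pm.Exponential('mu_g', lam=1.0, shape=4),
                            sd=1, shape=4)
    g_mix = pm.Mixture.dist(w=g_w, comp_dists=g_comp)

    l_w = pm.Dirichlet('l_w', a=np.ones(4))
    l_comp = pm.Lognormal.dist(mu=pm.Exponential('mu_l', lam=1.0, shape=4),
                               sd=1, shape=4)
    l_mix = pm.Mixture.dist(w=l_w, comp_dists=l_comp)

    # outer mixture whose two components are themselves mixtures
    mix_w = pm.Dirichlet('mix_w', a=np.ones(2))
    mix = pm.Mixture('mix', w=mix_w, comp_dists=[g_mix, l_mix], observed=data)
```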

File tree: 4 files changed, +123 −12 lines changed


RELEASE-NOTES.md

Lines changed: 1 addition & 0 deletions

@@ -19,6 +19,7 @@
 - Fix the Binomial likelihood in `.glm.families.Binomial`, with the flexibility of specifying the `n`.
 - Add `offset` kwarg to `.glm`.
 - Changed the `compare` function to accept a dictionary of model-trace pairs instead of two separate lists of models and traces.
+- Add test and support for creating multivariate mixtures and mixtures of mixtures.
 
 ### Fixes
 

pymc3/distributions/mixture.py

Lines changed: 14 additions & 9 deletions

@@ -67,6 +67,7 @@ class Mixture(Distribution):
 
         like = pm.Mixture('like', w=w, comp_dists = [pois1, pois2], observed=data)
     """
+
     def __init__(self, w, comp_dists, *args, **kwargs):
         shape = kwargs.pop('shape', ())
 
@@ -95,7 +96,7 @@ def __init__(self, w, comp_dists, *args, **kwargs):
 
             if 'mode' not in defaults:
                 defaults.append('mode')
-        except AttributeError:
+        except (AttributeError, ValueError):
             pass
 
         super(Mixture, self).__init__(shape, dtype, defaults=defaults,
@@ -109,22 +110,25 @@ def _comp_logp(self, value):
 
             return comp_dists.logp(value_)
         except AttributeError:
-            return tt.stack([comp_dist.logp(value) for comp_dist in comp_dists],
-                            axis=1)
+            return tt.squeeze(tt.stack([comp_dist.logp(value)
+                                        for comp_dist in comp_dists],
+                                       axis=1))
 
     def _comp_means(self):
         try:
             return tt.as_tensor_variable(self.comp_dists.mean)
         except AttributeError:
-            return tt.stack([comp_dist.mean for comp_dist in self.comp_dists],
-                            axis=1)
+            return tt.squeeze(tt.stack([comp_dist.mean
+                                        for comp_dist in self.comp_dists],
+                                       axis=1))
 
     def _comp_modes(self):
         try:
             return tt.as_tensor_variable(self.comp_dists.mode)
         except AttributeError:
-            return tt.stack([comp_dist.mode for comp_dist in self.comp_dists],
-                            axis=1)
+            return tt.squeeze(tt.stack([comp_dist.mode
+                                        for comp_dist in self.comp_dists],
+                                       axis=1))
 
     def _comp_samples(self, point=None, size=None, repeat=None):
         try:
@@ -196,15 +200,16 @@ class NormalMixture(Mixture):
 
     Note: You only have to pass in sd or tau, but not both.
     """
+
     def __init__(self, w, mu, *args, **kwargs):
         _, sd = get_tau_sd(tau=kwargs.pop('tau', None),
                            sd=kwargs.pop('sd', None))
-
+
         distshape = np.broadcast(mu, sd).shape
         self.mu = mu = tt.as_tensor_variable(mu)
         self.sd = sd = tt.as_tensor_variable(sd)
 
-        if not distshape:
+        if not distshape:
             distshape = np.broadcast(mu.tag.test_value, sd.tag.test_value).shape
 
         super(NormalMixture, self).__init__(w, Normal.dist(mu, sd=sd, shape=distshape),
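Two substantive changes in this file. Catching `ValueError` alongside `AttributeError` lets `__init__` keep going when computing a default `mode` fails for multivariate components (my reading of the fix; the commit message doesn't spell it out). The `tt.squeeze` wrappers drop stray length-one axes that can appear when per-component `logp`, `mean`, or `mode` terms are stacked along `axis=1` for multivariate components; without the squeeze, the extra axis makes the broadcasting against the mixture weights go wrong. A rough NumPy analogue under assumed shapes:

```python
import numpy as np

# Assumed shapes for illustration: each component's logp evaluates to
# (n_obs, 1) rather than (n_obs,) once an event dimension has been reduced.
n_obs, n_comp = 5, 2
comp_logps = [np.random.randn(n_obs, 1) for _ in range(n_comp)]

stacked = np.stack(comp_logps, axis=1)   # (5, 2, 1): stray trailing axis
squeezed = np.squeeze(stacked)           # (5, 2): one column per component

w = np.array([0.4, 0.6])
weighted = np.log(w) + squeezed          # broadcasts cleanly to (5, 2)
print(stacked.shape, squeezed.shape, weighted.shape)
```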

pymc3/stats.py

Lines changed: 1 addition & 1 deletion

@@ -11,7 +11,7 @@
 import pymc3 as pm
 from pymc3.theanof import floatX
 
-from scipy.misc import logsumexp
+from scipy.special import logsumexp
 from scipy.stats import dirichlet
 from scipy.optimize import minimize
 from scipy.signal import fftconvolve
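A one-line but necessary fix: `scipy.misc.logsumexp` was deprecated in favor of `scipy.special.logsumexp` and later removed from `scipy.misc`, so the import has to move. `logsumexp` is what keeps the mixture log-density numerically stable, computing log(sum_k w_k * exp(logp_k)) without underflow. A minimal example with made-up numbers:

```python
import numpy as np
from scipy.special import logsumexp

w = np.array([0.3, 0.7])              # mixture weights
comp_logp = np.array([[-1.2, -0.4],   # per-observation, per-component logp
                      [-2.0, -0.1]])

# log(sum_k w_k * exp(logp_k)) per observation, computed stably
mix_logp = logsumexp(np.log(w) + comp_logp, axis=-1)
print(mix_logp)
```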

pymc3/tests/test_mixture.py

Lines changed: 107 additions & 2 deletions

@@ -2,9 +2,12 @@
 from numpy.testing import assert_allclose
 
 from .helpers import SeededTest
-from pymc3 import Dirichlet, Gamma, Metropolis, Mixture, Model, Normal, NormalMixture, Poisson, sample
+from pymc3 import Dirichlet, Gamma, Normal, Lognormal, Poisson, Exponential, \
+    Mixture, NormalMixture, MvNormal, sample, Metropolis, Model
+import scipy.stats as st
+from scipy.special import logsumexp
 from pymc3.theanof import floatX
-
+import theano
 
 # Generate data
 def generate_normal_mixture_data(w, mu, sd, size=1000):
@@ -104,3 +107,105 @@ def test_mixture_list_of_poissons(self):
         assert_allclose(np.sort(trace['mu'].mean(axis=0)),
                         np.sort(self.pois_mu),
                         rtol=0.1, atol=0.1)
+
+    def test_mixture_of_mvn(self):
+        mu1 = np.asarray([0., 1.])
+        cov1 = np.diag([1.5, 2.5])
+        mu2 = np.asarray([1., 0.])
+        cov2 = np.diag([2.5, 3.5])
+        obs = np.asarray([[.5, .5], mu1, mu2])
+        with Model() as model:
+            w = Dirichlet('w', floatX(np.ones(2)), transform=None)
+            mvncomp1 = MvNormal.dist(mu=mu1, cov=cov1)
+            mvncomp2 = MvNormal.dist(mu=mu2, cov=cov2)
+            y = Mixture('x_obs', w, [mvncomp1, mvncomp2],
+                        observed=obs)
+
+        # check logp of each component
+        complogp_st = np.vstack((st.multivariate_normal.logpdf(obs, mu1, cov1),
+                                 st.multivariate_normal.logpdf(obs, mu2, cov2))
+                                ).T
+        complogp = y.distribution._comp_logp(theano.shared(obs)).eval()
+        assert_allclose(complogp, complogp_st)
+
+        # check logp of mixture
+        testpoint = model.test_point
+        mixlogp_st = logsumexp(np.log(testpoint['w']) + complogp_st,
+                               axis=-1, keepdims=True)
+        assert_allclose(y.logp_elemwise(testpoint),
+                        mixlogp_st)
+
+        # check logp of model
+        priorlogp = st.dirichlet.logpdf(x=testpoint['w'],
+                                        alpha=np.ones(2))
+        assert_allclose(model.logp(testpoint),
+                        mixlogp_st.sum() + priorlogp)
+
+    def test_mixture_of_mixture(self):
+        nbr = 4
+        with Model() as model:
+            # component distributions for the inner mixtures
+            g_comp = Normal.dist(
+                mu=Exponential('mu_g', lam=1.0, shape=nbr, transform=None),
+                sd=1,
+                shape=nbr)
+            l_comp = Lognormal.dist(
+                mu=Exponential('mu_l', lam=1.0, shape=nbr, transform=None),
+                sd=1,
+                shape=nbr)
+            # weight vectors for the inner mixtures
+            g_w = Dirichlet('g_w', a=floatX(np.ones(nbr)*0.0000001), transform=None)
+            l_w = Dirichlet('l_w', a=floatX(np.ones(nbr)*0.0000001), transform=None)
+            # inner mixtures
+            g_mix = Mixture.dist(w=g_w, comp_dists=g_comp)
+            l_mix = Mixture.dist(w=l_w, comp_dists=l_comp)
+            # mixture of mixtures
+            mix_w = Dirichlet('mix_w', a=floatX(np.ones(2)), transform=None)
+            mix = Mixture('mix', w=mix_w,
+                          comp_dists=[g_mix, l_mix],
+                          observed=np.exp(self.norm_x))
+
+        test_point = model.test_point
+
+        def mixmixlogp(value, point):
+            priorlogp = st.dirichlet.logpdf(x=point['g_w'],
+                                            alpha=np.ones(nbr)*0.0000001) + \
+                        st.expon.logpdf(x=point['mu_g']).sum() + \
+                        st.dirichlet.logpdf(x=point['l_w'],
+                                            alpha=np.ones(nbr)*0.0000001) + \
+                        st.expon.logpdf(x=point['mu_l']).sum() + \
+                        st.dirichlet.logpdf(x=point['mix_w'],
+                                            alpha=np.ones(2))
+            complogp1 = st.norm.logpdf(x=value,
+                                       loc=point['mu_g'])
+            mixlogp1 = logsumexp(np.log(point['g_w']) + complogp1,
+                                 axis=-1, keepdims=True)
+            complogp2 = st.lognorm.logpdf(value, 1., 0., np.exp(point['mu_l']))
+            mixlogp2 = logsumexp(np.log(point['l_w']) + complogp2,
+                                 axis=-1, keepdims=True)
+            complogp_mix = np.concatenate((mixlogp1, mixlogp2), axis=1)
+            mixmixlogpg = logsumexp(np.log(point['mix_w']) + complogp_mix,
+                                    axis=-1, keepdims=True)
+            return priorlogp, mixmixlogpg
+
+        value = np.exp(self.norm_x)[:, None]
+        priorlogp, mixmixlogpg = mixmixlogp(value, test_point)
+
+        # check logp of mixture
+        assert_allclose(mixmixlogpg, mix.logp_elemwise(test_point))
+
+        # check model logp
+        assert_allclose(priorlogp + mixmixlogpg.sum(),
+                        model.logp(test_point))
+
+        # change the test point and check the logp again
+        test_point['g_w'] = np.asarray([.1, .1, .2, .6])
+        test_point['mu_g'] = np.exp(np.random.randn(nbr))
+        priorlogp, mixmixlogpg = mixmixlogp(value, test_point)
+        assert_allclose(mixmixlogpg, mix.logp_elemwise(test_point))
+        assert_allclose(priorlogp + mixmixlogpg.sum(),
+                        model.logp(test_point))
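The reference computation in `mixmixlogp` above is two nested applications of the same identity: one `logsumexp` per inner mixture, then another over the stacked inner results. A hypothetical helper (not part of the commit) that isolates the pattern:

```python
import numpy as np
from scipy.special import logsumexp

def nested_mixture_logp(w_outer, inner_weights, inner_comp_logps):
    """Elementwise logp of a mixture whose components are mixtures.

    inner_comp_logps[k]: (n_obs, n_comp_k) per-component logps of inner mixture k
    inner_weights[k]: weight vector of inner mixture k
    """
    inner = [logsumexp(np.log(w) + lp, axis=-1, keepdims=True)
             for w, lp in zip(inner_weights, inner_comp_logps)]
    return logsumexp(np.log(w_outer) + np.concatenate(inner, axis=1),
                     axis=-1, keepdims=True)

# made-up numbers: 4 observations, two inner mixtures with 2 components each
lp = nested_mixture_logp(np.array([0.5, 0.5]),
                         [np.array([0.3, 0.7]), np.array([0.2, 0.8])],
                         [np.random.randn(4, 2), np.random.randn(4, 2)])
print(lp.shape)  # (4, 1): one logp per observation
```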
