Skip to content

Commit 5f40d1b

Browse files
authored
Gaussian Mixture Models (#285)
1 parent 35581d4 commit 5f40d1b

File tree

8 files changed

+353
-10
lines changed

8 files changed

+353
-10
lines changed

CHANGELOG.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
Master Branch
22
=============
33

4+
ADDED:
5+
* Gaussian Mixture Model: `GaussianMixture`.
6+
* Tutorial for how to use `scikit-learn` mixture models to fit a model, and
7+
`chaospy` to generate quasi-random samples and orthogonal polynomials.
48
CHANGED:
59
* `chaospy.Trunc` updated to take both `lower` and `upper` at the same time.
610

chaospy/distributions/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@
7373
from .operators import *
7474
from .constructor import construct
7575
from .approximation import *
76-
from .kernel import GaussianKDE
76+
from .kernel import GaussianKDE, GaussianMixture
7777

7878
from . import (
7979
baseclass, sampler, approximation,
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
"""Kernel density estimation."""
22
from .gaussian import GaussianKDE
3+
from .mixture import GaussianMixture

chaospy/distributions/kernel/baseclass.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -78,8 +78,8 @@ def __init__(
7878
dependency_type="accumulate", length=len(samples),
7979
)
8080

81-
self.samples = samples
82-
self.h_mat = covariance
81+
self._samples = samples
82+
self._covariance = covariance
8383
self._permute = numpy.eye(len(rotation), dtype=int)[rotation]
8484
self._pcovariance = numpy.matmul(numpy.matmul(
8585
self._permute, covariance), self._permute.T)
@@ -107,9 +107,9 @@ def get_parameters(self, idx, cache, assert_numerical=True):
107107

108108
def _pdf(self, x_loc, idx, dim, cache):
109109
"""Kernel density function."""
110-
s, t = numpy.mgrid[:x_loc.shape[-1], :self.samples.shape[-1]]
110+
s, t = numpy.mgrid[:x_loc.shape[-1], :self._samples.shape[-1]]
111111
if not dim:
112-
samples = self.samples[idx, t]
112+
samples = self._samples[idx, t]
113113
z_loc = ((x_loc[s]-samples)*self._fwd_transform[:, 0, 0])
114114
self._zloc = z_loc[:, :, numpy.newaxis]
115115
kernel = self._kernel(self._zloc)/self._inv_transform[:, 0, 0]
@@ -122,7 +122,7 @@ def _pdf(self, x_loc, idx, dim, cache):
122122
x_loc = [self._get_cache(dim_, cache, get=0)
123123
for dim_ in self._rotation[:dim]] + [x_loc]
124124
x_loc = numpy.dstack([x[s] for x in x_loc])
125-
samples = numpy.dstack([self.samples[dim_, t]
125+
samples = numpy.dstack([self._samples[dim_, t]
126126
for dim_ in self._rotation[:dim+1]])
127127
zloc = numpy.sum((x_loc-samples)*self._fwd_transform[:, dim, :dim+1], -1)
128128
self._zloc = numpy.dstack([self._zloc[:, :, :dim], zloc])
@@ -138,9 +138,9 @@ def _pdf(self, x_loc, idx, dim, cache):
138138

139139
def _cdf(self, x_loc, idx, dim, cache):
140140
"""Forward mapping."""
141-
s, t = numpy.mgrid[:x_loc.shape[-1], :self.samples.shape[-1]]
141+
s, t = numpy.mgrid[:x_loc.shape[-1], :self._samples.shape[-1]]
142142
if not dim:
143-
z_loc = (x_loc[s]-self.samples[idx, t])*self._fwd_transform[:, 0, 0]
143+
z_loc = (x_loc[s]-self._samples[idx, t])*self._fwd_transform[:, 0, 0]
144144
self._zloc = z_loc[:, :, numpy.newaxis]
145145
out = numpy.sum(self._ikernel(z_loc)*self.weights, axis=-1)
146146
assert out.shape == x_loc.shape, (out.shape, x_loc.shape)
@@ -150,7 +150,7 @@ def _cdf(self, x_loc, idx, dim, cache):
150150
for dim_ in self._rotation[:dim]] + [x_loc]
151151
x_loc = numpy.dstack([x[s] for x in x_loc])
152152

153-
samples = numpy.dstack([self.samples[dim_, t]
153+
samples = numpy.dstack([self._samples[dim_, t]
154154
for dim_ in self._rotation[:dim+1]])
155155
zloc = numpy.sum((x_loc-samples)*self._fwd_transform[:, dim, :dim+1], -1)
156156
self._zloc = numpy.dstack([self._zloc[:, :, :dim], zloc])
@@ -161,7 +161,7 @@ def _cdf(self, x_loc, idx, dim, cache):
161161

162162
def _ppf(self, u_loc, idx, dim, cache):
163163
"""Inverse mapping."""
164-
xloc0 = numpy.quantile(self.samples[idx], u_loc)
164+
xloc0 = numpy.quantile(self._samples[idx], u_loc)
165165
out = chaospy.approximate_inverse(
166166
self, idx, u_loc, xloc0=xloc0, cache=cache, iterations=1000)
167167
return out

chaospy/distributions/kernel/gaussian.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,14 @@ class GaussianKDE(KernelDensityBaseclass):
4646
4747
"""
4848

49+
    @property
    def samples(self):
        """Read-only view of the sample locations (internal ``self._samples``)."""
        return self._samples
52+
53+
    @property
    def h_mat(self):
        """Read-only alias for the covariance/bandwidth matrix.

        Kept for backwards compatibility after the attribute was renamed to
        the internal ``self._covariance``.
        """
        return self._covariance
56+
4957
@staticmethod
5058
def _kernel(z_loc):
5159
"""The underlying density kernel."""
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
"""Gaussian Mixture Model."""
import numpy

from .gaussian import GaussianKDE


class GaussianMixture(GaussianKDE):
    """
    Gaussian Mixture Model.

    A Gaussian mixture model is a probabilistic model that assumes all the data
    points are generated from a mixture of a finite number of Gaussian
    distributions with unknown parameters. One can think of mixture models as
    generalizing K-means clustering to incorporate information about the
    covariance structure of the data as well as the centers of the latent
    Gaussians.

    Attributes:
        means:
            Sequence of means.
        covariances:
            Sequence of covariance matrices.
        weights:
            How much each sample is weighted. Either a scalar when the samples
            are equally weighted, or a vector with the same length as the
            number of mixed models.

    Examples:
        >>> means = ([0, 1], [1, 0])
        >>> covariances = ([[1, 0], [0, 1]], [[1, 0.5], [0.5, 1]])
        >>> distribution = GaussianMixture(means, covariances)
        >>> uloc = [[0, 0, 1, 1], [0, 1, 0, 1]]
        >>> distribution.pdf(uloc).round(4)
        array([0.0954, 0.092 , 0.1212, 0.0954])
        >>> distribution.fwd(uloc).round(4)
        array([[0.3293, 0.3293, 0.6707, 0.6707],
               [0.3699, 0.6731, 0.3711, 0.734 ]])
        >>> distribution.inv(uloc).round(4)
        array([[-8.9681, -8.9681,  8.0521,  8.0521],
               [-9.862 , 10.1977, -9.5929, 10.2982]])
        >>> distribution.mom([(0, 1, 1), (1, 0, 1)]).round(4)
        array([0.5 , 0.5 , 0.25])

    """

    @property
    def means(self):
        """Component means, shape ``(n_components, n_dim)``."""
        # Stored internally column-wise as (n_dim, n_components); transpose back.
        return self._samples.T

    @property
    def covariances(self):
        """Component covariance matrices, shape ``(n_components, n_dim, n_dim)``."""
        # Stored internally as (n_dim, n_dim, n_components); undo the axis swap.
        return numpy.swapaxes(self._covariance, 0, 2)

    def __init__(self, means, covariances, weights=None, rotation=None):
        """
        Args:
            means (numpy.ndarray):
                Sequence of mean values. With shape `(n_components, n_dim)`.
            covariances (numpy.ndarray):
                Sequence of covariance matrices.
                With shape `(n_components, n_dim, n_dim)`.
            weights (Optional[numpy.ndarray]):
                Weights of the samples. This must have the shape
                `(n_components,)`. If omitted, each sample is assumed to be
                equally weighted.

        """
        # The kernel baseclass stores locations column-wise: (n_dim, n_components).
        means = numpy.atleast_2d(numpy.transpose(means))
        n_dim, n_components = means.shape

        # `numpy.asfarray` is removed in NumPy 2.0; asarray with an explicit
        # float dtype is the supported equivalent.
        covariances = numpy.asarray(covariances, dtype=float)
        if covariances.ndim in (1, 2):
            # NOTE(review): this broadcast replicates values across leading
            # axes, which only yields valid per-component covariance matrices
            # for particular input shapes -- confirm the intended 1-D/2-D
            # input convention against callers.
            covariances = numpy.broadcast_to(
                covariances.T, (n_dim, n_dim, n_components))
        else:
            # (n_components, n_dim, n_dim) -> (n_dim, n_dim, n_components)
            covariances = numpy.swapaxes(covariances, 0, 2)
        assert covariances.shape == (n_dim, n_dim, n_components), (
            "covariances must have shape (n_components, n_dim, n_dim); "
            "got %s" % (covariances.shape,))
        super(GaussianMixture, self).__init__(
            samples=means, h_mat=covariances, weights=weights, rotation=rotation)

docs/tutorials/README.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ Tutorials
1313
advanced/scikitlearn_regression
1414
advanced/intrusive_galerkin
1515
advanced/kernel_density_estimation
16+
advanced/gaussian_mixture_model
1617
polynomial/expansion_construction
1718
polynomial/polynomial_evaluation
1819
polynomial/wiener_askey_scheme
@@ -76,10 +77,14 @@ Advanced Topics
7677
function of a random variable in a non-parametric way. It works for both
7778
uni-variate and multi-variate data. It includes automatic bandwidth
7879
determination.
80+
`Gaussian Mixture Model`_
81+
A Gaussian mixture model is a probabilistic model constructed from a mixture
82+
of a finite number of Gaussian distributions.
7983

8084
.. _Scikit-Learn Regression: ./advanced/scikitlearn_regression.ipynb
8185
.. _Intrusive Galerkin: ./advanced/intrusive_galerkin.ipynb
8286
.. _Kernel Density Estimation: ./advanced/kernel_density_estimation.ipynb
87+
.. _Gaussian Mixture Model: ./advanced/gaussian_mixture_model.ipynb
8388

8489
Polynomial Behavior
8590
-------------------

0 commit comments

Comments
 (0)