Skip to content

Commit 6fe8a53

Browse files
committed
more docstrings
1 parent 5ed1046 commit 6fe8a53

File tree

1 file changed

+225
-24
lines changed

1 file changed

+225
-24
lines changed

pymc3/gp/gp.py

Lines changed: 225 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@
1616

1717
class Base(object):
1818
"""
19-
Base class
19+
Base class. Can be used as a GP placeholder object in
20+
additive models for GPs that won't be used for prediction.
2021
"""
2122
def __init__(self, mean_func=None, cov_func=None):
2223
# check if not None, args are correct subclasses.
@@ -56,26 +57,52 @@ class Latent(Base):
5657
R"""
5758
The `gp.Latent` class is a direct implementation of a GP. No additive
5859
noise is assumed. It is called "Latent" because the underlying function
59-
values are treated as latent variables. It has a `prior` method, and a
60-
`conditional` method. Given a mean and covariance function, the
60+
values are treated as latent variables. It has a `prior` method and a
61+
`conditional` method. Given a mean and covariance function the
6162
function $f(x)$ is modeled as,
6263
6364
.. math::
6465
6566
f(x) \sim \mathcal{GP}\left(\mu(x), k(x, x')\right)
6667
67-
Use the `prior` and `conditional` methods to construct random
68+
Use the `prior` and `conditional` methods to actually construct random
6869
variables representing the unknown, or latent, function whose
6970
distribution is the GP prior or GP conditional. This GP implementation
70-
can be used to implement regression with non-normal likelihoods or
71-
classification.
71+
can be used to implement regression on data that is not normally
72+
distributed.
7273
7374
Parameters
7475
----------
7576
cov_func : None, 2D array, or instance of Covariance
76-
The covariance function. Defaults to matrix of zeros.
77+
The covariance function. Defaults to zero.
7778
mean_func : None, instance of Mean
78-
The mean function. Defaults to a vector of ones.
79+
The mean function. Defaults to zero.
80+
81+
Examples
82+
--------
83+
.. code:: python
84+
85+
# A one dimensional column vector of inputs.
86+
X = np.linspace(0, 1, 10)[:, None]
87+
88+
with pm.Model() as model:
89+
# Specify the covariance function.
90+
cov_func = pm.gp.cov.ExpQuad(1, lengthscales=0.1)
91+
92+
# Specify the GP. The default mean function is `Zero`.
93+
gp = pm.gp.Latent(cov_func=cov_func)
94+
95+
# Place a GP prior over the function f.
96+
f = gp.prior("f", n_points=10, X=X)
97+
98+
...
99+
100+
# After fitting or sampling, specify the distribution
101+
# at new points with .conditional
102+
Xnew = np.linspace(-1, 2, 50)[:, None]
103+
104+
with model:
105+
fcond = gp.conditional("fcond", Xnew=Xnew)
79106
80107
Notes
81108
-----
@@ -84,14 +111,6 @@ class Latent(Base):
84111
85112
- For more information on the `prior` and `conditional` methods,
86113
see their docstrings.
87-
88-
Examples
89-
--------
90-
.. code:: python
91-
92-
with pm.Model() as model:
93-
94-
95114
"""
96115

97116
def __init__(self, mean_func=None, cov_func=None):
@@ -183,9 +202,9 @@ def conditional(self, name, Xnew, n_points=None, given=None):
183202
This is the number of points the GP is evaluated over, the
184203
number of rows in `Xnew`.
185204
given : keyword arguments
186-
The `gp.Latent` argument can optionally take as keyword args,
187-
`X`, `f`, and `gp`. See the tutorial on additive GP models in
188-
PyMC3 for more information.
205+
Can optionally take as keyword args, `X`, `f`, and `gp`.
206+
See the tutorial on additive GP models in PyMC3 for more
207+
information.
189208
"""
190209
givens = self._get_given_vals(**given)
191210
mu, cov = self._build_conditional(Xnew, *givens)
@@ -240,6 +259,27 @@ def _build_conditional(self, Xnew, X, f):
240259
return nu2, mu, covT
241260

242261
def conditional(self, name, Xnew, n_points=None):
262+
R"""
263+
Returns the conditional distribution evaluated over new input
264+
locations `Xnew`. Given a set of function values `f` that
265+
the TP prior was placed over, the conditional distribution over a
266+
set of new points, `f_*` is
267+
268+
.. math::
269+
270+
f^* \mid f, X, X_{\text{new}} \sim \mathcal{TP}\left(\mu(x), k(x, x'), \nu\right)
271+
272+
Parameters
273+
----------
274+
name : string
275+
Name of the random variable
276+
Xnew : array-like
277+
Function input values.
278+
n_points : int, optional
279+
Required if `Xnew` is a random variable or a Theano object.
280+
This is the number of points the GP is evaluated over, the
281+
number of rows in `Xnew`.
282+
"""
243283
X = self.X
244284
f = self.f
245285
nu2, mu, covT = self._build_conditional(Xnew, X, f)
@@ -259,13 +299,43 @@ def _build_marginal_likelihood(self, X, noise):
259299
Kxx = self.cov_func(X)
260300
Knx = noise(X)
261301
cov = Kxx + Knx
262-
chol = cholesky(stabilize(cov))
263-
return mu, chol
302+
return mu, cov
264303

265304
def marginal_likelihood(self, name, X, y, noise, n_points=None, is_observed=True):
305+
R"""
306+
Returns the marginal likelihood distribution, given the input
307+
locations `X` and the data `y`. This is the integral over the product of the GP
308+
prior and a normal likelihood.
309+
310+
.. math::
311+
312+
y \mid X,\theta \sim \int p(y \mid f,\, X,\, \theta) \, p(f \mid X,\, \theta) \, df
313+
314+
Parameters
315+
----------
316+
name : string
317+
Name of the random variable
318+
X : array-like
319+
Function input values. If one-dimensional, must be a column
320+
vector with shape `(n, 1)`.
321+
y : array-like
322+
Data that is the sum of the function with the GP prior and Gaussian
323+
noise. Must have shape `(n, )`.
324+
noise : scalar, Variable, or Covariance
325+
Standard deviation of the Gaussian noise. Can also be a Covariance for
326+
non-white noise.
327+
n_points : int, optional
328+
Required if `X` is a random variable or a Theano object.
329+
This is the number of points the GP is evaluated over, the
330+
number of rows in `X`.
331+
is_observed : bool
332+
Whether to set `y` as an `observed` variable in the `model`.
333+
Default is `True`.
334+
"""
266335
if not isinstance(noise, Covariance):
267336
noise = pm.gp.cov.WhiteNoise(noise)
268-
mu, chol = self._build_marginal_likelihood(X, noise)
337+
mu, cov = self._build_marginal_likelihood(X, noise)
338+
chol = cholesky(stabilize(cov))
269339
self.X = X
270340
self.y = y
271341
self.noise = noise
@@ -314,26 +384,94 @@ def _build_conditional(self, Xnew, X, y, noise, cov_total, mean_total,
314384
return mu, stabilize(cov)
315385

316386
def conditional(self, name, Xnew, pred_noise=False, n_points=None, **given):
387+
R"""
388+
Returns the conditional distribution evaluated over new input
389+
locations `Xnew`. Given a set of function values `f` that
390+
the GP prior was placed over, the conditional distribution over a
391+
set of new points, `f_*` is
392+
393+
.. math::
394+
395+
f^* \mid f, X, X_{\text{new}} \sim \mathcal{GP}\left(\mu(x), k(x, x')\right)
396+
397+
Parameters
398+
----------
399+
name : string
400+
Name of the random variable
401+
Xnew : array-like
402+
Function input values. If one-dimensional, must be a column
403+
vector with shape `(n, 1)`.
404+
pred_noise : bool
405+
Whether or not observation noise is included in the conditional.
406+
Default is `False`.
407+
n_points : int, optional
408+
Required if `Xnew` is a random variable or a Theano object.
409+
This is the number of points the GP is evaluated over, the
410+
number of rows in `Xnew`.
411+
given : keyword arguments
412+
Can optionally take as keyword args, `X`, `y`, `noise`,
413+
and `gp`. See the tutorial on additive GP models in PyMC3
414+
for more information.
415+
"""
317416
givens = self._get_given_vals(**given)
318417
mu, cov = self._build_conditional(Xnew, *givens, pred_noise, diag=False)
319418
chol = cholesky(cov)
320419
n_points = infer_shape(Xnew, n_points)
321420
return pm.MvNormal(name, mu=mu, chol=chol, shape=n_points)
322421

323-
def predict(self, Xnew, point=None, diag=False, pred_noise=False, **given):
422+
def predict(self, Xnew, point, diag=False, pred_noise=False, **given):
423+
R"""
424+
Return the mean vector and covariance matrix of the conditional
425+
distribution as numpy arrays, given a `point`, such as the MAP
426+
estimate or a sample from a `trace`.
427+
428+
Parameters
429+
----------
430+
Xnew : array-like
431+
Function input values. If one-dimensional, must be a column
432+
vector with shape `(n, 1)`.
433+
point : pymc3.model.Point
434+
A specific point to condition on.
435+
diag : bool
436+
If `True`, return the diagonal instead of the full covariance
437+
matrix. Default is `False`.
438+
pred_noise : bool
439+
Whether or not observation noise is included in the conditional.
440+
Default is `False`.
441+
given : keyword arguments
442+
Can optionally take the same keyword args as `conditional`.
443+
"""
324444
mu, cov = self.predictt(Xnew, diag, pred_noise, **given)
325445
mu, cov = draw_values([mu, cov], point=point)
326446
return mu, cov
327447

328448
def predictt(self, Xnew, diag=False, pred_noise=False, **given):
449+
R"""
450+
Return the mean vector and covariance matrix of the conditional
451+
distribution as symbolic variables.
452+
453+
Parameters
454+
----------
455+
Xnew : array-like
456+
Function input values. If one-dimensional, must be a column
457+
vector with shape `(n, 1)`.
458+
diag : bool
459+
If `True`, return the diagonal instead of the full covariance
460+
matrix. Default is `False`.
461+
pred_noise : bool
462+
Whether or not observation noise is included in the conditional.
463+
Default is `False`.
464+
given : keyword arguments
465+
Can optionally take the same keyword args as `conditional`.
466+
"""
329467
givens = self._get_given_vals(**given)
330468
mu, cov = self._build_conditional(Xnew, *givens, pred_noise, diag)
331469
return mu, cov
332470

333471

334472
@conditioned_vars(["X", "Xu", "y", "sigma"])
335473
class MarginalSparse(Marginal):
336-
_available_approx = ["FITC", "VFE", "DTC"]
474+
_available_approx = ("FITC", "VFE", "DTC")
337475
""" FITC and VFE sparse approximations
338476
"""
339477
def __init__(self, mean_func=None, cov_func=None, approx="FITC"):
@@ -381,6 +519,37 @@ def _build_marginal_likelihood_logp(self, X, Xu, y, sigma):
381519
return -1.0 * (constant + logdet + quadratic + trace)
382520

383521
def marginal_likelihood(self, name, X, Xu, y, sigma, n_points=None, is_observed=True):
522+
R"""
523+
Returns the approximate marginal likelihood distribution, given the input
524+
locations `X` and the data `y`. This is the integral over the product of the GP
525+
prior and a normal likelihood.
526+
527+
.. math::
528+
529+
y \mid X,\theta \sim \int p(y \mid f,\, X,\, \theta) \, p(f \mid X,\, \theta) \, df
530+
531+
Parameters
532+
----------
533+
name : string
534+
Name of the random variable
535+
X : array-like
536+
Function input values. If one-dimensional, must be a column
537+
vector with shape `(n, 1)`.
538+
Xu: array-like
539+
The inducing points. Must have the same number of columns as `X`.
540+
y : array-like
541+
Data that is the sum of the function with the GP prior and Gaussian
542+
noise. Must have shape `(n, )`.
543+
sigma : scalar, Variable
544+
Standard deviation of the Gaussian noise.
545+
n_points : int, optional
546+
Required if `X` is a random variable or a Theano object.
547+
This is the number of points the GP is evaluated over, the
548+
number of rows in `X`.
549+
is_observed : bool
550+
Whether to set `y` as an `observed` variable in the `model`.
551+
Default is `True`.
552+
"""
384553
self.X = X
385554
self.Xu = Xu
386555
self.y = y
@@ -440,4 +609,36 @@ def _get_given_vals(self, **given):
440609
X, Xu, y, sigma = self.X, self.y, self.sigma
441610
return X, y, sigma, cov_total, mean_total
442611

612+
def conditional(self, name, Xnew, pred_noise=False, n_points=None, **given):
613+
R"""
614+
Returns the conditional distribution evaluated over new input
615+
locations `Xnew`. Given a set of function values `f` that
616+
the GP prior was placed over, the conditional distribution over a
617+
set of new points, `f_*` is
618+
619+
.. math::
620+
621+
f^* \mid f, X, X_{\text{new}} \sim \mathcal{GP}\left(\mu(x), k(x, x')\right)
622+
623+
Parameters
624+
----------
625+
name : string
626+
Name of the random variable
627+
Xnew : array-like
628+
Function input values. If one-dimensional, must be a column
629+
vector with shape `(n, 1)`.
630+
pred_noise : bool
631+
Whether or not observation noise is included in the conditional.
632+
Default is `False`.
633+
n_points : int, optional
634+
Required if `Xnew` is a random variable or a Theano object.
635+
This is the number of points the GP is evaluated over, the
636+
number of rows in `Xnew`.
637+
given : keyword arguments
638+
Can optionally take as keyword args, `X`, `Xu`, `y`, `sigma`,
639+
and `gp`. See the tutorial on additive GP models in PyMC3
640+
for more information.
641+
"""
642+
rv = super(MarginalSparse, self).conditional(name, Xnew, pred_noise, n_points, **given)
643+
return rv
443644

0 commit comments

Comments
 (0)