From 77cc6d219bdd5572cc503deab26a5c334383c04a Mon Sep 17 00:00:00 2001 From: Max Balandat Date: Thu, 25 Oct 2018 05:05:33 -0700 Subject: [PATCH 01/12] [WIP] Heteroskedastic Likelihood This is a proof of concept of how heteroskedastic likelihoods may work. --- gpytorch/functions/__init__.py | 9 +-- gpytorch/lazy/interpolated_lazy_tensor.py | 10 ++-- gpytorch/lazy/lazy_evaluated_kernel_tensor.py | 12 ++-- gpytorch/lazy/lazy_tensor.py | 10 ++-- gpytorch/likelihoods/__init__.py | 16 +++--- gpytorch/likelihoods/gaussian_likelihood.py | 55 +++++++++++-------- gpytorch/likelihoods/homoskedastic_noise.py | 26 +++++++++ .../mlls/exact_marginal_log_likelihood.py | 9 +-- gpytorch/models/__init__.py | 20 ++++--- gpytorch/models/exact_gp.py | 2 + 10 files changed, 106 insertions(+), 63 deletions(-) create mode 100644 gpytorch/likelihoods/homoskedastic_noise.py diff --git a/gpytorch/functions/__init__.py b/gpytorch/functions/__init__.py index d4a4a75e5..a07612db4 100644 --- a/gpytorch/functions/__init__.py +++ b/gpytorch/functions/__init__.py @@ -64,7 +64,7 @@ def dsmm(sparse_mat, dense_mat): return DSMM(sparse_mat)(dense_mat) -def exact_predictive_mean(full_covar, full_mean, train_labels, num_train, likelihood, precomputed_cache=None): +def exact_predictive_mean(full_covar, full_mean, train_inputs, train_labels, num_train, likelihood, precomputed_cache=None): """ Computes the posterior predictive mean of a GP @@ -72,6 +72,7 @@ def exact_predictive_mean(full_covar, full_mean, train_labels, num_train, likeli - full_covar ( (n+t) x (n+t) ) - the block prior covariance matrix of training and testing points - [ K_XX, K_XX*; K_X*X, K_X*X* ] - full_mean (n + t) - the training and test prior means, stacked on top of each other + - train_inputs TODO - train_labels (n) - the training labels minus the training prior mean - noise (1) - the observed noise (from the likelihood) - precomputed_cache - speeds up subsequent computations (default: None) @@ -86,10 +87,10 @@ def exact_predictive_mean(full_covar, full_mean, train_labels, num_train, likeli from ..lazy.non_lazy_tensor import NonLazyTensor full_covar = NonLazyTensor(full_covar) - return full_covar.exact_predictive_mean(full_mean, train_labels, num_train, likelihood, precomputed_cache) + return full_covar.exact_predictive_mean(full_mean, train_inputs, train_labels, num_train, likelihood, precomputed_cache) -def exact_predictive_covar(full_covar, num_train, likelihood, precomputed_cache=None): +def exact_predictive_covar(full_covar, train_inputs, num_train, likelihood, precomputed_cache=None): """ Computes the posterior predictive covariance of a GP @@ -110,7 +111,7 @@ def exact_predictive_covar(full_covar, num_train, likelihood, precomputed_cache= from ..lazy.non_lazy_tensor import NonLazyTensor full_covar = NonLazyTensor(full_covar) - return full_covar.exact_predictive_covar(num_train, likelihood, precomputed_cache) + return full_covar.exact_predictive_covar(train_inputs, num_train, likelihood, precomputed_cache) def log_normal_cdf(x): diff --git a/gpytorch/lazy/interpolated_lazy_tensor.py b/gpytorch/lazy/interpolated_lazy_tensor.py index 679e00cf6..c12747304 100644 --- a/gpytorch/lazy/interpolated_lazy_tensor.py +++ b/gpytorch/lazy/interpolated_lazy_tensor.py @@ -365,7 +365,7 @@ def diag(self): res = res.view(batch_size, n_data, -1).sum(-1) return res - def exact_predictive_mean(self, full_mean, train_labels, num_train, likelihood, precomputed_cache=None): + def exact_predictive_mean(self, full_mean, train_inputs, train_labels, num_train, likelihood, 
precomputed_cache=None): from ..distributions import MultivariateNormal if precomputed_cache is None: @@ -383,7 +383,7 @@ def exact_predictive_mean(self, full_mean, train_labels, num_train, likelihood, train_mean = full_mean.narrow(-1, 0, train_train_covar.size(-1)) - mvn = likelihood(MultivariateNormal(train_mean, train_train_covar)) + mvn = likelihood(MultivariateNormal(train_mean, train_train_covar), train_inputs) train_mean, train_train_covar = mvn.mean, mvn.lazy_covariance_matrix train_train_covar_inv_labels = train_train_covar.inv_matmul((train_labels - train_mean).unsqueeze(-1)) @@ -423,11 +423,11 @@ def _exact_predictive_covar_inv_quad_form_root(self, precomputed_cache, test_tra res = left_interp(test_interp_indices, test_interp_values, precomputed_cache) return res - def exact_predictive_covar(self, num_train, likelihood, precomputed_cache=None): + def exact_predictive_covar(self, train_inputs, num_train, likelihood, precomputed_cache=None): from ..distributions import MultivariateNormal if not beta_features.fast_pred_var.on() and not beta_features.fast_pred_samples.on(): - return super(InterpolatedLazyTensor, self).exact_predictive_covar(num_train, likelihood, precomputed_cache) + return super(InterpolatedLazyTensor, self).exact_predictive_covar(train_inputs, num_train, likelihood, precomputed_cache) n_test = self.size(-2) - num_train train_interp_indices = self.left_interp_indices.narrow(-2, 0, num_train) @@ -453,7 +453,7 @@ def exact_predictive_covar(self, num_train, likelihood, precomputed_cache=None): ) grv = MultivariateNormal(torch.zeros(1), train_train_covar) - train_train_covar = likelihood(grv).lazy_covariance_matrix + train_train_covar = likelihood(grv, train_inputs).lazy_covariance_matrix # Get probe vectors for inverse root num_probe_vectors = beta_features.fast_pred_var.num_probe_vectors() diff --git a/gpytorch/lazy/lazy_evaluated_kernel_tensor.py b/gpytorch/lazy/lazy_evaluated_kernel_tensor.py index 4acd4c647..b1043c869 100644 --- a/gpytorch/lazy/lazy_evaluated_kernel_tensor.py +++ b/gpytorch/lazy/lazy_evaluated_kernel_tensor.py @@ -168,22 +168,22 @@ def representation_tree(self): def evaluate(self): return self.evaluate_kernel().evaluate() - def exact_predictive_mean(self, full_mean, train_labels, num_train, likelihood, precomputed_cache=None): + def exact_predictive_mean(self, full_mean, train_inputs, train_labels, num_train, likelihood, precomputed_cache=None): if self.kernel.has_custom_exact_predictions: return self.evaluate_kernel().exact_predictive_mean( - full_mean, train_labels, num_train, likelihood, precomputed_cache + full_mean, train_inputs, train_labels, num_train, likelihood, precomputed_cache ) else: return super(LazyEvaluatedKernelTensor, self).exact_predictive_mean( - full_mean, train_labels, num_train, likelihood, precomputed_cache + full_mean, train_inputs, train_labels, num_train, likelihood, precomputed_cache ) - def exact_predictive_covar(self, num_train, likelihood, precomputed_cache=None): + def exact_predictive_covar(self, train_inputs, num_train, likelihood, precomputed_cache=None): if self.kernel.has_custom_exact_predictions: - return self.evaluate_kernel().exact_predictive_covar(num_train, likelihood, precomputed_cache) + return self.evaluate_kernel().exact_predictive_covar(train_inputs, num_train, likelihood, precomputed_cache) else: return super(LazyEvaluatedKernelTensor, self).exact_predictive_covar( - num_train, likelihood, precomputed_cache + train_inputs, num_train, likelihood, precomputed_cache ) def repeat(self, *sizes): diff 
--git a/gpytorch/lazy/lazy_tensor.py b/gpytorch/lazy/lazy_tensor.py index 1312cff23..4cc2c95e7 100644 --- a/gpytorch/lazy/lazy_tensor.py +++ b/gpytorch/lazy/lazy_tensor.py @@ -428,7 +428,7 @@ def evaluate_kernel(self): """ return self.representation_tree()(*self.representation()) - def exact_predictive_mean(self, full_mean, train_labels, num_train, likelihood, precomputed_cache=None): + def exact_predictive_mean(self, full_mean, train_inputs, train_labels, num_train, likelihood, precomputed_cache=None): """ Computes the posterior predictive covariance of a GP Assumes that self is the block prior covariance matrix of training and testing points @@ -436,6 +436,7 @@ def exact_predictive_mean(self, full_mean, train_labels, num_train, likelihood, Args: full_mean (:obj:`torch.tensor`): the training and test prior means, stacked on top of each other + train_inputs TODO train_labels (:obj:`torch.tensor`): the training labels minus the training prior mean noise (:obj:`torch.tensor`): the observed noise (from the likelihood) precomputed_cache (optional): speeds up subsequent computations (default: None) @@ -453,7 +454,7 @@ def exact_predictive_mean(self, full_mean, train_labels, num_train, likelihood, train_train_covar = self[:num_train, :num_train] train_mean = full_mean.narrow(-1, 0, train_train_covar.size(-1)) - mvn = likelihood(MultivariateNormal(train_mean, train_train_covar)) + mvn = likelihood(MultivariateNormal(train_mean, train_train_covar), train_inputs) train_mean, train_train_covar = mvn.mean, mvn.lazy_covariance_matrix train_labels_offset = train_labels - train_mean @@ -472,13 +473,14 @@ def exact_predictive_mean(self, full_mean, train_labels, num_train, likelihood, res = res + test_mean return res, precomputed_cache.detach() - def exact_predictive_covar(self, num_train, likelihood, precomputed_cache=None): + def exact_predictive_covar(self, train_inputs, num_train, likelihood, precomputed_cache=None): """ Computes the posterior predictive covariance of a GP Assumes that self is the block prior covariance matrix of training and testing points [ K_XX, K_XX*; K_X*X, K_X*X* ] Args: + train_inputs TODO - CAN GET RID OF num_train arg here as well! 
num_train (int): The number of training points in the full covariance matrix noise (scalar): The observed noise (from the likelihood) precomputed_cache (optional): speeds up subsequent computations (default: None) @@ -498,7 +500,7 @@ def exact_predictive_covar(self, num_train, likelihood, precomputed_cache=None): test_train_covar = self[num_train:, :num_train] test_test_covar = self[num_train:, num_train:] - train_train_covar = likelihood(MultivariateNormal(torch.zeros(1), train_train_covar)).lazy_covariance_matrix + train_train_covar = likelihood(MultivariateNormal(torch.zeros(1), train_train_covar), train_inputs).lazy_covariance_matrix if not beta_features.fast_pred_var.on(): from .matmul_lazy_tensor import MatmulLazyTensor diff --git a/gpytorch/likelihoods/__init__.py b/gpytorch/likelihoods/__init__.py index 4e6f39d5c..9f46edbf8 100644 --- a/gpytorch/likelihoods/__init__.py +++ b/gpytorch/likelihoods/__init__.py @@ -1,18 +1,18 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals +from __future__ import absolute_import, division, print_function, unicode_literals +from .bernoulli_likelihood import BernoulliLikelihood +from .gaussian_likelihood import GaussianLikelihood, HeteroskedasticGaussianLikelihood, HomoskedasticGaussianLikelihood from .likelihood import Likelihood -from .gaussian_likelihood import GaussianLikelihood from .multitask_gaussian_likelihood import MultitaskGaussianLikelihood -from .bernoulli_likelihood import BernoulliLikelihood from .softmax_likelihood import SoftmaxLikelihood + __all__ = [ - "Likelihood", + "BernoulliLikelihood", "GaussianLikelihood", + "HeteroskedasticGaussianLikelihood", + "HomoskedasticGaussianLikelihood", + "Likelihood", "MultitaskGaussianLikelihood", - "BernoulliLikelihood", "SoftmaxLikelihood", ] diff --git a/gpytorch/likelihoods/gaussian_likelihood.py b/gpytorch/likelihoods/gaussian_likelihood.py index 882bce72a..107a555ce 100644 --- a/gpytorch/likelihoods/gaussian_likelihood.py +++ b/gpytorch/likelihoods/gaussian_likelihood.py @@ -1,42 +1,34 @@ from __future__ import absolute_import, division, print_function, unicode_literals import math -import torch -from ..distributions import MultivariateNormal -from ..functions import add_diag -from ..likelihoods import Likelihood + from .. 
import settings +from ..distributions import MultivariateNormal +from ..lazy import DiagLazyTensor +from .homoskedastic_noise import HomoskedasticNoise +from .likelihood import Likelihood class GaussianLikelihood(Likelihood): - r""" - """ + def __init__(self, noise_covar): + Likelihood.__init__(self) + self.noise_covar = noise_covar - def __init__(self, log_noise_prior=None, batch_size=1): - super(GaussianLikelihood, self).__init__() - self.register_parameter( - name="log_noise", parameter=torch.nn.Parameter(torch.zeros(batch_size, 1)), prior=log_noise_prior - ) - - @property - def noise(self): - return self.log_noise.exp() - - def forward(self, input): + def forward(self, input, *params): if not isinstance(input, MultivariateNormal): raise ValueError("GaussianLikelihood requires a MultivariateNormal input") mean, covar = input.mean, input.lazy_covariance_matrix - noise = self.noise - if covar.ndimension() == 2: - if settings.debug.on() and noise.size(0) > 1: - raise RuntimeError("With batch_size > 1, expected a batched MultivariateNormal distribution.") - noise = noise.squeeze(0) + return input.__class__(mean, covar + self.noise_covar(*params)) - return input.__class__(mean, add_diag(covar, noise)) + +class HomoskedasticGaussianLikelihood(GaussianLikelihood): + def __init__(self, log_noise_prior=None, batch_size=1): + noise_covar = HomoskedasticNoise(log_noise_prior=log_noise_prior, batch_size=1) + super(HomoskedasticGaussianLikelihood, self).__init__(noise_covar) def variational_log_probability(self, input, target): mean, variance = input.mean, input.variance - log_noise = self.log_noise + log_noise = self.noise_covar.log_noise if variance.ndimension() == 1: if settings.debug.on() and log_noise.size(0) > 1: raise RuntimeError("With batch_size > 1, expected a batched MultivariateNormal distribution.") @@ -45,3 +37,18 @@ def variational_log_probability(self, input, target): res = -0.5 * ((target - mean) ** 2 + variance) / log_noise.exp() res += -0.5 * log_noise - 0.5 * math.log(2 * math.pi) return res.sum(0) + + +class HeteroskedasticGaussianLikelihood(GaussianLikelihood): + def __init__(self, log_noise_model): + Likelihood.__init__(self) + self.log_noise_model = log_noise_model + + def forward(self, input, *params): + if not isinstance(input, MultivariateNormal): + raise ValueError("HeteroskedasticGaussianLikelihood requires a MultivariateNormal input") + mean, covar = input.mean, input.lazy_covariance_matrix + # TODO: This is inefficient, fix it! 
Allow for non-diagonal outputs + log_noise_covar = self.log_noise_model(*params).diag() + noise_covar = DiagLazyTensor(log_noise_covar.exp()) + return input.__class__(mean, covar + noise_covar) diff --git a/gpytorch/likelihoods/homoskedastic_noise.py b/gpytorch/likelihoods/homoskedastic_noise.py new file mode 100644 index 000000000..e559235ae --- /dev/null +++ b/gpytorch/likelihoods/homoskedastic_noise.py @@ -0,0 +1,26 @@ +from __future__ import absolute_import, division, print_function, unicode_literals + +import torch +from torch.nn import Parameter + +from ..lazy import DiagLazyTensor +from ..module import Module + + +class HomoskedasticNoise(Module): + def __init__(self, log_noise_prior=None, batch_size=1): + super(HomoskedasticNoise, self).__init__() + self.register_parameter( + name="log_noise", parameter=Parameter(torch.zeros(batch_size, 1)), prior=log_noise_prior + ) + + def forward(self, params): + noise = self.log_noise.exp() + if isinstance(params, list): + variance_shape = params[0].shape[:-2] + params[0].shape[-1:] + else: + variance_shape = params.shape[:-2] + params.shape[-1:] + if len(variance_shape) == 1: + noise = noise.squeeze(0) + variances = noise * torch.ones(*variance_shape, dtype=noise.dtype, device=noise.device) + return DiagLazyTensor(variances) diff --git a/gpytorch/mlls/exact_marginal_log_likelihood.py b/gpytorch/mlls/exact_marginal_log_likelihood.py index db67dbc59..58b1a6abb 100644 --- a/gpytorch/mlls/exact_marginal_log_likelihood.py +++ b/gpytorch/mlls/exact_marginal_log_likelihood.py @@ -23,12 +23,12 @@ def __init__(self, likelihood, model): raise RuntimeError("Likelihood must be Gaussian for exact inference") super(ExactMarginalLogLikelihood, self).__init__(likelihood, model) - def forward(self, output, target): + def forward(self, output, target, *params): if not isinstance(output, MultivariateNormal): raise RuntimeError("ExactMarginalLogLikelihood can only operate on Gaussian random variables") # Get the log prob of the marginal distribution - output = self.likelihood(output) + output = self.likelihood(output, *params) res = output.log_prob(target) # Add terms for SGPR / when inducing points are learned @@ -36,8 +36,9 @@ def forward(self, output, target): for variational_strategy in self.model.variational_strategies(): if isinstance(variational_strategy, MVNVariationalStrategy): trace_diff = trace_diff.add(variational_strategy.trace_diff()) - trace_diff = trace_diff / self.likelihood.log_noise.exp() - res = res.add(0.5, trace_diff) + if hasattr(self.likelihood, "log_noise"): + trace_diff = trace_diff / self.likelihood.log_noise.exp() + res = res.add(0.5, trace_diff) # Add log probs of priors on the parameters for _, param, prior in self.named_parameter_priors(): diff --git a/gpytorch/models/__init__.py b/gpytorch/models/__init__.py index 43e3024bc..b89915277 100644 --- a/gpytorch/models/__init__.py +++ b/gpytorch/models/__init__.py @@ -1,12 +1,16 @@ -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals +from __future__ import absolute_import, division, print_function, unicode_literals -from .gp import GP +from .additive_grid_inducing_variational_gp import AdditiveGridInducingVariationalGP from .exact_gp import ExactGP -from .variational_gp import VariationalGP +from .gp import GP from .grid_inducing_variational_gp import GridInducingVariationalGP -from .additive_grid_inducing_variational_gp import AdditiveGridInducingVariationalGP +from .variational_gp 
import VariationalGP + -__all__ = ["GP", "ExactGP", "VariationalGP", "GridInducingVariationalGP", "AdditiveGridInducingVariationalGP"] +__all__ = [ + "AdditiveGridInducingVariationalGP", + "ExactGP", + "GP", + "VariationalGP", + "GridInducingVariationalGP", +] diff --git a/gpytorch/models/exact_gp.py b/gpytorch/models/exact_gp.py index b082e1956..b8e3d418b 100644 --- a/gpytorch/models/exact_gp.py +++ b/gpytorch/models/exact_gp.py @@ -148,6 +148,7 @@ def __call__(self, *args, **kwargs): predictive_mean, mean_cache = exact_predictive_mean( full_covar=full_covar, full_mean=full_mean, + train_inputs=train_inputs, train_labels=train_targets, num_train=num_train, likelihood=self.likelihood, @@ -155,6 +156,7 @@ def __call__(self, *args, **kwargs): ) predictive_covar, covar_cache = exact_predictive_covar( full_covar=full_covar, + train_inputs=train_inputs, num_train=num_train, likelihood=self.likelihood, precomputed_cache=self.covar_cache, From 199db3617508c0512e3626ff20a0fcd257dae8d2 Mon Sep 17 00:00:00 2001 From: Max Balandat Date: Mon, 29 Oct 2018 07:37:22 -0700 Subject: [PATCH 02/12] DeprecationWarning for GaussianLikelihood, add MT version --- gpytorch/lazy/diag_lazy_tensor.py | 6 + gpytorch/likelihoods/__init__.py | 5 +- gpytorch/likelihoods/gaussian_likelihood.py | 55 ++++---- .../multitask_gaussian_likelihood.py | 133 ++++++++++++++++-- gpytorch/likelihoods/noise_models.py | 39 +++++ 5 files changed, 202 insertions(+), 36 deletions(-) create mode 100644 gpytorch/likelihoods/noise_models.py diff --git a/gpytorch/lazy/diag_lazy_tensor.py b/gpytorch/lazy/diag_lazy_tensor.py index ac843d722..951d26288 100644 --- a/gpytorch/lazy/diag_lazy_tensor.py +++ b/gpytorch/lazy/diag_lazy_tensor.py @@ -64,6 +64,12 @@ def evaluate(self): else: return super(DiagLazyTensor, self).evaluate() + def exp(self): + return DiagLazyTensor(self._diag.exp()) + + def sqrt(self): + return DiagLazyTensor(self._diag.sqrt()) + def zero_mean_mvn_samples(self, num_samples): if self.ndimension() == 3: base_samples = torch.randn( diff --git a/gpytorch/likelihoods/__init__.py b/gpytorch/likelihoods/__init__.py index 9f46edbf8..cf58d381c 100644 --- a/gpytorch/likelihoods/__init__.py +++ b/gpytorch/likelihoods/__init__.py @@ -1,16 +1,17 @@ from __future__ import absolute_import, division, print_function, unicode_literals from .bernoulli_likelihood import BernoulliLikelihood -from .gaussian_likelihood import GaussianLikelihood, HeteroskedasticGaussianLikelihood, HomoskedasticGaussianLikelihood +from .gaussian_likelihood import GaussianLikelihood, HomoskedasticGaussianLikelihood from .likelihood import Likelihood from .multitask_gaussian_likelihood import MultitaskGaussianLikelihood +from .noise_models import HeteroskedasticNoise from .softmax_likelihood import SoftmaxLikelihood __all__ = [ "BernoulliLikelihood", "GaussianLikelihood", - "HeteroskedasticGaussianLikelihood", + "HeteroskedasticNoise", "HomoskedasticGaussianLikelihood", "Likelihood", "MultitaskGaussianLikelihood", diff --git a/gpytorch/likelihoods/gaussian_likelihood.py b/gpytorch/likelihoods/gaussian_likelihood.py index 107a555ce..fa294dd74 100644 --- a/gpytorch/likelihoods/gaussian_likelihood.py +++ b/gpytorch/likelihoods/gaussian_likelihood.py @@ -1,34 +1,56 @@ from __future__ import absolute_import, division, print_function, unicode_literals import math +import warnings +import logging from .. 
import settings from ..distributions import MultivariateNormal -from ..lazy import DiagLazyTensor -from .homoskedastic_noise import HomoskedasticNoise +from ..lazy import AddedDiagLazyTensor, DiagLazyTensor from .likelihood import Likelihood +from .noise_models import HomoskedasticNoise + +DEPRECATION_WARNING = "'GaussianLikelihood' was renamed to 'HomoskedasticGaussianLikelihood'" class GaussianLikelihood(Likelihood): - def __init__(self, noise_covar): - Likelihood.__init__(self) - self.noise_covar = noise_covar + def __init__(self, *args, **kwargs): + if len(args) + len(kwargs) == 0 or "log_noise_prior" in kwargs or "batch_size" in kwargs: + warnings.warn(DEPRECATION_WARNING, DeprecationWarning) + logging.warning(DEPRECATION_WARNING) + self.__init__(log_noise_covar=HomoskedasticNoise(*args, **kwargs)) + self._is_homoskedastic = True + else: + super(GaussianLikelihood, self).__init__() + self.log_noise_covar = args[0] if len(kwargs) == 0 else kwargs["log_noise_covar"] def forward(self, input, *params): if not isinstance(input, MultivariateNormal): raise ValueError("GaussianLikelihood requires a MultivariateNormal input") mean, covar = input.mean, input.lazy_covariance_matrix - return input.__class__(mean, covar + self.noise_covar(*params)) + log_noise_covar = self.log_noise_covar(*params) + if isinstance(log_noise_covar, DiagLazyTensor): + full_covar = AddedDiagLazyTensor(covar, log_noise_covar.exp()) + else: + # TODO: Deal with non-diagonal noise covariance models + full_covar = covar + log_noise_covar.exp() + return input.__class__(mean, full_covar) + + def variational_log_probability(self, input, target): + if hasattr(self, "_is_homoskedastic"): + return HomoskedasticGaussianLikelihood.variational_log_probability(self, input, target) + else: + raise NotImplementedError class HomoskedasticGaussianLikelihood(GaussianLikelihood): def __init__(self, log_noise_prior=None, batch_size=1): - noise_covar = HomoskedasticNoise(log_noise_prior=log_noise_prior, batch_size=1) - super(HomoskedasticGaussianLikelihood, self).__init__(noise_covar) + log_noise_covar = HomoskedasticNoise(log_noise_prior=log_noise_prior, batch_size=1) + super(HomoskedasticGaussianLikelihood, self).__init__(log_noise_covar=log_noise_covar) def variational_log_probability(self, input, target): mean, variance = input.mean, input.variance - log_noise = self.noise_covar.log_noise + log_noise = self.log_noise_covar.log_noise if variance.ndimension() == 1: if settings.debug.on() and log_noise.size(0) > 1: raise RuntimeError("With batch_size > 1, expected a batched MultivariateNormal distribution.") @@ -37,18 +59,3 @@ def variational_log_probability(self, input, target): res = -0.5 * ((target - mean) ** 2 + variance) / log_noise.exp() res += -0.5 * log_noise - 0.5 * math.log(2 * math.pi) return res.sum(0) - - -class HeteroskedasticGaussianLikelihood(GaussianLikelihood): - def __init__(self, log_noise_model): - Likelihood.__init__(self) - self.log_noise_model = log_noise_model - - def forward(self, input, *params): - if not isinstance(input, MultivariateNormal): - raise ValueError("HeteroskedasticGaussianLikelihood requires a MultivariateNormal input") - mean, covar = input.mean, input.lazy_covariance_matrix - # TODO: This is inefficient, fix it! 
Allow for non-diagonal outputs - log_noise_covar = self.log_noise_model(*params).diag() - noise_covar = DiagLazyTensor(log_noise_covar.exp()) - return input.__class__(mean, covar + noise_covar) diff --git a/gpytorch/likelihoods/multitask_gaussian_likelihood.py b/gpytorch/likelihoods/multitask_gaussian_likelihood.py index 2df6d55ab..57623a1c2 100644 --- a/gpytorch/likelihoods/multitask_gaussian_likelihood.py +++ b/gpytorch/likelihoods/multitask_gaussian_likelihood.py @@ -1,10 +1,21 @@ from __future__ import absolute_import, division, print_function, unicode_literals import torch -from ..functions import add_diag -from ..lazy import DiagLazyTensor, KroneckerProductLazyTensor, RootLazyTensor -from ..likelihoods import GaussianLikelihood + from .. import settings +from ..functions import add_diag +from ..lazy import ( + BlockDiagLazyTensor, + DiagLazyTensor, + KroneckerProductLazyTensor, + MatmulLazyTensor, + NonLazyTensor, + RootLazyTensor, +) +from ..likelihoods import GaussianLikelihood, Likelihood + + +DEPRECATION_WARNING = "'MultitaskGaussianLikelihood' was renamed to 'HomoskedasticMultitaskGaussianLikelihood'" def _eval_covar_matrix(task_noise_covar_factor, log_noise): @@ -14,6 +25,12 @@ def _eval_covar_matrix(task_noise_covar_factor, log_noise): ) +def _eval_corr_matrix(task_noise_corr_factor): + M = task_noise_corr_factor.matmul(task_noise_corr_factor.transpose(-1, -2)) + dsqrtinv = 1 / M.diag().sqrt() + return M * dsqrtinv.unsqueeze(-1).matmul(dsqrtinv.unsqueeze(0)) + + class MultitaskGaussianLikelihood(GaussianLikelihood): """ A convenient extension of the :class:`gpytorch.likelihoods.GaussianLikelihood` to the multitask setting that allows @@ -24,11 +41,13 @@ class MultitaskGaussianLikelihood(GaussianLikelihood): Like the Gaussian likelihood, this object can be used with exact inference. """ - def __init__(self, num_tasks, rank=0, task_prior=None, batch_size=1, log_noise_prior=None): + def __init__(self, num_tasks, log_noise_covar, rank=0, task_correlation_prior=None, batch_size=1): """ Args: num_tasks (int): Number of tasks. + log_noise_covar TODO + rank (int): The rank of the task noise covariance matrix to fit. If `rank` is set to 0, then a diagonal covariance matrix is fit. @@ -36,8 +55,99 @@ def __init__(self, num_tasks, rank=0, task_prior=None, batch_size=1, log_noise_p `rank` > 0, or a prior over the log of just the diagonal elements, if `rank` == 0. """ - super(MultitaskGaussianLikelihood, self).__init__(batch_size=batch_size, log_noise_prior=log_noise_prior) + super(MultitaskGaussianLikelihood, self).__init__(log_noise_covar=log_noise_covar) + if rank != 0: + self.register_parameter( + name="task_noise_corr_factor", parameter=torch.nn.Parameter(torch.randn(batch_size, num_tasks, rank)) + ) + if task_correlation_prior is not None: + self.register_derived_prior( + name="MultitaskErrorCorrelationPrior", + prior=task_correlation_prior, + parameter_names=("task_noise_corr_factor",), + transform=_eval_corr_matrix, + ) + elif task_correlation_prior is not None: + raise ValueError("Can only specify task_correlation_prior if rank>0") + self.num_tasks = num_tasks + def forward(self, input, *params): + """ + Adds the log task noises to the diagonal of the covariance matrix of the supplied + :obj:`gpytorch.distributions.MultivariateNormal` or + :obj:`gpytorch.distributions.MultitaskMultivariateNormal`, in case of + `rank` == 0. Otherwise, adds a rank `rank` covariance matrix to it. 
+ + To accomplish this, we form a new :obj:`gpytorch.lazy.KroneckerProductLazyTensor` between :math:`I_{n}`, + an identity matrix with size equal to the data and a (not necessarily diagonal) matrix containing the task + noises :math:`D_{t}`. + + We also incorporate a shared `log_noise` parameter from the base + :class:`gpytorch.likelihoods.GaussianLikelihood` that we extend. + + The final covariance matrix after this method is then :math:`K + D_{t} \otimes I_{n} + \sigma^{2}I_{nt}`. + + Args: + input (:obj:`gpytorch.distributions.MultitaskMultivariateNormal`): Random variable whose covariance + matrix is a :obj:`gpytorch.lazy.LazyTensor` we intend to augment. + Returns: + :obj:`gpytorch.distributions.MultitaskMultivariateNormal`: A new random variable whose covariance + matrix is a :obj:`gpytorch.lazy.LazyTensor` with :math:`D_{t} \otimes I_{n}` and :math:`\sigma^{2}I_{nt}` + added. + """ + mean, covar = input.mean, input.lazy_covariance_matrix + + if hasattr(self, "task_noise_corr_factor"): + task_noise_corr_factor = self.task_noise_corr_factor + if covar.ndimension() == 2: + if settings.debug.on() and task_noise_corr_factor.size(0) > 1: + raise RuntimeError( + "With batch_size > 1, expected a batched MultitaskMultivariateNormal distribution." + ) + task_noise_corr_factor = task_noise_corr_factor.squeeze(0) + # TODO: This is inefficient, find a better way to do this + task_corr = NonLazyTensor(_eval_corr_matrix(task_noise_corr_factor)) + else: + task_corr = DiagLazyTensor( + torch.ones(covar.shape[:-2] + torch.Size([self.num_tasks]), dtype=covar.dtype, device=covar.device) + ) + + log_noise_covar = self.log_noise_covar(*params) # n x num_tasks + D_sem = log_noise_covar.exp().sqrt() + task_covar_blocks = MatmulLazyTensor(MatmulLazyTensor(D_sem, task_corr.repeat(mean.shape[-2], 1, 1)), D_sem) + task_covar = BlockDiagLazyTensor(task_covar_blocks) + return input.__class__(mean, covar + task_covar) + + def variational_log_probability(self, input, target): + raise NotImplementedError + + +class HomoskedasticMultitaskGaussianLikelihood(MultitaskGaussianLikelihood): + """ + A convenient extension of the :class:`gpytorch.likelihoods.GaussianLikelihood` to the multitask setting that allows + for a full cross-task covariance structure for the noise. The fitted covariance matrix has rank `rank`. + If a strictly diagonal task noise covariance matrix is desired, then rank=0 should be set. (This option still + allows for a different `log_noise` parameter for each task.) + + Like the Gaussian likelihood, this object can be used with exact inference. + """ + + def __init__(self, num_tasks, rank=0, task_prior=None, batch_size=1, log_noise_prior=None): + """ + Args: + num_tasks (int): Number of tasks. + + rank (int): The rank of the task noise covariance matrix to fit. If `rank` is set to 0, + then a diagonal covariance matrix is fit. + + task_prior (:obj:`gpytorch.priors.Prior`): Prior to use over the task noise covariance matrix if + `rank` > 0, or a prior over the log of just the diagonal elements, if `rank` == 0. 
+ + """ + Likelihood.__init__(self) + self.register_parameter( + name="log_noise", parameter=torch.nn.Parameter(torch.zeros(batch_size, 1)), prior=log_noise_prior + ) if rank == 0: self.register_parameter( name="log_task_noises", @@ -102,12 +212,14 @@ def forward(self, input): task_noise_covar_factor = task_noise_covar_factor.squeeze(0) task_var_lt = RootLazyTensor(task_noise_covar_factor) + device = ( + self.log_task_noises.device if hasattr(self, "log_task_noises") else self.task_noise_covar_factor.device + ) + if covar.ndimension() == 2: - eye_lt = DiagLazyTensor(torch.ones(covar.size(-1) // self.num_tasks, device=self.log_noise.device)) + eye_lt = DiagLazyTensor(torch.ones(covar.size(-1) // self.num_tasks, device=device)) else: - eye_lt = DiagLazyTensor( - torch.ones(covar.size(0), covar.size(-1) // self.num_tasks, device=self.log_noise.device) - ) + eye_lt = DiagLazyTensor(torch.ones(covar.size(0), covar.size(-1) // self.num_tasks, device=device)) # Make sure the batch sizes are going to match if task_var_lt.size(0) == 1: task_var_lt = task_var_lt.repeat(eye_lt.size(0), 1, 1) @@ -115,12 +227,13 @@ def forward(self, input): covar_kron_lt = KroneckerProductLazyTensor(task_var_lt, eye_lt) covar = covar + covar_kron_lt - noise = self.noise + noise = self.log_noise.exp() if covar.ndimension() == 2: if settings.debug.on() and noise.size(0) > 1: raise RuntimeError("With batch_size > 1, expected a batched MultitaskMultivariateNormal distribution.") noise = noise.squeeze(0) + # set_trace() covar = add_diag(covar, noise) return input.__class__(mean, covar) diff --git a/gpytorch/likelihoods/noise_models.py b/gpytorch/likelihoods/noise_models.py new file mode 100644 index 000000000..572ddb15f --- /dev/null +++ b/gpytorch/likelihoods/noise_models.py @@ -0,0 +1,39 @@ +from __future__ import absolute_import, division, print_function, unicode_literals + +import torch +from torch.nn import Parameter + +from ..distributions import MultivariateNormal +from ..lazy import DiagLazyTensor +from ..module import Module + + +class HomoskedasticNoise(Module): + def __init__(self, log_noise_prior=None, batch_size=1): + super(HomoskedasticNoise, self).__init__() + self.register_parameter( + name="log_noise", parameter=Parameter(torch.zeros(batch_size, 1)), prior=log_noise_prior + ) + + def forward(self, params): + log_noise = self.log_noise + p = params[0] if isinstance(params, list) else params + var_shape = p.shape[:-2] + p.shape[-1:] + if len(var_shape) == 1: + log_noise = log_noise.squeeze(0) + variances = log_noise * torch.ones(*var_shape, dtype=log_noise.dtype, device=log_noise.device) + return DiagLazyTensor(variances) + + +class HeteroskedasticNoise(Module): + def __init__(self, log_noise_model): + super(HeteroskedasticNoise, self).__init__() + self.log_noise_model = log_noise_model + + def forward(self, params): + output = self.log_noise_model(params[0] if isinstance(params, list) or isinstance(params, tuple) else params) + if not isinstance(output, MultivariateNormal): + raise NotImplementedError("Currently only log-noise models that return a MultivariateNormal are supported") + # note: this also works with MultitaskMultivariateNormal, where this + # will return a batched DiagLazyTensors of size n x num_tasks x num_tasks + return DiagLazyTensor(output.mean) From 529456c17464022d42e1c343bec52f2d4d77e79b Mon Sep 17 00:00:00 2001 From: Max Balandat Date: Mon, 29 Oct 2018 17:51:37 -0700 Subject: [PATCH 03/12] New base class, clean up, fix MTGLikelihood --- gpytorch/functions/__init__.py | 3 +- 
gpytorch/lazy/diag_lazy_tensor.py | 12 ++-- gpytorch/lazy/lazy_evaluated_kernel_tensor.py | 11 +++ gpytorch/lazy/lazy_tensor.py | 1 + gpytorch/likelihoods/__init__.py | 12 +++- gpytorch/likelihoods/gaussian_likelihood.py | 29 +++----- .../multitask_gaussian_likelihood.py | 72 +++++++++++++------ gpytorch/likelihoods/noise_models.py | 16 +++-- .../mlls/exact_marginal_log_likelihood.py | 4 +- gpytorch/models/exact_gp.py | 6 +- 10 files changed, 106 insertions(+), 60 deletions(-) diff --git a/gpytorch/functions/__init__.py b/gpytorch/functions/__init__.py index 69a079ccc..5728f7faf 100644 --- a/gpytorch/functions/__init__.py +++ b/gpytorch/functions/__init__.py @@ -73,8 +73,8 @@ def exact_predictive_mean(full_covar, full_mean, train_inputs, train_labels, num Args: - full_covar ( (n+t) x (n+t) ) - the block prior covariance matrix of training and testing points [ K_XX, K_XX*; K_X*X, K_X*X* ] - - train_inputs TODO - full_mean (n + t) - the training and test prior means, stacked on top of each other + - train_inputs TODO - train_labels (n) - the training labels minus the training prior mean - noise (1) - the observed noise (from the likelihood) - precomputed_cache - speeds up subsequent computations (default: None) @@ -99,6 +99,7 @@ def exact_predictive_covar(full_covar, train_inputs, num_train, likelihood, prec Args: - full_covar ( (n+t) x (n+t) ) - the block prior covariance matrix of training and testing points [ K_XX, K_XX*; K_X*X, K_X*X* ] + - train_inputs TODO - num_train (int) - how many training points are there in the full covariance matrix - noise (1) - the observed noise (from the likelihood) - precomputed_cache - speeds up subsequent computations (default: None) diff --git a/gpytorch/lazy/diag_lazy_tensor.py b/gpytorch/lazy/diag_lazy_tensor.py index d13e6a0cb..bdf9e0c25 100644 --- a/gpytorch/lazy/diag_lazy_tensor.py +++ b/gpytorch/lazy/diag_lazy_tensor.py @@ -64,12 +64,6 @@ def evaluate(self): else: return super(DiagLazyTensor, self).evaluate() - def exp(self): - return DiagLazyTensor(self._diag.exp()) - - def sqrt(self): - return DiagLazyTensor(self._diag.sqrt()) - def sum_batch(self, sum_batch_size=None): if sum_batch_size is None: diag = self._diag.view(-1, self._diag.size(-1)) @@ -78,6 +72,12 @@ def sum_batch(self, sum_batch_size=None): return self.__class__(diag.sum(-2)) + def exp(self): + return DiagLazyTensor(self._diag.exp()) + + def sqrt(self): + return DiagLazyTensor(self._diag.sqrt()) + def zero_mean_mvn_samples(self, num_samples): if self.ndimension() == 3: base_samples = torch.randn( diff --git a/gpytorch/lazy/lazy_evaluated_kernel_tensor.py b/gpytorch/lazy/lazy_evaluated_kernel_tensor.py index b1043c869..f1e8f1852 100644 --- a/gpytorch/lazy/lazy_evaluated_kernel_tensor.py +++ b/gpytorch/lazy/lazy_evaluated_kernel_tensor.py @@ -47,6 +47,17 @@ def _quad_form_derivative(self, left_vecs, right_vecs): def _transpose_nonbatch(self): return self.__class__(self.kernel, self.x2, self.x1, **self.params) + def _batch_get_indices(self, batch_indices, left_indices, right_indices): + from ..kernels import Kernel + + x1 = self.x1[batch_indices, left_indices, :].unsqueeze(0) + x2 = self.x2[batch_indices, right_indices, :].unsqueeze(0) + res = super(Kernel, self.kernel).__call__(x1.transpose(-1, -2), x2.transpose(-1, -2)) + if isinstance(res, LazyTensor): + res = res.evaluate() + res = res.view(-1) + return res + def _get_indices(self, left_indices, right_indices): from ..kernels import Kernel diff --git a/gpytorch/lazy/lazy_tensor.py b/gpytorch/lazy/lazy_tensor.py index 
0ff4609a2..0336a27fb 100644 --- a/gpytorch/lazy/lazy_tensor.py +++ b/gpytorch/lazy/lazy_tensor.py @@ -12,6 +12,7 @@ from .. import beta_features, settings from .lazy_tensor_representation_tree import LazyTensorRepresentationTree +from IPython.core.debugger import set_trace class LazyTensor(object): """ diff --git a/gpytorch/likelihoods/__init__.py b/gpytorch/likelihoods/__init__.py index cf58d381c..febade1e7 100644 --- a/gpytorch/likelihoods/__init__.py +++ b/gpytorch/likelihoods/__init__.py @@ -1,19 +1,25 @@ from __future__ import absolute_import, division, print_function, unicode_literals from .bernoulli_likelihood import BernoulliLikelihood -from .gaussian_likelihood import GaussianLikelihood, HomoskedasticGaussianLikelihood +from .gaussian_likelihood import _GaussianLikelihoodBase, GaussianLikelihood from .likelihood import Likelihood -from .multitask_gaussian_likelihood import MultitaskGaussianLikelihood +from .multitask_gaussian_likelihood import ( + _MultitaskGaussianLikelihoodBase, + MultitaskGaussianLikelihood, + MultitaskGaussianLikelihood_Kronecker, +) from .noise_models import HeteroskedasticNoise from .softmax_likelihood import SoftmaxLikelihood __all__ = [ + "_GaussianLikelihoodBase", + "_MultitaskGaussianLikelihoodBase", "BernoulliLikelihood", "GaussianLikelihood", "HeteroskedasticNoise", - "HomoskedasticGaussianLikelihood", "Likelihood", "MultitaskGaussianLikelihood", + "MultitaskGaussianLikelihood_Kronecker", "SoftmaxLikelihood", ] diff --git a/gpytorch/likelihoods/gaussian_likelihood.py b/gpytorch/likelihoods/gaussian_likelihood.py index 8bb6becbc..c566c8220 100644 --- a/gpytorch/likelihoods/gaussian_likelihood.py +++ b/gpytorch/likelihoods/gaussian_likelihood.py @@ -10,43 +10,32 @@ from .likelihood import Likelihood from .noise_models import HomoskedasticNoise -DEPRECATION_WARNING = "'GaussianLikelihood' was renamed to 'HomoskedasticGaussianLikelihood'" - -class GaussianLikelihood(Likelihood): - def __init__(self, *args, **kwargs): - if len(args) + len(kwargs) == 0 or "log_noise_prior" in kwargs or "batch_size" in kwargs: - warnings.warn(DEPRECATION_WARNING, DeprecationWarning) - logging.warning(DEPRECATION_WARNING) - self.__init__(log_noise_covar=HomoskedasticNoise(*args, **kwargs)) - self._is_homoskedastic = True - else: - super(GaussianLikelihood, self).__init__() - self.log_noise_covar = args[0] if len(kwargs) == 0 else kwargs["log_noise_covar"] +class _GaussianLikelihoodBase(Likelihood): + def __init__(self, log_noise_covar): + super(_GaussianLikelihoodBase, self).__init__() + self.log_noise_covar = log_noise_covar def forward(self, input, *params): if not isinstance(input, MultivariateNormal): - raise ValueError("GaussianLikelihood requires a MultivariateNormal input") + raise ValueError("Gaussian Likelihoods require a MultivariateNormal input") mean, covar = input.mean, input.lazy_covariance_matrix log_noise_covar = self.log_noise_covar(*params) if isinstance(log_noise_covar, DiagLazyTensor): full_covar = AddedDiagLazyTensor(covar, log_noise_covar.exp()) else: - # TODO: Deal with non-diagonal noise covariance models + # TODO: Poperly deal with non-diagonal noise covariance models full_covar = covar + log_noise_covar.exp() return input.__class__(mean, full_covar) def variational_log_probability(self, input, target): - if hasattr(self, "_is_homoskedastic"): - return HomoskedasticGaussianLikelihood.variational_log_probability(self, input, target) - else: - raise NotImplementedError + raise NotImplementedError -class 
HomoskedasticGaussianLikelihood(GaussianLikelihood): +class GaussianLikelihood(_GaussianLikelihoodBase): def __init__(self, log_noise_prior=None, batch_size=1): log_noise_covar = HomoskedasticNoise(log_noise_prior=log_noise_prior, batch_size=1) - super(HomoskedasticGaussianLikelihood, self).__init__(log_noise_covar=log_noise_covar) + super(GaussianLikelihood, self).__init__(log_noise_covar=log_noise_covar) def variational_log_probability(self, input, target): mean, variance = input.mean, input.variance diff --git a/gpytorch/likelihoods/multitask_gaussian_likelihood.py b/gpytorch/likelihoods/multitask_gaussian_likelihood.py index 57623a1c2..be9a2a53b 100644 --- a/gpytorch/likelihoods/multitask_gaussian_likelihood.py +++ b/gpytorch/likelihoods/multitask_gaussian_likelihood.py @@ -12,10 +12,8 @@ NonLazyTensor, RootLazyTensor, ) -from ..likelihoods import GaussianLikelihood, Likelihood - - -DEPRECATION_WARNING = "'MultitaskGaussianLikelihood' was renamed to 'HomoskedasticMultitaskGaussianLikelihood'" +from ..likelihoods import _GaussianLikelihoodBase, Likelihood +from .noise_models import MultitaskHomoskedasticNoise def _eval_covar_matrix(task_noise_covar_factor, log_noise): @@ -31,7 +29,7 @@ def _eval_corr_matrix(task_noise_corr_factor): return M * dsqrtinv.unsqueeze(-1).matmul(dsqrtinv.unsqueeze(0)) -class MultitaskGaussianLikelihood(GaussianLikelihood): +class _MultitaskGaussianLikelihoodBase(_GaussianLikelihoodBase): """ A convenient extension of the :class:`gpytorch.likelihoods.GaussianLikelihood` to the multitask setting that allows for a full cross-task covariance structure for the noise. The fitted covariance matrix has rank `rank`. @@ -46,16 +44,19 @@ def __init__(self, num_tasks, log_noise_covar, rank=0, task_correlation_prior=No Args: num_tasks (int): Number of tasks. - log_noise_covar TODO + log_noise_covar (:obj:`gpytorch.module.Module`): A model for the log-noise covariance. This can be a + simple homoskedastic model, or a GP that is to be fitted on the observed measurement errors. rank (int): The rank of the task noise covariance matrix to fit. If `rank` is set to 0, then a diagonal covariance matrix is fit. - task_prior (:obj:`gpytorch.priors.Prior`): Prior to use over the task noise covariance matrix if - `rank` > 0, or a prior over the log of just the diagonal elements, if `rank` == 0. + task_correlation_prior (:obj:`gpytorch.priors.Prior`): Prior to use over the task noise correlation matrix. + Only used when `rank` > 0. + + batch_size (int): Number of batches. """ - super(MultitaskGaussianLikelihood, self).__init__(log_noise_covar=log_noise_covar) + super(_MultitaskGaussianLikelihoodBase, self).__init__(log_noise_covar=log_noise_covar) if rank != 0: self.register_parameter( name="task_noise_corr_factor", parameter=torch.nn.Parameter(torch.randn(batch_size, num_tasks, rank)) @@ -96,10 +97,11 @@ def forward(self, input, *params): added. """ mean, covar = input.mean, input.lazy_covariance_matrix + batch_shape, n = covar.shape[:-2], covar.shape[-1] // self.num_tasks if hasattr(self, "task_noise_corr_factor"): task_noise_corr_factor = self.task_noise_corr_factor - if covar.ndimension() == 2: + if len(batch_shape) > 0: if settings.debug.on() and task_noise_corr_factor.size(0) > 1: raise RuntimeError( "With batch_size > 1, expected a batched MultitaskMultivariateNormal distribution." 
@@ -109,20 +111,54 @@ def forward(self, input, *params): task_corr = NonLazyTensor(_eval_corr_matrix(task_noise_corr_factor)) else: task_corr = DiagLazyTensor( - torch.ones(covar.shape[:-2] + torch.Size([self.num_tasks]), dtype=covar.dtype, device=covar.device) + torch.ones(batch_shape + torch.Size([self.num_tasks]), dtype=covar.dtype, device=covar.device) ) - log_noise_covar = self.log_noise_covar(*params) # n x num_tasks + log_noise_covar = self.log_noise_covar(*params) D_sem = log_noise_covar.exp().sqrt() - task_covar_blocks = MatmulLazyTensor(MatmulLazyTensor(D_sem, task_corr.repeat(mean.shape[-2], 1, 1)), D_sem) + task_covar_blocks = MatmulLazyTensor(MatmulLazyTensor(D_sem, task_corr.repeat(n, 1, 1)), D_sem) task_covar = BlockDiagLazyTensor(task_covar_blocks) return input.__class__(mean, covar + task_covar) def variational_log_probability(self, input, target): - raise NotImplementedError + raise NotImplementedError("Variational inference with Multitask Gaussian likelihood is not yet supported") -class HomoskedasticMultitaskGaussianLikelihood(MultitaskGaussianLikelihood): +class MultitaskGaussianLikelihood(_MultitaskGaussianLikelihoodBase): + """ + A convenient extension of the :class:`gpytorch.likelihoods.GaussianLikelihood` to the multitask setting that allows + for a full cross-task covariance structure for the noise. The fitted covariance matrix has rank `rank`. + If a strictly diagonal task noise covariance matrix is desired, then rank=0 should be set. (This option still + allows for a different `log_noise` parameter for each task.) + + Like the Gaussian likelihood, this object can be used with exact inference. + """ + + def __init__(self, num_tasks, rank=0, task_correlation_prior=None, batch_size=1, log_noise_prior=None): + """ + Args: + num_tasks (int): Number of tasks. + + rank (int): The rank of the task noise covariance matrix to fit. If `rank` is set to 0, + then a diagonal covariance matrix is fit. + + task_correlation_prior (:obj:`gpytorch.priors.Prior`): Prior to use over the task noise correlaton matrix. + Only used when `rank` > 0. + + """ + log_noise_covar = MultitaskHomoskedasticNoise( + num_tasks=num_tasks, log_noise_prior=log_noise_prior, batch_size=1 + ) + super(MultitaskGaussianLikelihood, self).__init__( + num_tasks=num_tasks, + log_noise_covar=log_noise_covar, + rank=rank, + task_correlation_prior=task_correlation_prior, + batch_size=batch_size, + ) + + +class MultitaskGaussianLikelihood_Kronecker(_MultitaskGaussianLikelihoodBase): """ A convenient extension of the :class:`gpytorch.likelihoods.GaussianLikelihood` to the multitask setting that allows for a full cross-task covariance structure for the noise. The fitted covariance matrix has rank `rank`. 
@@ -167,7 +203,7 @@ def __init__(self, num_tasks, rank=0, task_prior=None, batch_size=1, log_noise_p ) self.num_tasks = num_tasks - def forward(self, input): + def forward(self, input, *params): """ Adds the log task noises to the diagonal of the covariance matrix of the supplied :obj:`gpytorch.distributions.MultivariateNormal` or @@ -233,9 +269,5 @@ def forward(self, input): raise RuntimeError("With batch_size > 1, expected a batched MultitaskMultivariateNormal distribution.") noise = noise.squeeze(0) - # set_trace() covar = add_diag(covar, noise) return input.__class__(mean, covar) - - def variational_log_probability(self, input, target): - raise NotImplementedError("Variational inference with Multitask Gaussian likelihood is not yet supported") diff --git a/gpytorch/likelihoods/noise_models.py b/gpytorch/likelihoods/noise_models.py index 572ddb15f..ed25ccb07 100644 --- a/gpytorch/likelihoods/noise_models.py +++ b/gpytorch/likelihoods/noise_models.py @@ -18,11 +18,17 @@ def __init__(self, log_noise_prior=None, batch_size=1): def forward(self, params): log_noise = self.log_noise p = params[0] if isinstance(params, list) else params - var_shape = p.shape[:-2] + p.shape[-1:] - if len(var_shape) == 1: - log_noise = log_noise.squeeze(0) - variances = log_noise * torch.ones(*var_shape, dtype=log_noise.dtype, device=log_noise.device) - return DiagLazyTensor(variances) + n = p.shape[-2] if len(p.shape) > 1 else p.shape[-1] + log_noise_diag = log_noise.repeat(n, 1) + return DiagLazyTensor(log_noise_diag) + + +class MultitaskHomoskedasticNoise(HomoskedasticNoise): + def __init__(self, num_tasks, log_noise_prior=None, batch_size=1): + super(HomoskedasticNoise, self).__init__() + self.register_parameter( + name="log_noise", parameter=Parameter(torch.zeros(batch_size, num_tasks)), prior=log_noise_prior + ) class HeteroskedasticNoise(Module): diff --git a/gpytorch/mlls/exact_marginal_log_likelihood.py b/gpytorch/mlls/exact_marginal_log_likelihood.py index 523f8c351..051beabf4 100644 --- a/gpytorch/mlls/exact_marginal_log_likelihood.py +++ b/gpytorch/mlls/exact_marginal_log_likelihood.py @@ -5,7 +5,7 @@ import torch from .marginal_log_likelihood import MarginalLogLikelihood -from ..likelihoods import GaussianLikelihood +from ..likelihoods import _GaussianLikelihoodBase from ..distributions import MultivariateNormal @@ -18,7 +18,7 @@ def __init__(self, likelihood, model): - likelihood: (Likelihood) - the likelihood for the model - model: (Module) - the exact GP model """ - if not isinstance(likelihood, GaussianLikelihood): + if not isinstance(likelihood, _GaussianLikelihoodBase): raise RuntimeError("Likelihood must be Gaussian for exact inference") super(ExactMarginalLogLikelihood, self).__init__(likelihood, model) diff --git a/gpytorch/models/exact_gp.py b/gpytorch/models/exact_gp.py index 7e5e6b340..9bb17d7c2 100644 --- a/gpytorch/models/exact_gp.py +++ b/gpytorch/models/exact_gp.py @@ -7,7 +7,7 @@ import torch from ..functions import exact_predictive_mean, exact_predictive_covar from ..distributions import MultivariateNormal, MultitaskMultivariateNormal -from ..likelihoods import GaussianLikelihood +from ..likelihoods import _GaussianLikelihoodBase from .. 
import settings from .gp import GP @@ -18,8 +18,8 @@ def __init__(self, train_inputs, train_targets, likelihood): train_inputs = (train_inputs,) if train_inputs is not None and not all(torch.is_tensor(train_input) for train_input in train_inputs): raise RuntimeError("Train inputs must be a tensor, or a list/tuple of tensors") - if not isinstance(likelihood, GaussianLikelihood): - raise RuntimeError("ExactGP can only handle GaussianLikelihood") + if not isinstance(likelihood, _GaussianLikelihoodBase): + raise RuntimeError("ExactGP can only handle Gaussian Likelihoods") super(ExactGP, self).__init__() if train_inputs is not None: From ad257fde2693855769287a73eeec53240777c872 Mon Sep 17 00:00:00 2001 From: Max Balandat Date: Mon, 29 Oct 2018 18:07:48 -0700 Subject: [PATCH 04/12] Remove stray IPython debugger import --- gpytorch/lazy/lazy_tensor.py | 1 - 1 file changed, 1 deletion(-) diff --git a/gpytorch/lazy/lazy_tensor.py b/gpytorch/lazy/lazy_tensor.py index 0336a27fb..0ff4609a2 100644 --- a/gpytorch/lazy/lazy_tensor.py +++ b/gpytorch/lazy/lazy_tensor.py @@ -12,7 +12,6 @@ from .. import beta_features, settings from .lazy_tensor_representation_tree import LazyTensorRepresentationTree -from IPython.core.debugger import set_trace class LazyTensor(object): """ From fbc57241b529ce921d664d86685935c4d3219feb Mon Sep 17 00:00:00 2001 From: Max Balandat Date: Mon, 5 Nov 2018 16:09:31 -0800 Subject: [PATCH 05/12] Rename log_noise_covar to noise_covar, small fixes --- gpytorch/likelihoods/__init__.py | 4 +- gpytorch/likelihoods/gaussian_likelihood.py | 21 ++++------ .../multitask_gaussian_likelihood.py | 38 ++++++++++--------- gpytorch/likelihoods/noise_models.py | 22 +++++++---- gpytorch/models/exact_gp.py | 2 +- 5 files changed, 45 insertions(+), 42 deletions(-) diff --git a/gpytorch/likelihoods/__init__.py b/gpytorch/likelihoods/__init__.py index febade1e7..e240101bd 100644 --- a/gpytorch/likelihoods/__init__.py +++ b/gpytorch/likelihoods/__init__.py @@ -6,7 +6,7 @@ from .multitask_gaussian_likelihood import ( _MultitaskGaussianLikelihoodBase, MultitaskGaussianLikelihood, - MultitaskGaussianLikelihood_Kronecker, + MultitaskGaussianLikelihoodKronecker, ) from .noise_models import HeteroskedasticNoise from .softmax_likelihood import SoftmaxLikelihood @@ -20,6 +20,6 @@ "HeteroskedasticNoise", "Likelihood", "MultitaskGaussianLikelihood", - "MultitaskGaussianLikelihood_Kronecker", + "MultitaskGaussianLikelihoodKronecker", "SoftmaxLikelihood", ] diff --git a/gpytorch/likelihoods/gaussian_likelihood.py b/gpytorch/likelihoods/gaussian_likelihood.py index c566c8220..2943deb87 100644 --- a/gpytorch/likelihoods/gaussian_likelihood.py +++ b/gpytorch/likelihoods/gaussian_likelihood.py @@ -1,31 +1,24 @@ from __future__ import absolute_import, division, print_function, unicode_literals import math -import warnings -import logging from .. 
import settings from ..distributions import MultivariateNormal -from ..lazy import AddedDiagLazyTensor, DiagLazyTensor +from ..functions import add_diag from .likelihood import Likelihood from .noise_models import HomoskedasticNoise class _GaussianLikelihoodBase(Likelihood): - def __init__(self, log_noise_covar): + def __init__(self, noise_covar): super(_GaussianLikelihoodBase, self).__init__() - self.log_noise_covar = log_noise_covar + self.noise_covar = noise_covar def forward(self, input, *params): if not isinstance(input, MultivariateNormal): - raise ValueError("Gaussian Likelihoods require a MultivariateNormal input") + raise ValueError("Gaussian likelihoods require a MultivariateNormal input") mean, covar = input.mean, input.lazy_covariance_matrix - log_noise_covar = self.log_noise_covar(*params) - if isinstance(log_noise_covar, DiagLazyTensor): - full_covar = AddedDiagLazyTensor(covar, log_noise_covar.exp()) - else: - # TODO: Poperly deal with non-diagonal noise covariance models - full_covar = covar + log_noise_covar.exp() + full_covar = covar + self.noise_covar(*params) return input.__class__(mean, full_covar) def variational_log_probability(self, input, target): @@ -34,8 +27,8 @@ def variational_log_probability(self, input, target): class GaussianLikelihood(_GaussianLikelihoodBase): def __init__(self, log_noise_prior=None, batch_size=1): - log_noise_covar = HomoskedasticNoise(log_noise_prior=log_noise_prior, batch_size=1) - super(GaussianLikelihood, self).__init__(log_noise_covar=log_noise_covar) + noise_covar = HomoskedasticNoise(log_noise_prior=log_noise_prior, batch_size=1) + super(GaussianLikelihood, self).__init__(noise_covar=noise_covar) def variational_log_probability(self, input, target): mean, variance = input.mean, input.variance diff --git a/gpytorch/likelihoods/multitask_gaussian_likelihood.py b/gpytorch/likelihoods/multitask_gaussian_likelihood.py index be9a2a53b..a5ebdc735 100644 --- a/gpytorch/likelihoods/multitask_gaussian_likelihood.py +++ b/gpytorch/likelihoods/multitask_gaussian_likelihood.py @@ -23,10 +23,12 @@ def _eval_covar_matrix(task_noise_covar_factor, log_noise): ) -def _eval_corr_matrix(task_noise_corr_factor): - M = task_noise_corr_factor.matmul(task_noise_corr_factor.transpose(-1, -2)) - dsqrtinv = 1 / M.diag().sqrt() - return M * dsqrtinv.unsqueeze(-1).matmul(dsqrtinv.unsqueeze(0)) +def _eval_corr_matrix(corr_factor, corr_diag): + M = corr_factor.matmul(corr_factor.transpose(-1, -2)) + idx = torch.arange(M.shape[-1], dtype=torch.long, device=M.device) + M[..., idx, idx] += corr_diag + sem_inv = 1 / torch.diagonal(M, dim1=-2, dim2=-1).sqrt().unsqueeze(-1) + return M * sem_inv.matmul(sem_inv.transpose(-1, -2)) class _MultitaskGaussianLikelihoodBase(_GaussianLikelihoodBase): @@ -39,12 +41,12 @@ class _MultitaskGaussianLikelihoodBase(_GaussianLikelihoodBase): Like the Gaussian likelihood, this object can be used with exact inference. """ - def __init__(self, num_tasks, log_noise_covar, rank=0, task_correlation_prior=None, batch_size=1): + def __init__(self, num_tasks, noise_covar, rank=0, task_correlation_prior=None, batch_size=1): """ Args: num_tasks (int): Number of tasks. - log_noise_covar (:obj:`gpytorch.module.Module`): A model for the log-noise covariance. This can be a + noise_covar (:obj:`gpytorch.module.Module`): A model for the noise covariance. This can be a simple homoskedastic model, or a GP that is to be fitted on the observed measurement errors. rank (int): The rank of the task noise covariance matrix to fit. 
If `rank` is set to 0, @@ -56,16 +58,19 @@ def __init__(self, num_tasks, log_noise_covar, rank=0, task_correlation_prior=No batch_size (int): Number of batches. """ - super(_MultitaskGaussianLikelihoodBase, self).__init__(log_noise_covar=log_noise_covar) + super(_MultitaskGaussianLikelihoodBase, self).__init__(noise_covar=noise_covar) if rank != 0: self.register_parameter( name="task_noise_corr_factor", parameter=torch.nn.Parameter(torch.randn(batch_size, num_tasks, rank)) ) + self.register_parameter( + name="task_noise_corr_diag", parameter=torch.nn.Parameter(torch.ones(batch_size, num_tasks)) + ) if task_correlation_prior is not None: self.register_derived_prior( name="MultitaskErrorCorrelationPrior", prior=task_correlation_prior, - parameter_names=("task_noise_corr_factor",), + parameter_names=("task_noise_corr_factor", "task_noise_corr_diag"), transform=_eval_corr_matrix, ) elif task_correlation_prior is not None: @@ -101,21 +106,22 @@ def forward(self, input, *params): if hasattr(self, "task_noise_corr_factor"): task_noise_corr_factor = self.task_noise_corr_factor + task_noise_corr_diag = self.task_noise_corr_diag if len(batch_shape) > 0: if settings.debug.on() and task_noise_corr_factor.size(0) > 1: raise RuntimeError( "With batch_size > 1, expected a batched MultitaskMultivariateNormal distribution." ) task_noise_corr_factor = task_noise_corr_factor.squeeze(0) - # TODO: This is inefficient, find a better way to do this - task_corr = NonLazyTensor(_eval_corr_matrix(task_noise_corr_factor)) + task_noise_corr_diag = task_noise_corr_diag.squeeze(0) + # # TODO: This is inefficient, find a better way to do this + task_corr = NonLazyTensor(_eval_corr_matrix(task_noise_corr_factor, task_noise_corr_diag)) else: task_corr = DiagLazyTensor( torch.ones(batch_shape + torch.Size([self.num_tasks]), dtype=covar.dtype, device=covar.device) ) - log_noise_covar = self.log_noise_covar(*params) - D_sem = log_noise_covar.exp().sqrt() + D_sem = self.noise_covar(*params).sqrt() task_covar_blocks = MatmulLazyTensor(MatmulLazyTensor(D_sem, task_corr.repeat(n, 1, 1)), D_sem) task_covar = BlockDiagLazyTensor(task_covar_blocks) return input.__class__(mean, covar + task_covar) @@ -146,19 +152,17 @@ def __init__(self, num_tasks, rank=0, task_correlation_prior=None, batch_size=1, Only used when `rank` > 0. """ - log_noise_covar = MultitaskHomoskedasticNoise( - num_tasks=num_tasks, log_noise_prior=log_noise_prior, batch_size=1 - ) + noise_covar = MultitaskHomoskedasticNoise(num_tasks=num_tasks, log_noise_prior=log_noise_prior, batch_size=1) super(MultitaskGaussianLikelihood, self).__init__( num_tasks=num_tasks, - log_noise_covar=log_noise_covar, + noise_covar=noise_covar, rank=rank, task_correlation_prior=task_correlation_prior, batch_size=batch_size, ) -class MultitaskGaussianLikelihood_Kronecker(_MultitaskGaussianLikelihoodBase): +class MultitaskGaussianLikelihoodKronecker(_MultitaskGaussianLikelihoodBase): """ A convenient extension of the :class:`gpytorch.likelihoods.GaussianLikelihood` to the multitask setting that allows for a full cross-task covariance structure for the noise. The fitted covariance matrix has rank `rank`. 
diff --git a/gpytorch/likelihoods/noise_models.py b/gpytorch/likelihoods/noise_models.py index ed25ccb07..e0ac097d8 100644 --- a/gpytorch/likelihoods/noise_models.py +++ b/gpytorch/likelihoods/noise_models.py @@ -16,10 +16,14 @@ def __init__(self, log_noise_prior=None, batch_size=1): ) def forward(self, params): - log_noise = self.log_noise + log_noise = self.log_noise.squeeze(0).exp() p = params[0] if isinstance(params, list) else params - n = p.shape[-2] if len(p.shape) > 1 else p.shape[-1] - log_noise_diag = log_noise.repeat(n, 1) + shape = p.shape if len(p.shape) == 1 else p.shape[:-1] + if log_noise.ndimension() > len(shape): + raise RuntimeError("Must provide batched input if in batch mode") + if log_noise.shape[-1] > 1: # deal with multi-task case + shape = shape + torch.Size([log_noise.shape[-1]]) + log_noise_diag = log_noise.expand(shape) return DiagLazyTensor(log_noise_diag) @@ -32,14 +36,16 @@ def __init__(self, num_tasks, log_noise_prior=None, batch_size=1): class HeteroskedasticNoise(Module): - def __init__(self, log_noise_model): + def __init__(self, noise_model, log_scale=True): super(HeteroskedasticNoise, self).__init__() - self.log_noise_model = log_noise_model + self.noise_model = noise_model + self.log_scale = log_scale def forward(self, params): - output = self.log_noise_model(params[0] if isinstance(params, list) or isinstance(params, tuple) else params) + output = self.noise_model(params[0] if isinstance(params, list) or isinstance(params, tuple) else params) if not isinstance(output, MultivariateNormal): - raise NotImplementedError("Currently only log-noise models that return a MultivariateNormal are supported") + raise NotImplementedError("Currently only noise models that return a MultivariateNormal are supported") # note: this also works with MultitaskMultivariateNormal, where this # will return a batched DiagLazyTensors of size n x num_tasks x num_tasks - return DiagLazyTensor(output.mean) + noise_diag = output.mean.exp() if self.log_scale else output.mean + return DiagLazyTensor(noise_diag) diff --git a/gpytorch/models/exact_gp.py b/gpytorch/models/exact_gp.py index 2c36a58b1..07b23ce13 100644 --- a/gpytorch/models/exact_gp.py +++ b/gpytorch/models/exact_gp.py @@ -19,7 +19,7 @@ def __init__(self, train_inputs, train_targets, likelihood): if train_inputs is not None and not all(torch.is_tensor(train_input) for train_input in train_inputs): raise RuntimeError("Train inputs must be a tensor, or a list/tuple of tensors") if not isinstance(likelihood, _GaussianLikelihoodBase): - raise RuntimeError("ExactGP can only handle Gaussian Likelihoods") + raise RuntimeError("ExactGP can only handle Gaussian likelihoods") super(ExactGP, self).__init__() if train_inputs is not None: From 1454558cb21c575cc433eefc39b86b0f49a98403 Mon Sep 17 00:00:00 2001 From: Max Balandat Date: Mon, 5 Nov 2018 18:47:00 -0800 Subject: [PATCH 06/12] Modify HeteroskedasticNoise to subset output This makes it possible to do things like using an MTGP over Y and log_var_Y to get a noise model where the noise level is correlated with the function values, and then subsetting to the noise estimate to use that in the likelihood of the primary GP.
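For illustration, here is a minimal sketch of how this could be wired up once a noise model has been fitted. None of the names below are part of this patch (NoiseGP, train_x, train_log_var and primary_model are hypothetical stand-ins), and the snippet only assumes the HeteroskedasticNoise and _GaussianLikelihoodBase interfaces introduced earlier in this series:

    # Hypothetical usage sketch (not part of this patch): plug a GP fitted on observed
    # log noise levels into the primary GP's likelihood via HeteroskedasticNoise.
    import torch
    import gpytorch
    from gpytorch.likelihoods import GaussianLikelihood, HeteroskedasticNoise
    from gpytorch.likelihoods.gaussian_likelihood import _GaussianLikelihoodBase

    class NoiseGP(gpytorch.models.ExactGP):
        # toy single-output GP over observed log noise levels (hypothetical helper)
        def __init__(self, train_x, train_log_var, likelihood):
            super(NoiseGP, self).__init__(train_x, train_log_var, likelihood)
            self.mean_module = gpytorch.means.ConstantMean()
            self.covar_module = gpytorch.kernels.RBFKernel()

        def forward(self, x):
            return gpytorch.distributions.MultivariateNormal(self.mean_module(x), self.covar_module(x))

    train_x = torch.linspace(0, 1, 11)
    train_log_var = torch.log(0.01 + 0.1 * train_x)  # stand-in for measured log variances

    noise_gp = NoiseGP(train_x, train_log_var, GaussianLikelihood())
    noise_gp.eval()  # in practice this model would be trained first

    # log_scale=True exponentiates the noise model's posterior mean to obtain variances;
    # with a multitask GP over (Y, log_var_Y), noise_indices would select the
    # log-variance output instead of using the full multitask mean.
    hetero_likelihood = _GaussianLikelihoodBase(HeteroskedasticNoise(noise_gp, log_scale=True))

    # The inputs are passed through the likelihood so the noise model can be evaluated
    # at those locations, e.g. hetero_likelihood(primary_model(train_x), train_x).

The noise_indices argument is what this commit adds: it lets a single multitask noise model produce both the function values and the noise level, with only the noise column fed into the likelihood.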
--- gpytorch/likelihoods/noise_models.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/gpytorch/likelihoods/noise_models.py b/gpytorch/likelihoods/noise_models.py index e0ac097d8..713a6a701 100644 --- a/gpytorch/likelihoods/noise_models.py +++ b/gpytorch/likelihoods/noise_models.py @@ -36,10 +36,11 @@ def __init__(self, num_tasks, log_noise_prior=None, batch_size=1): class HeteroskedasticNoise(Module): - def __init__(self, noise_model, log_scale=True): + def __init__(self, noise_model, log_scale=True, noise_indices=None): super(HeteroskedasticNoise, self).__init__() self.noise_model = noise_model self.log_scale = log_scale + self.noise_indices = noise_indices def forward(self, params): output = self.noise_model(params[0] if isinstance(params, list) or isinstance(params, tuple) else params) @@ -47,5 +48,5 @@ def forward(self, params): raise NotImplementedError("Currently only noise models that return a MultivariateNormal are supported") # note: this also works with MultitaskMultivariateNormal, where this # will return a batched DiagLazyTensors of size n x num_tasks x num_tasks - noise_diag = output.mean.exp() if self.log_scale else output.mean - return DiagLazyTensor(noise_diag) + noise_diag = output.mean if self.noise_indices is None else output.mean[..., self.noise_indices] + return DiagLazyTensor(noise_diag.exp() if self.log_scale else noise_diag) From 7abe05a7678c36077b53b5fa49f9588b90aac60f Mon Sep 17 00:00:00 2001 From: Max Balandat Date: Thu, 8 Nov 2018 15:40:17 -0800 Subject: [PATCH 07/12] Bugfixes --- gpytorch/kernels/__init__.py | 2 -- gpytorch/lazy/diag_lazy_tensor.py | 12 ++++----- gpytorch/likelihoods/gaussian_likelihood.py | 4 +-- gpytorch/likelihoods/homoskedastic_noise.py | 26 ------------------- gpytorch/likelihoods/noise_models.py | 2 +- gpytorch/models/exact_gp.py | 2 +- gpytorch/settings.py | 12 +++++++++ ...t_general_multitask_gaussian_likelihood.py | 13 ++++------ 8 files changed, 26 insertions(+), 47 deletions(-) delete mode 100644 gpytorch/likelihoods/homoskedastic_noise.py diff --git a/gpytorch/kernels/__init__.py b/gpytorch/kernels/__init__.py index c41719629..cbab73747 100644 --- a/gpytorch/kernels/__init__.py +++ b/gpytorch/kernels/__init__.py @@ -19,7 +19,6 @@ from .rbf_kernel import RBFKernel from .scale_kernel import ScaleKernel from .spectral_mixture_kernel import SpectralMixtureKernel -from .white_noise_kernel import WhiteNoiseKernel __all__ = [ "Kernel", @@ -41,5 +40,4 @@ "RBFKernel", "ScaleKernel", "SpectralMixtureKernel", - "WhiteNoiseKernel", ] diff --git a/gpytorch/lazy/diag_lazy_tensor.py b/gpytorch/lazy/diag_lazy_tensor.py index 81448e13d..6dcf3bdfd 100644 --- a/gpytorch/lazy/diag_lazy_tensor.py +++ b/gpytorch/lazy/diag_lazy_tensor.py @@ -82,12 +82,6 @@ def sum_batch(self, sum_batch_size=None): return self.__class__(diag.sum(-2)) - def exp(self): - return DiagLazyTensor(self._diag.exp()) - - def sqrt(self): - return DiagLazyTensor(self._diag.sqrt()) - def inv_matmul(self, tensor): is_vec = False if (self.dim() == 2 and tensor.dim() == 1): @@ -167,6 +161,12 @@ def root_inv_decomposition(self, initial_vectors=None, test_vectors=None): else: return super(DiagLazyTensor, self).root_decomposition() + def exp(self): + return DiagLazyTensor(self._diag.exp()) + + def sqrt(self): + return DiagLazyTensor(self._diag.sqrt()) + def zero_mean_mvn_samples(self, num_samples): if self.ndimension() == 3: base_samples = torch.randn( diff --git a/gpytorch/likelihoods/gaussian_likelihood.py b/gpytorch/likelihoods/gaussian_likelihood.py index 
0d34efe2d..2ac9a6380 100644 --- a/gpytorch/likelihoods/gaussian_likelihood.py +++ b/gpytorch/likelihoods/gaussian_likelihood.py @@ -32,12 +32,10 @@ def __init__(self, log_noise_prior=None, batch_size=1): def variational_log_probability(self, input, target): mean, variance = input.mean, input.variance - if mean.dim() > target.dim(): target = target.unsqueeze(-1) - log_noise = self.noise_covar.log_noise - + log_noise = self.log_noise_covar.log_noise if variance.ndimension() == 1: if settings.debug.on() and log_noise.size(0) > 1: raise RuntimeError("With batch_size > 1, expected a batched MultivariateNormal distribution.") diff --git a/gpytorch/likelihoods/homoskedastic_noise.py b/gpytorch/likelihoods/homoskedastic_noise.py deleted file mode 100644 index e559235ae..000000000 --- a/gpytorch/likelihoods/homoskedastic_noise.py +++ /dev/null @@ -1,26 +0,0 @@ -from __future__ import absolute_import, division, print_function, unicode_literals - -import torch -from torch.nn import Parameter - -from ..lazy import DiagLazyTensor -from ..module import Module - - -class HomoskedasticNoise(Module): - def __init__(self, log_noise_prior=None, batch_size=1): - super(HomoskedasticNoise, self).__init__() - self.register_parameter( - name="log_noise", parameter=Parameter(torch.zeros(batch_size, 1)), prior=log_noise_prior - ) - - def forward(self, params): - noise = self.log_noise.exp() - if isinstance(params, list): - variance_shape = params[0].shape[:-2] + params[0].shape[-1:] - else: - variance_shape = params.shape[:-2] + params.shape[-1:] - if len(variance_shape) == 1: - noise = noise.squeeze(0) - variances = noise * torch.ones(*variance_shape, dtype=noise.dtype, device=noise.device) - return DiagLazyTensor(variances) diff --git a/gpytorch/likelihoods/noise_models.py b/gpytorch/likelihoods/noise_models.py index 713a6a701..5c955fef8 100644 --- a/gpytorch/likelihoods/noise_models.py +++ b/gpytorch/likelihoods/noise_models.py @@ -43,7 +43,7 @@ def __init__(self, noise_model, log_scale=True, noise_indices=None): self.noise_indices = noise_indices def forward(self, params): - output = self.noise_model(params[0] if isinstance(params, list) or isinstance(params, tuple) else params) + output = self.noise_model(*params if isinstance(params, list) or isinstance(params, tuple) else params) if not isinstance(output, MultivariateNormal): raise NotImplementedError("Currently only noise models that return a MultivariateNormal are supported") # note: this also works with MultitaskMultivariateNormal, where this diff --git a/gpytorch/models/exact_gp.py b/gpytorch/models/exact_gp.py index 07b23ce13..3d2db8fac 100644 --- a/gpytorch/models/exact_gp.py +++ b/gpytorch/models/exact_gp.py @@ -75,7 +75,7 @@ def __call__(self, *args, **kwargs): "train_inputs, train_targets cannot be None in training mode. " "Call .eval() for prior predictions, or call .set_train_data() to add training data." 
) - if settings.debug.on(): + if settings.check_training_data.on(): if not all(torch.equal(train_input, input) for train_input, input in zip(train_inputs, inputs)): raise RuntimeError("You must train on the training inputs!") res = super(ExactGP, self).__call__(*inputs, **kwargs) diff --git a/gpytorch/settings.py b/gpytorch/settings.py index 27063b4ac..9a701302b 100644 --- a/gpytorch/settings.py +++ b/gpytorch/settings.py @@ -50,6 +50,18 @@ def __exit__(self, *args): return False +class check_training_data(_feature_flag): + """ + Check whether the correct training data is supplied in Exact GP training mode + Pros: fewer data checks, fewer warning messages + Cons: possibility of supplying incorrect data, model accidentially in wrong mode + + Note: If using a Heteroskedastic Noise model, this will need to be disabled + """ + + _state = True + + class debug(_feature_flag): """ Whether or not to perform "safety" checks on the supplied data. diff --git a/test/likelihoods/test_general_multitask_gaussian_likelihood.py b/test/likelihoods/test_general_multitask_gaussian_likelihood.py index 7dfdea425..631839b6c 100644 --- a/test/likelihoods/test_general_multitask_gaussian_likelihood.py +++ b/test/likelihoods/test_general_multitask_gaussian_likelihood.py @@ -11,6 +11,7 @@ from gpytorch.likelihoods import MultitaskGaussianLikelihood from gpytorch.means import ConstantMean, MultitaskMean from gpytorch.distributions import MultitaskMultivariateNormal +from gpytorch.likelihoods.multitask_gaussian_likelihood import _eval_corr_matrix # Simple training data: let's try to learn a sine function @@ -73,7 +74,7 @@ def test_multitask_low_rank_noise_covar(self): # Again, note feeding duplicated x_data and indices indicating which task output = model(train_x) # TODO: Fix this view call!! 
- loss = -mll(output, train_y) + loss = -mll(output, train_y, train_x) loss.backward() optimizer.step() @@ -81,13 +82,9 @@ def test_multitask_low_rank_noise_covar(self): model.eval() likelihood.eval() - num_tasks = 2 - task_noise_covar_factor = likelihood.task_noise_covar_factor - log_noise = likelihood.log_noise - task_noise_covar = task_noise_covar_factor.matmul( - task_noise_covar_factor.transpose(-1, -2) - ) + log_noise.exp() * torch.eye(num_tasks) - + task_corr = _eval_corr_matrix(likelihood.task_noise_corr_factor, likelihood.task_noise_corr_diag) + noise_diag = likelihood.noise_covar.log_noise.squeeze().diag().exp().sqrt() + task_noise_covar = noise_diag.matmul(task_corr).matmul(noise_diag) self.assertGreater(task_noise_covar[0, 0, 1].item(), 0.05) From d44a657ff0c631b98aee2acee4d9c544a0091313 Mon Sep 17 00:00:00 2001 From: balandat Date: Fri, 9 Nov 2018 06:40:33 -0800 Subject: [PATCH 08/12] Some test fixes --- gpytorch/likelihoods/gaussian_likelihood.py | 14 ++++++++++++-- test/examples/test_simple_gp_regression.py | 14 +++++++------- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/gpytorch/likelihoods/gaussian_likelihood.py b/gpytorch/likelihoods/gaussian_likelihood.py index 2ac9a6380..0bad96e26 100644 --- a/gpytorch/likelihoods/gaussian_likelihood.py +++ b/gpytorch/likelihoods/gaussian_likelihood.py @@ -18,7 +18,12 @@ def forward(self, input, *params): if not isinstance(input, MultivariateNormal): raise ValueError("Gaussian likelihoods require a MultivariateNormal input") mean, covar = input.mean, input.lazy_covariance_matrix - full_covar = covar + self.noise_covar(*params) + if len(params) > 0: + shape = None + else: + shape = mean.shape if len(mean.shape) == 1 else mean.shape[:-1] + noise_covar = self.noise_covar(*params, shape=shape) + full_covar = covar + noise_covar return input.__class__(mean, full_covar) def variational_log_probability(self, input, target): @@ -35,7 +40,7 @@ def variational_log_probability(self, input, target): if mean.dim() > target.dim(): target = target.unsqueeze(-1) - log_noise = self.log_noise_covar.log_noise + log_noise = self.noise_covar.log_noise if variance.ndimension() == 1: if settings.debug.on() and log_noise.size(0) > 1: raise RuntimeError("With batch_size > 1, expected a batched MultivariateNormal distribution.") @@ -63,3 +68,8 @@ def pyro_sample_y(self, variational_dist_f, y_obs, sample_shape, name_prefix="") pyro.sample(name_prefix + "._training_labels", y_dist.independent(1), obs=y_obs) else: pyro.sample(name_prefix + "._training_labels", y_dist, obs=y_obs) + + @property + def log_noise(self): + # TODO: DeprecationWarningcation + return self.noise_covar.log_noise diff --git a/test/examples/test_simple_gp_regression.py b/test/examples/test_simple_gp_regression.py index 63ce09a2f..bc4708192 100644 --- a/test/examples/test_simple_gp_regression.py +++ b/test/examples/test_simple_gp_regression.py @@ -65,7 +65,7 @@ def test_prior(self): ) gp_model.mean_module.initialize(constant=1.5) gp_model.covar_module.base_kernel.initialize(log_lengthscale=0) - likelihood.initialize(log_noise=0) + likelihood.noise_covar.initialize(log_noise=0) # Compute posterior distribution gp_model.eval() @@ -88,7 +88,7 @@ def test_posterior_latent_gp_and_likelihood_without_optimization(self): log_lengthscale=SmoothedBoxPrior(exp(-10), exp(10), sigma=0.5, log_transform=True) ) gp_model.covar_module.base_kernel.initialize(log_lengthscale=-10) - likelihood.initialize(log_noise=-10) + likelihood.noise_covar.initialize(log_noise=-10) # Compute posterior 
distribution gp_model.eval() @@ -117,7 +117,7 @@ def test_posterior_latent_gp_and_likelihood_with_optimization(self): mll = gpytorch.ExactMarginalLogLikelihood(likelihood, gp_model) gp_model.covar_module.base_kernel.initialize(log_lengthscale=1) gp_model.mean_module.initialize(constant=0) - likelihood.initialize(log_noise=1) + likelihood.noise_covar.initialize(log_noise=1) # Find optimal model hyperparameters gp_model.train() @@ -160,7 +160,7 @@ def test_posterior_latent_gp_and_likelihood_fast_pred_var(self): mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, gp_model) gp_model.covar_module.base_kernel.initialize(log_lengthscale=1) gp_model.mean_module.initialize(constant=0) - likelihood.initialize(log_noise=1) + likelihood.noise_covar.initialize(log_noise=1) # Find optimal model hyperparameters gp_model.train() @@ -191,10 +191,10 @@ def test_posterior_latent_gp_and_likelihood_fast_pred_var(self): # Now bump up the likelihood to something huge # This will make it easy to calculate the variance - likelihood.log_noise.data.fill_(3) + likelihood.noise_covar.log_noise.data.fill_(3) test_function_predictions = likelihood(gp_model(train_x)) - noise = likelihood.log_noise.exp() + noise = likelihood.noise_covar.log_noise.exp() var_diff = (test_function_predictions.variance - noise).abs() self.assertLess(torch.max(var_diff / noise), 0.05) @@ -210,7 +210,7 @@ def test_posterior_latent_gp_and_likelihood_with_optimization_cuda(self): mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, gp_model) gp_model.covar_module.base_kernel.initialize(log_lengthscale=1) gp_model.mean_module.initialize(constant=0) - likelihood.initialize(log_noise=1) + likelihood.noise_covar.initialize(log_noise=1) # Find optimal model hyperparameters gp_model.train() From b40e136d4ce9a1b87b4d2c24fe7e407ebfc3e47e Mon Sep 17 00:00:00 2001 From: balandat Date: Tue, 13 Nov 2018 09:36:39 -0800 Subject: [PATCH 09/12] Address some (not all) issues with multi-dim batch shapes --- gpytorch/kernels/__init__.py | 2 + .../multitask_gaussian_likelihood.py | 43 +++++++++++++------ gpytorch/likelihoods/noise_models.py | 26 +++++------ 3 files changed, 47 insertions(+), 24 deletions(-) diff --git a/gpytorch/kernels/__init__.py b/gpytorch/kernels/__init__.py index cbab73747..c41719629 100644 --- a/gpytorch/kernels/__init__.py +++ b/gpytorch/kernels/__init__.py @@ -19,6 +19,7 @@ from .rbf_kernel import RBFKernel from .scale_kernel import ScaleKernel from .spectral_mixture_kernel import SpectralMixtureKernel +from .white_noise_kernel import WhiteNoiseKernel __all__ = [ "Kernel", @@ -40,4 +41,5 @@ "RBFKernel", "ScaleKernel", "SpectralMixtureKernel", + "WhiteNoiseKernel", ] diff --git a/gpytorch/likelihoods/multitask_gaussian_likelihood.py b/gpytorch/likelihoods/multitask_gaussian_likelihood.py index a5ebdc735..a3d96ea3e 100644 --- a/gpytorch/likelihoods/multitask_gaussian_likelihood.py +++ b/gpytorch/likelihoods/multitask_gaussian_likelihood.py @@ -18,9 +18,9 @@ def _eval_covar_matrix(task_noise_covar_factor, log_noise): num_tasks = task_noise_covar_factor.size(0) - return task_noise_covar_factor.matmul(task_noise_covar_factor.transpose(-1, -2)) + log_noise.exp() * torch.eye( - num_tasks - ) + base_mat = task_noise_covar_factor.matmul(task_noise_covar_factor.transpose(-1, -2)) + diag = log_noise.exp() * torch.eye(num_tasks) + return base_mat + diag def _eval_corr_matrix(corr_factor, corr_diag): @@ -104,7 +104,18 @@ def forward(self, input, *params): mean, covar = input.mean, input.lazy_covariance_matrix batch_shape, n = 
covar.shape[:-2], covar.shape[-1] // self.num_tasks + if len(batch_shape) > 1: + raise NotImplementedError("Batch shapes with dim > 1 not yet supported for MulitTask Likelihoods") + + # compute the noise covariance + if len(params) > 0: + shape = None + else: + shape = mean.shape if len(mean.shape) == 1 else mean.shape[:-1] + D_sem = self.noise_covar(*params, shape=shape).sqrt() + if hasattr(self, "task_noise_corr_factor"): + # if rank > 0, compute the task correlation matrix task_noise_corr_factor = self.task_noise_corr_factor task_noise_corr_diag = self.task_noise_corr_diag if len(batch_shape) > 0: @@ -114,16 +125,24 @@ def forward(self, input, *params): ) task_noise_corr_factor = task_noise_corr_factor.squeeze(0) task_noise_corr_diag = task_noise_corr_diag.squeeze(0) - # # TODO: This is inefficient, find a better way to do this - task_corr = NonLazyTensor(_eval_corr_matrix(task_noise_corr_factor, task_noise_corr_diag)) + # TODO: This is inefficient, change repeat so it can repeat LazyTensors w/ multiple batch dimensions + task_corr = _eval_corr_matrix(task_noise_corr_factor, task_noise_corr_diag) + exp_shape = batch_shape + torch.Size([n]) + task_corr.shape[-2:] + if len(batch_shape) == 1: + task_corr = task_corr.unsqueeze(-3) + task_corr_exp = NonLazyTensor(task_corr.expand(exp_shape)) else: - task_corr = DiagLazyTensor( - torch.ones(batch_shape + torch.Size([self.num_tasks]), dtype=covar.dtype, device=covar.device) - ) - - D_sem = self.noise_covar(*params).sqrt() - task_covar_blocks = MatmulLazyTensor(MatmulLazyTensor(D_sem, task_corr.repeat(n, 1, 1)), D_sem) - task_covar = BlockDiagLazyTensor(task_covar_blocks) + # otherwise tasks are uncorrelated + diag_shape = batch_shape + torch.Size([n, self.num_tasks]) + task_corr_exp = DiagLazyTensor(torch.ones(diag_shape, dtype=covar.dtype, device=covar.device)) + + task_covar_blocks = MatmulLazyTensor(MatmulLazyTensor(D_sem, task_corr_exp), D_sem) + if len(batch_shape) == 1: + # TODO: Properly support general batch shapes in BlockDiagLazyTensor (no shape arithmetic) + tcb_eval = task_covar_blocks.evaluate() + task_covar = BlockDiagLazyTensor(NonLazyTensor(tcb_eval.view(-1, *tcb_eval.shape[-2:])), num_blocks=tcb_eval.shape[0]) + else: + task_covar = BlockDiagLazyTensor(task_covar_blocks) return input.__class__(mean, covar + task_covar) def variational_log_probability(self, input, target): diff --git a/gpytorch/likelihoods/noise_models.py b/gpytorch/likelihoods/noise_models.py index 5c955fef8..3fbcbbeab 100644 --- a/gpytorch/likelihoods/noise_models.py +++ b/gpytorch/likelihoods/noise_models.py @@ -15,24 +15,23 @@ def __init__(self, log_noise_prior=None, batch_size=1): name="log_noise", parameter=Parameter(torch.zeros(batch_size, 1)), prior=log_noise_prior ) - def forward(self, params): + def forward(self, *params, shape=None): log_noise = self.log_noise.squeeze(0).exp() - p = params[0] if isinstance(params, list) else params - shape = p.shape if len(p.shape) == 1 else p.shape[:-1] - if log_noise.ndimension() > len(shape): - raise RuntimeError("Must provide batched input if in batch mode") + if shape is None: + p = params[0] if torch.is_tensor(params[0]) else params[0][0] + shape = p.shape if len(p.shape) == 1 else p.shape[:-1] if log_noise.shape[-1] > 1: # deal with multi-task case shape = shape + torch.Size([log_noise.shape[-1]]) - log_noise_diag = log_noise.expand(shape) - return DiagLazyTensor(log_noise_diag) + if log_noise.ndimension() > len(shape): + raise RuntimeError("Must provide batched input if in batch mode") + return 
DiagLazyTensor(log_noise.expand(shape)) class MultitaskHomoskedasticNoise(HomoskedasticNoise): def __init__(self, num_tasks, log_noise_prior=None, batch_size=1): super(HomoskedasticNoise, self).__init__() - self.register_parameter( - name="log_noise", parameter=Parameter(torch.zeros(batch_size, num_tasks)), prior=log_noise_prior - ) + log_noise = Parameter(torch.zeros(batch_size, num_tasks)) + self.register_parameter(name="log_noise", parameter=log_noise, prior=log_noise_prior) class HeteroskedasticNoise(Module): @@ -42,8 +41,11 @@ def __init__(self, noise_model, log_scale=True, noise_indices=None): self.log_scale = log_scale self.noise_indices = noise_indices - def forward(self, params): - output = self.noise_model(*params if isinstance(params, list) or isinstance(params, tuple) else params) + def forward(self, *params, shape=None): + if len(params) == 1 and not torch.is_tensor(params[0]): + output = self.noise_model(*params[0]) + else: + output = self.noise_model(*params) if not isinstance(output, MultivariateNormal): raise NotImplementedError("Currently only noise models that return a MultivariateNormal are supported") # note: this also works with MultitaskMultivariateNormal, where this From 9af9ab51cee0c212ba1c689ae047558889464bc2 Mon Sep 17 00:00:00 2001 From: balandat Date: Thu, 22 Nov 2018 07:39:57 -0800 Subject: [PATCH 10/12] Various fixes, streamline log_ deprecation --- gpytorch/kernels/__init__.py | 6 +- gpytorch/kernels/white_noise_kernel.py | 3 +- gpytorch/lazy/diag_lazy_tensor.py | 6 - gpytorch/likelihoods/__init__.py | 5 +- gpytorch/likelihoods/gaussian_likelihood.py | 2 + .../multitask_gaussian_likelihood.py | 33 ++++- gpytorch/likelihoods/noise_models.py | 104 +++++++++---- gpytorch/module.py | 139 +++++------------- gpytorch/utils/grid.py | 3 +- gpytorch/utils/log_deprecation.py | 31 ++++ test/examples/test_batch_gp_regression.py | 15 +- test/examples/test_grid_gp_regression.py | 28 +++- ...t_kronecker_multitask_ski_gp_regression.py | 49 +++--- test/examples/test_simple_gp_regression.py | 109 +++++++------- test/examples/test_white_noise_regression.py | 105 ++++++------- ...t_general_multitask_gaussian_likelihood.py | 17 ++- 16 files changed, 348 insertions(+), 307 deletions(-) create mode 100644 gpytorch/utils/log_deprecation.py diff --git a/gpytorch/kernels/__init__.py b/gpytorch/kernels/__init__.py index 0025bd8c8..591786d3e 100644 --- a/gpytorch/kernels/__init__.py +++ b/gpytorch/kernels/__init__.py @@ -1,12 +1,12 @@ #!/usr/bin/env python3 -from .kernel import Kernel, AdditiveKernel, ProductKernel from .additive_structure_kernel import AdditiveStructureKernel from .cosine_kernel import CosineKernel -from .grid_kernel import GridKernel from .grid_interpolation_kernel import GridInterpolationKernel +from .grid_kernel import GridKernel from .index_kernel import IndexKernel from .inducing_point_kernel import InducingPointKernel +from .kernel import AdditiveKernel, Kernel, ProductKernel from .lcm_kernel import LCMKernel from .linear_kernel import LinearKernel from .matern_kernel import MaternKernel @@ -18,6 +18,7 @@ from .spectral_mixture_kernel import SpectralMixtureKernel from .white_noise_kernel import WhiteNoiseKernel + __all__ = [ "Kernel", "AdditiveKernel", @@ -27,7 +28,6 @@ "GridInterpolationKernel", "IndexKernel", "InducingPointKernel", - "InducingPointKernelAddedLossTerm", "LCMKernel", "LinearKernel", "MaternKernel", diff --git a/gpytorch/kernels/white_noise_kernel.py b/gpytorch/kernels/white_noise_kernel.py index 8c453edd9..668382428 100644 --- 
a/gpytorch/kernels/white_noise_kernel.py +++ b/gpytorch/kernels/white_noise_kernel.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import torch + from . import Kernel from ..lazy import DiagLazyTensor, ZeroLazyTensor @@ -55,4 +56,4 @@ def forward(self, x1, x2, **params): elif x1.size(-2) == x2.size(-2) and x1.size(-2) == self.variances.size(1) and torch.equal(x1, x2): return DiagLazyTensor(self.variances.view(self.variances.size(0), -1)) else: - return ZeroLazyTensor(x1.size(-3), x1.size(-2), x2.size(-2)) + return ZeroLazyTensor(x1.size(-3), x1.size(-2), x2.size(-2), dtype=x1.dtype, device=x1.device) diff --git a/gpytorch/lazy/diag_lazy_tensor.py b/gpytorch/lazy/diag_lazy_tensor.py index abb5e252a..58dd62ef7 100644 --- a/gpytorch/lazy/diag_lazy_tensor.py +++ b/gpytorch/lazy/diag_lazy_tensor.py @@ -176,12 +176,6 @@ def sum_batch(self, sum_batch_size=None): return DiagLazyTensor(self._diag.view(-1, sum_batch_size, self._diag.shape[-1])) return DiagLazyTensor(self._diag.sum([i for i in range(self.batch_dim)])) - def exp(self): - return DiagLazyTensor(self._diag.exp()) - - def sqrt(self): - return DiagLazyTensor(self._diag.sqrt()) - def zero_mean_mvn_samples(self, num_samples): base_samples = torch.randn(num_samples, *self._diag.shape, dtype=self.dtype, device=self.device) return base_samples * self._diag.sqrt() diff --git a/gpytorch/likelihoods/__init__.py b/gpytorch/likelihoods/__init__.py index 50606a51d..d0634be70 100644 --- a/gpytorch/likelihoods/__init__.py +++ b/gpytorch/likelihoods/__init__.py @@ -1,13 +1,12 @@ - #!/usr/bin/env python3 from .likelihood import Likelihood from .bernoulli_likelihood import BernoulliLikelihood -from .gaussian_likelihood import _GaussianLikelihoodBase, GaussianLikelihood +from .gaussian_likelihood import GaussianLikelihood, _GaussianLikelihoodBase from .multitask_gaussian_likelihood import ( - _MultitaskGaussianLikelihoodBase, MultitaskGaussianLikelihood, MultitaskGaussianLikelihoodKronecker, + _MultitaskGaussianLikelihoodBase, ) from .noise_models import HeteroskedasticNoise from .softmax_likelihood import SoftmaxLikelihood diff --git a/gpytorch/likelihoods/gaussian_likelihood.py b/gpytorch/likelihoods/gaussian_likelihood.py index ea788c846..18dffa30c 100644 --- a/gpytorch/likelihoods/gaussian_likelihood.py +++ b/gpytorch/likelihoods/gaussian_likelihood.py @@ -22,8 +22,10 @@ def forward(self, input, *params): raise ValueError("Gaussian likelihoods require a MultivariateNormal input") mean, covar = input.mean, input.lazy_covariance_matrix if len(params) > 0: + # we can infer the shape from the params shape = None else: + # here shape[:-1] is the batch shape requested, and shape[-1] is `n`, the number of points shape = mean.shape if len(mean.shape) == 1 else mean.shape[:-1] noise_covar = self.noise_covar(*params, shape=shape) full_covar = covar + noise_covar diff --git a/gpytorch/likelihoods/multitask_gaussian_likelihood.py b/gpytorch/likelihoods/multitask_gaussian_likelihood.py index c01f9d521..ab69c1fb4 100644 --- a/gpytorch/likelihoods/multitask_gaussian_likelihood.py +++ b/gpytorch/likelihoods/multitask_gaussian_likelihood.py @@ -107,7 +107,7 @@ def forward(self, input, *params): shape = None else: shape = mean.shape if len(mean.shape) == 1 else mean.shape[:-1] - D_sem = self.noise_covar(*params, shape=shape).sqrt() + noise_covar = self.noise_covar(*params, shape=shape) if hasattr(self, "task_noise_corr_factor"): # if rank > 0, compute the task correlation matrix @@ -117,12 +117,12 @@ def forward(self, input, *params): if len(batch_shape) == 1: task_corr = 
task_corr.unsqueeze(-3) task_corr_exp = NonLazyTensor(task_corr.expand(exp_shape)) + noise_sem = noise_covar.sqrt() + task_covar_blocks = MatmulLazyTensor(MatmulLazyTensor(noise_sem, task_corr_exp), noise_sem) else: # otherwise tasks are uncorrelated - diag_shape = batch_shape + torch.Size([n, self.num_tasks]) - task_corr_exp = DiagLazyTensor(torch.ones(diag_shape, dtype=covar.dtype, device=covar.device)) + task_covar_blocks = noise_covar - task_covar_blocks = MatmulLazyTensor(MatmulLazyTensor(D_sem, task_corr_exp), D_sem) if len(batch_shape) == 1: # TODO: Properly support general batch shapes in BlockDiagLazyTensor (no shape arithmetic) tcb_eval = task_covar_blocks.evaluate() @@ -186,6 +186,31 @@ def __init__( task_correlation_prior=task_correlation_prior, batch_size=batch_size, ) + self._param_transform = param_transform + self._inv_param_transform = _get_inv_param_transform(param_transform, inv_param_transform) + self.register_parameter(name="raw_noise", parameter=torch.nn.Parameter(torch.zeros(batch_size, 1))) + + @property + def noise(self): + return self._param_transform(self.raw_noise) + + @noise.setter + def noise(self, value): + self._set_noise(value) + + def _set_noise(self, value): + self.initialize(raw_noise=self._inv_param_transform(value)) + + def forward(self, input, *params): + mvn = super().forward(input, *params) + mean, covar = mvn.mean, mvn.lazy_covariance_matrix + noise = self.noise + if covar.ndimension() == 2: + if settings.debug.on() and noise.size(0) > 1: + raise RuntimeError("With batch_size > 1, expected a batched MultitaskMultivariateNormal distribution.") + noise = noise.squeeze(0) + covar = add_diag(covar, noise) + return input.__class__(mean, covar) class MultitaskGaussianLikelihoodKronecker(_MultitaskGaussianLikelihoodBase): diff --git a/gpytorch/likelihoods/noise_models.py b/gpytorch/likelihoods/noise_models.py index 3fbcbbeab..94f034bfe 100644 --- a/gpytorch/likelihoods/noise_models.py +++ b/gpytorch/likelihoods/noise_models.py @@ -1,47 +1,101 @@ -from __future__ import absolute_import, division, print_function, unicode_literals +#!/usr/bin/env python3 import torch from torch.nn import Parameter +from torch.nn.functional import softplus from ..distributions import MultivariateNormal from ..lazy import DiagLazyTensor from ..module import Module +from ..utils.broadcasting import _mul_broadcast_shape +from ..utils.transforms import _get_inv_param_transform -class HomoskedasticNoise(Module): - def __init__(self, log_noise_prior=None, batch_size=1): - super(HomoskedasticNoise, self).__init__() - self.register_parameter( - name="log_noise", parameter=Parameter(torch.zeros(batch_size, 1)), prior=log_noise_prior - ) +class _HomoskedasticNoiseBase(Module): + def __init__(self, noise_prior=None, batch_size=1, param_transform=softplus, inv_param_transform=None, num_tasks=1): + super().__init__() + self._param_transform = param_transform + self._inv_param_transform = _get_inv_param_transform(param_transform, inv_param_transform) + self.register_parameter(name="raw_noise", parameter=Parameter(torch.zeros(batch_size, num_tasks))) + if noise_prior is not None: + self.register_prior("noise_prior", noise_prior, lambda: self.noise, lambda v: self._set_noise(v)) + + @property + def noise(self): + return self._param_transform(self.raw_noise) + + @noise.setter + def noise(self, value): + self._set_noise(value) + + def _set_noise(self, value): + self.initialize(raw_noise=self._inv_param_transform(value)) def forward(self, *params, shape=None): - log_noise = 
self.log_noise.squeeze(0).exp() + """In the homoskedastic case, the parameters are only used to infer the required shape. + Here are the possible scenarios: + 1) non-batched noise, non-batched input, non-MT -> noise_diag shape is `n` + 2) non-batched noise, non-batched input, MT -> noise_diag shape is `nt` + 3) non-batched noise, batched input, non-MT -> noise_diag shape is `b x n` with b' the broadcasted batch shape + 4) non-batched noise, batched input, MT -> noise_diag shape is `b x nt` + 5) batched noise, non-batched input, non-MT -> noise_diag shape is `b x n` + 6) batched noise, non-batched input, MT -> noise_diag shape is `b x nt` + 7) batched noise, batched input, non-MT -> noise_diag shape is `b' x n` + 8) batched noise, batched input, MT -> noise_diag shape is `b' x nt` + where `n` is the number of evaluation points and `t` is the number of tasks (i.e. `num_tasks` of self.noise). + So bascially we should always have this be `b' x nt`, with `b'` appropriately broadcast from the noise parameter + and input batch shapes. `n` and the input batch shape we get either from shape arg or from the params input. + For this it should be sufficient if we take in a single `shape` arg, where the convention is that shape[:-1] + is the batch shape of the input, and shape[-1] is `n`. + """ if shape is None: p = params[0] if torch.is_tensor(params[0]) else params[0][0] shape = p.shape if len(p.shape) == 1 else p.shape[:-1] - if log_noise.shape[-1] > 1: # deal with multi-task case - shape = shape + torch.Size([log_noise.shape[-1]]) - if log_noise.ndimension() > len(shape): - raise RuntimeError("Must provide batched input if in batch mode") - return DiagLazyTensor(log_noise.expand(shape)) + batch_shape, n = shape[:-1], shape[-1] + noise = self.noise + noise_batch_shape = noise.shape[:-1] if noise.shape[-2] > 1 else torch.Size() + num_tasks = noise.shape[-1] + noise = noise.unsqueeze(-2) + batch_shape = _mul_broadcast_shape(noise_batch_shape, batch_shape) + if len(batch_shape) == 0: + noise = noise.squeeze(0) + noise_diag = noise.expand(batch_shape + torch.Size([n, num_tasks])).contiguous() + if num_tasks == 1: + noise_diag = noise_diag.view(*batch_shape, n) + return DiagLazyTensor(noise_diag) -class MultitaskHomoskedasticNoise(HomoskedasticNoise): - def __init__(self, num_tasks, log_noise_prior=None, batch_size=1): - super(HomoskedasticNoise, self).__init__() - log_noise = Parameter(torch.zeros(batch_size, num_tasks)) - self.register_parameter(name="log_noise", parameter=log_noise, prior=log_noise_prior) +class HomoskedasticNoise(_HomoskedasticNoiseBase): + def __init__(self, noise_prior=None, batch_size=1, param_transform=softplus, inv_param_transform=None): + super().__init__( + noise_prior=noise_prior, + batch_size=batch_size, + param_transform=param_transform, + inv_param_transform=inv_param_transform, + num_tasks=1, + ) + + +class MultitaskHomoskedasticNoise(_HomoskedasticNoiseBase): + def __init__(self, num_tasks, noise_prior=None, batch_size=1, param_transform=softplus, inv_param_transform=None): + super().__init__( + noise_prior=noise_prior, + batch_size=batch_size, + param_transform=param_transform, + inv_param_transform=inv_param_transform, + num_tasks=num_tasks, + ) class HeteroskedasticNoise(Module): - def __init__(self, noise_model, log_scale=True, noise_indices=None): - super(HeteroskedasticNoise, self).__init__() + def __init__(self, noise_model, noise_indices=None, noise_transform=torch.exp): + super().__init__() self.noise_model = noise_model - self.log_scale = log_scale - 
self.noise_indices = noise_indices + self._noise_transform = noise_transform + self._noise_indices = noise_indices + self._noise_transform = noise_transform - def forward(self, *params, shape=None): + def forward(self, *params, batch_shape=None, shape=None): if len(params) == 1 and not torch.is_tensor(params[0]): output = self.noise_model(*params[0]) else: @@ -50,5 +104,5 @@ def forward(self, *params, shape=None): raise NotImplementedError("Currently only noise models that return a MultivariateNormal are supported") # note: this also works with MultitaskMultivariateNormal, where this # will return a batched DiagLazyTensors of size n x num_tasks x num_tasks - noise_diag = output.mean if self.noise_indices is None else output.mean[..., self.noise_indices] - return DiagLazyTensor(noise_diag.exp() if self.log_scale else noise_diag) + noise_diag = output.mean if self._noise_indices is None else output.mean[..., self._noise_indices] + return DiagLazyTensor(self._noise_transform(noise_diag)) diff --git a/gpytorch/module.py b/gpytorch/module.py index f603e319c..3f0461c36 100644 --- a/gpytorch/module.py +++ b/gpytorch/module.py @@ -1,19 +1,20 @@ #!/usr/bin/env python3 +import itertools +import warnings from collections import OrderedDict import torch from torch import nn from torch.distributions import Distribution -import itertools + from .lazy import LazyTensor from .utils.deprecation import DeprecationError -import warnings class Module(nn.Module): def __init__(self): - super(Module, self).__init__() + super().__init__() self._added_loss_terms = OrderedDict() self._priors = OrderedDict() @@ -69,49 +70,27 @@ def initialize(self, **kwargs): kwargs: (param_name, value) - parameter to initialize Value can take the form of a tensor, a float, or an int """ - from .kernels import ( - CosineKernel, - IndexKernel, - MaternKernel, - PeriodicKernel, - RBFKernel, - ScaleKernel, - SpectralMixtureKernel, - ) - - from .likelihoods import GaussianLikelihood, MultitaskGaussianLikelihood - - modules_with_log_params = [ - CosineKernel, - IndexKernel, - MaternKernel, - PeriodicKernel, - RBFKernel, - ScaleKernel, - SpectralMixtureKernel, - GaussianLikelihood, - MultitaskGaussianLikelihood, - ] + from .utils.log_deprecation import MODULES_WITH_LOG_PARAMS for name, val in kwargs.items(): if isinstance(val, int): val = float(val) - if any([isinstance(self, mod_type) for mod_type in modules_with_log_params]) and 'log_' in name: - base_name = name.split('log_')[1] - name = 'raw_' + base_name + if any(isinstance(self, mod_type) for mod_type in MODULES_WITH_LOG_PARAMS) and "log_" in name: + base_name = name.split("log_")[1] + name = "raw_" + base_name if not torch.is_tensor(val): val = self._inv_param_transform(torch.tensor(val).exp()).item() else: val = self._inv_param_transform(val.exp()) - if name not in self._parameters: + if not hasattr(self, name): raise AttributeError("Unknown parameter {p} for {c}".format(p=name, c=self.__class__.__name__)) if torch.is_tensor(val): self.__getattr__(name).data.copy_(val) elif isinstance(val, float): self.__getattr__(name).data.fill_(val) else: - raise AttributeError("Type {t} not valid to initialize parameter {p}".format(t=type(val), p=name)) + raise AttributeError("Type {t} not valid for initializing parameter {p}".format(t=type(val), p=name)) # Ensure value is contained in support of prior (if present) prior_name = "_".join([name, "prior"]) @@ -178,7 +157,7 @@ def register_parameter(self, name, parameter, prior=None): ) if "_parameters" not in self.__dict__: raise 
AttributeError("Cannot assign parameter before Module.__init__() call") - super(Module, self).register_parameter(name, parameter) + super().register_parameter(name, parameter) def register_prior(self, name, prior, param_or_closure, setting_closure=None): """ @@ -252,56 +231,25 @@ def variational_parameters(self): def _load_from_state_dict( self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs ): - from .kernels import ( - CosineKernel, - IndexKernel, - MaternKernel, - PeriodicKernel, - RBFKernel, - ScaleKernel, - SpectralMixtureKernel, - ) - - from .likelihoods import GaussianLikelihood, MultitaskGaussianLikelihood + from .utils.log_deprecation import LOG_DEPRECATION_MSG, MODULES_WITH_LOG_PARAMS local_name_params = itertools.chain(self._parameters.items(), self._buffers.items()) local_state = {k: v.data for k, v in local_name_params if v is not None} - modules_with_log_params = [ - CosineKernel, - IndexKernel, - MaternKernel, - PeriodicKernel, - RBFKernel, - ScaleKernel, - SpectralMixtureKernel, - GaussianLikelihood, - MultitaskGaussianLikelihood, - ] - - if not any([isinstance(self, mod_type) for mod_type in modules_with_log_params]): - return super(Module, self)._load_from_state_dict( - state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs - ) - - super(Module, self)._load_from_state_dict( + super()._load_from_state_dict( state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs ) + if not any(isinstance(self, mod_type) for mod_type in MODULES_WITH_LOG_PARAMS): + return # Load log space parameters and throw deprecation warnings. for name, param in local_state.items(): - if 'raw_' in name: - base_name = name.split('raw_')[1] + if "raw_" in name: + base_name = name.split("raw_")[1] log_name = "log_" + base_name log_key = prefix + log_name if log_key in state_dict and log_key not in local_state: - warnings.warn( - "The '{log_name}' parameter is deprecated in favor of '{name}' because we no longer ensure " - "positiveness with torch.exp for improved stability reasons and will be removed in a future " - "release. To solve this issue, just save this model " - "again.".format(log_name=log_name, name=name), - DeprecationWarning, - ) + warnings.warn(LOG_DEPRECATION_MSG.format(log_name=log_name, name=name), DeprecationWarning) input_param = state_dict[log_key] if isinstance(input_param, nn.Parameter): input_param = input_param.data @@ -315,42 +263,21 @@ def _load_from_state_dict( unexpected_keys.remove(prefix + log_name) def __getattr__(self, name): - from .kernels import ( - CosineKernel, - IndexKernel, - MaternKernel, - PeriodicKernel, - RBFKernel, - ScaleKernel, - SpectralMixtureKernel, - ) - - from .likelihoods import GaussianLikelihood, MultitaskGaussianLikelihood - - modules_with_log_params = [ - CosineKernel, - IndexKernel, - MaternKernel, - PeriodicKernel, - RBFKernel, - ScaleKernel, - SpectralMixtureKernel, - GaussianLikelihood, - MultitaskGaussianLikelihood, - ] - - if not any([isinstance(self, mod_type) for mod_type in modules_with_log_params]) or 'log_' not in name: - return super(Module, self).__getattr__(name) - else: - base_name = name.split('log_')[1] # e.g. 
log_lengthscale -> lengthscale - raw_name = 'raw_' + base_name - warnings.warn( - "The '{log_name}' parameter is deprecated in favor of '{name}' because we no longer ensure " - "positiveness with torch.exp for improved stability reasons and will be removed in a future " - "release.".format(log_name=name, name=raw_name), - DeprecationWarning, - ) - return super(Module, self).__getattribute__(base_name).log() # Get real param value and transform to log + try: + return super().__getattr__(name) + except AttributeError as e: + from .utils.log_deprecation import LOG_DEPRECATION_MSG, MODULES_WITH_LOG_PARAMS + + if any(isinstance(self, mod_type) for mod_type in MODULES_WITH_LOG_PARAMS) and "log_" in name: + base_name = name.split("log_")[1] # e.g. log_lengthscale -> lengthscale + raw_name = "raw_" + base_name + warnings.warn(LOG_DEPRECATION_MSG.format(log_name=name, name=raw_name), DeprecationWarning) + return super().__getattribute__(base_name).log() # Get real param value and transform to log + else: + try: + return super().__getattribute__(name) + except AttributeError: + raise e def _extract_named_added_loss_terms(module, memo=None, prefix=""): diff --git a/gpytorch/utils/grid.py b/gpytorch/utils/grid.py index 6c37c788b..e10f374a1 100644 --- a/gpytorch/utils/grid.py +++ b/gpytorch/utils/grid.py @@ -1,6 +1,5 @@ #!/usr/bin/env python3 - import math import torch @@ -48,7 +47,7 @@ def choose_grid_size(train_inputs, ratio=1.0): def create_data_from_grid(grid): grid_size = grid.size(-2) grid_dim = grid.size(-1) - grid_data = torch.zeros(int(pow(grid_size, grid_dim)), grid_dim) + grid_data = torch.zeros(int(pow(grid_size, grid_dim)), grid_dim, device=grid.device) prev_points = None for i in range(grid_dim): for j in range(grid_size): diff --git a/gpytorch/utils/log_deprecation.py b/gpytorch/utils/log_deprecation.py new file mode 100644 index 000000000..b7f786f89 --- /dev/null +++ b/gpytorch/utils/log_deprecation.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python3 + +from ..kernels import ( + CosineKernel, + IndexKernel, + MaternKernel, + PeriodicKernel, + RBFKernel, + ScaleKernel, + SpectralMixtureKernel, +) +from ..likelihoods import GaussianLikelihood, MultitaskGaussianLikelihood + + +MODULES_WITH_LOG_PARAMS = [ + CosineKernel, + IndexKernel, + MaternKernel, + PeriodicKernel, + RBFKernel, + ScaleKernel, + SpectralMixtureKernel, + GaussianLikelihood, + MultitaskGaussianLikelihood, +] + +LOG_DEPRECATION_MSG = ( + "The '{log_name}' parameter is deprecated in favor of '{name}' because we no longer ensure " + "positiveness with torch.exp for improved stability reasons and will be removed in a future " + "release." 
+) diff --git a/test/examples/test_batch_gp_regression.py b/test/examples/test_batch_gp_regression.py index 804773591..49cf27baf 100644 --- a/test/examples/test_batch_gp_regression.py +++ b/test/examples/test_batch_gp_regression.py @@ -1,16 +1,17 @@ #!/usr/bin/env python3 +import math import os import random -import math -import torch import unittest + import gpytorch -from torch import optim +import torch +from gpytorch.distributions import MultivariateNormal from gpytorch.kernels import RBFKernel, ScaleKernel -from gpytorch.means import ConstantMean from gpytorch.likelihoods import GaussianLikelihood -from gpytorch.distributions import MultivariateNormal +from gpytorch.means import ConstantMean +from torch import optim # Batch training test: Let's learn hyperparameters on a sine dataset, but test on a sine dataset and a cosine dataset @@ -120,7 +121,7 @@ def test_train_on_batch_test_on_batch(self): for _ in range(50): optimizer.zero_grad() output = gp_model(train_x12) - loss = -mll(output, train_y12).sum() + loss = -mll(output, train_y12, train_x12).sum() loss.backward() optimizer.step() @@ -159,7 +160,7 @@ def test_train_on_batch_test_on_batch_shared_hypers_over_batch(self): for _ in range(50): optimizer.zero_grad() output = gp_model(train_x12) - loss = -mll(output, train_y12).sum() + loss = -mll(output, train_y12, train_x12).sum() loss.backward() optimizer.step() diff --git a/test/examples/test_grid_gp_regression.py b/test/examples/test_grid_gp_regression.py index ca8e9c9e6..6e53e1b8d 100644 --- a/test/examples/test_grid_gp_regression.py +++ b/test/examples/test_grid_gp_regression.py @@ -1,11 +1,12 @@ #!/usr/bin/env python3 -import gpytorch -import torch import math -import unittest import os import random +import unittest + +import gpytorch +import torch from torch import optim @@ -52,19 +53,26 @@ def tearDown(self): if hasattr(self, "rng_state"): torch.set_rng_state(self.rng_state) - def test_grid_gp_mean_abs_error(self): + def test_grid_gp_mean_abs_error(self, cuda=False): + device = torch.device("cuda") if cuda else torch.device("cpu") grid_bounds = [(0, 1), (0, 2)] grid_size = 25 - grid = torch.zeros(grid_size, len(grid_bounds)) + grid = torch.zeros(grid_size, len(grid_bounds), device=device) for i in range(len(grid_bounds)): grid_diff = float(grid_bounds[i][1] - grid_bounds[i][0]) / (grid_size - 2) - grid[:, i] = torch.linspace(grid_bounds[i][0] - grid_diff, grid_bounds[i][1] + grid_diff, grid_size) + grid[:, i] = torch.linspace( + grid_bounds[i][0] - grid_diff, grid_bounds[i][1] + grid_diff, grid_size, device=device + ) - train_x, train_y, test_x, test_y = make_data(grid) + train_x, train_y, test_x, test_y = make_data(grid, cuda=cuda) likelihood = gpytorch.likelihoods.GaussianLikelihood() gp_model = GridGPRegressionModel(grid, train_x, train_y, likelihood) mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, gp_model) + if cuda: + gp_model.cuda() + likelihood.cuda() + # Optimize the model gp_model.train() likelihood.train() @@ -72,7 +80,7 @@ def test_grid_gp_mean_abs_error(self): optimizer = optim.Adam(list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.1) optimizer.n_iter = 0 with gpytorch.settings.debug(False): - for _ in range(25): + for _ in range(20): optimizer.zero_grad() output = gp_model(train_x) loss = -mll(output, train_y) @@ -96,6 +104,10 @@ def test_grid_gp_mean_abs_error(self): self.assertLess(mean_abs_error.squeeze().item(), 0.3) + def test_grid_gp_mean_abs_error_cuda(self): + if torch.cuda.is_available(): + 
self.test_grid_gp_mean_abs_error(cuda=True) + if __name__ == "__main__": unittest.main() diff --git a/test/examples/test_kronecker_multitask_ski_gp_regression.py b/test/examples/test_kronecker_multitask_ski_gp_regression.py index 151c43429..6e5d611e1 100644 --- a/test/examples/test_kronecker_multitask_ski_gp_regression.py +++ b/test/examples/test_kronecker_multitask_ski_gp_regression.py @@ -1,28 +1,16 @@ #!/usr/bin/env python3 -from math import pi - import os import random -import torch import unittest +from math import pi + import gpytorch -from gpytorch.kernels import RBFKernel, MultitaskKernel, GridInterpolationKernel -from gpytorch.means import ConstantMean, MultitaskMean -from gpytorch.likelihoods import MultitaskGaussianLikelihood +import torch from gpytorch.distributions import MultitaskMultivariateNormal - - -# Simple training data: let's try to learn a sine function -train_x = torch.linspace(0, 1, 100) - -# y1 function is sin(2*pi*x) with noise N(0, 0.04) -train_y1 = torch.sin(train_x * (2 * pi)) + torch.randn(train_x.size()) * 0.1 -# y2 function is cos(2*pi*x) with noise N(0, 0.04) -train_y2 = torch.cos(train_x * (2 * pi)) + torch.randn(train_x.size()) * 0.1 - -# Create a train_y which interleaves the two -train_y = torch.stack([train_y1, train_y2], -1) +from gpytorch.kernels import GridInterpolationKernel, MultitaskKernel, RBFKernel +from gpytorch.likelihoods import MultitaskGaussianLikelihood +from gpytorch.means import ConstantMean, MultitaskMean class MultitaskGPModel(gpytorch.models.ExactGP): @@ -51,9 +39,25 @@ def tearDown(self): if hasattr(self, "rng_state"): torch.set_rng_state(self.rng_state) - def test_multitask_gp_mean_abs_error(self): + def _get_data(self, cuda=False): + # Simple training data: let's try to learn a sine function + train_x = torch.linspace(0, 1, 100, device=torch.device("cuda") if cuda else torch.device("cpu")) + # y1 function is sin(2*pi*x) with noise N(0, 0.04) + train_y1 = torch.sin(train_x * (2 * pi)) + torch.randn_like(train_x) * 0.1 + # y2 function is cos(2*pi*x) with noise N(0, 0.04) + train_y2 = torch.cos(train_x * (2 * pi)) + torch.randn_like(train_x) * 0.1 + # Create a train_y which interleaves the two + train_y = torch.stack([train_y1, train_y2], -1) + return train_x, train_y + + def test_multitask_gp_mean_abs_error(self, cuda=False): + train_x, train_y = self._get_data(cuda=cuda) likelihood = MultitaskGaussianLikelihood(num_tasks=2) model = MultitaskGPModel(train_x, train_y, likelihood) + + if cuda: + model.cuda() + # Find optimal model hyperparameters model.train() likelihood.train() @@ -79,7 +83,8 @@ def test_multitask_gp_mean_abs_error(self): # Test the model model.eval() likelihood.eval() - test_x = torch.linspace(0, 1, 51) + + test_x = torch.linspace(0, 1, 51, device=torch.device("cuda") if cuda else torch.device("cpu")) test_y1 = torch.sin(test_x * (2 * pi)) test_y2 = torch.cos(test_x * (2 * pi)) test_preds = likelihood(model(test_x)).mean @@ -89,6 +94,10 @@ def test_multitask_gp_mean_abs_error(self): self.assertLess(mean_abs_error_task_1.squeeze().item(), 0.05) self.assertLess(mean_abs_error_task_2.squeeze().item(), 0.05) + def test_multitask_gp_mean_abs_error_cuda(self): + if torch.cuda.is_available(): + self.test_multitask_gp_mean_abs_error(cuda=True) + if __name__ == "__main__": unittest.main() diff --git a/test/examples/test_simple_gp_regression.py b/test/examples/test_simple_gp_regression.py index 91cbaef6a..d7b56b043 100644 --- a/test/examples/test_simple_gp_regression.py +++ b/test/examples/test_simple_gp_regression.py @@ 
-1,26 +1,18 @@ #!/usr/bin/env python3 -from math import exp, pi - import os import random -import torch import unittest +from math import exp, pi + import gpytorch -from torch import optim +import torch +from gpytorch.distributions import MultivariateNormal from gpytorch.kernels import RBFKernel, ScaleKernel from gpytorch.likelihoods import GaussianLikelihood from gpytorch.means import ConstantMean from gpytorch.priors import SmoothedBoxPrior -from gpytorch.distributions import MultivariateNormal - - -# Simple training data: let's try to learn a sine function -train_x = torch.linspace(0, 1, 11) -train_y = torch.sin(train_x * (2 * pi)) - -test_x = torch.linspace(0, 1, 51) -test_y = torch.sin(test_x * (2 * pi)) +from torch import optim class ExactGPModel(gpytorch.models.ExactGP): @@ -48,7 +40,17 @@ def tearDown(self): if hasattr(self, "rng_state"): torch.set_rng_state(self.rng_state) - def test_prior(self): + def _get_data(self, cuda=False): + device = torch.device("cuda") if cuda else torch.device("cpu") + # Simple training data: let's try to learn a sine function + train_x = torch.linspace(0, 1, 11, device=device) + train_y = torch.sin(train_x * (2 * pi)) + test_x = torch.linspace(0, 1, 51, device=device) + test_y = torch.sin(test_x * (2 * pi)) + return train_x, test_x, train_y, test_y + + def test_prior(self, cuda=False): + train_x, test_x, train_y, test_y = self._get_data(cuda=cuda) # We're manually going to set the hyperparameters to be ridiculous likelihood = GaussianLikelihood(noise_prior=SmoothedBoxPrior(exp(-3), exp(3), sigma=0.1)) gp_model = ExactGPModel(None, None, likelihood) @@ -60,6 +62,10 @@ def test_prior(self): gp_model.covar_module.base_kernel.initialize(log_lengthscale=0) likelihood.initialize(log_noise=0) + if cuda: + gp_model.cuda() + likelihood.cuda() + # Compute posterior distribution gp_model.eval() likelihood.eval() @@ -71,13 +77,22 @@ def test_prior(self): self.assertLess(torch.norm(function_predictions.mean - 1.5), 1e-3) self.assertLess(torch.norm(function_predictions.variance - correct_variance), 1e-3) - def test_posterior_latent_gp_and_likelihood_without_optimization(self): + def test_prior_cuda(self): + if torch.cuda.is_available(): + self.test_prior(cuda=True) + + def test_posterior_latent_gp_and_likelihood_without_optimization(self, cuda=False): + train_x, test_x, train_y, test_y = self._get_data(cuda=cuda) # We're manually going to set the hyperparameters to be ridiculous likelihood = GaussianLikelihood() gp_model = ExactGPModel(train_x, train_y, likelihood) gp_model.covar_module.base_kernel.initialize(raw_lengthscale=-15) likelihood.initialize(log_noise=-15) + if cuda: + gp_model.cuda() + likelihood.cuda() + # Compute posterior distribution gp_model.eval() likelihood.eval() @@ -91,12 +106,17 @@ def test_posterior_latent_gp_and_likelihood_without_optimization(self): self.assertLess(torch.norm(function_predictions.variance), 1e-3) # It shouldn't fit much else though - test_function_predictions = gp_model(torch.tensor([1.1])) + test_function_predictions = gp_model(torch.tensor([1.1]).type_as(test_x)) self.assertLess(torch.norm(test_function_predictions.mean - 0), 1e-4) self.assertLess(torch.norm(test_function_predictions.variance - gp_model.covar_module.outputscale), 1e-4) - def test_posterior_latent_gp_and_likelihood_with_optimization(self): + def test_posterior_latent_gp_and_likelihood_without_optimization_cuda(self): + if torch.cuda.is_available(): + self.test_posterior_latent_gp_and_likelihood_without_optimization(cuda=True) + + def 
test_posterior_latent_gp_and_likelihood_with_optimization(self, cuda=False): + train_x, test_x, train_y, test_y = self._get_data(cuda=cuda) # We're manually going to set the hyperparameters to something they shouldn't be likelihood = GaussianLikelihood(noise_prior=SmoothedBoxPrior(exp(-3), exp(3), sigma=0.1)) gp_model = ExactGPModel(train_x, train_y, likelihood) @@ -105,6 +125,10 @@ def test_posterior_latent_gp_and_likelihood_with_optimization(self): gp_model.mean_module.initialize(constant=0) likelihood.initialize(log_noise=1) + if cuda: + gp_model.cuda() + likelihood.cuda() + # Find optimal model hyperparameters gp_model.train() likelihood.train() @@ -135,7 +159,12 @@ def test_posterior_latent_gp_and_likelihood_with_optimization(self): self.assertLess(mean_abs_error.item(), 0.05) - def test_posterior_latent_gp_and_likelihood_fast_pred_var(self): + def test_posterior_latent_gp_and_likelihood_with_optimization_cuda(self): + if torch.cuda.is_available(): + self.test_posterior_latent_gp_and_likelihood_with_optimization(cuda=True) + + def test_posterior_latent_gp_and_likelihood_fast_pred_var(self, cuda=False): + train_x, test_x, train_y, test_y = self._get_data(cuda=cuda) with gpytorch.fast_pred_var(), gpytorch.settings.debug(False): # We're manually going to set the hyperparameters to # something they shouldn't be @@ -146,6 +175,10 @@ def test_posterior_latent_gp_and_likelihood_fast_pred_var(self): gp_model.mean_module.initialize(constant=0) likelihood.initialize(log_noise=1) + if cuda: + gp_model.cuda() + likelihood.cuda() + # Find optimal model hyperparameters gp_model.train() likelihood.train() @@ -183,45 +216,9 @@ def test_posterior_latent_gp_and_likelihood_fast_pred_var(self): self.assertLess(torch.max(var_diff / noise), 0.05) - def test_posterior_latent_gp_and_likelihood_with_optimization_cuda(self): + def test_posterior_latent_gp_and_likelihood_fast_pred_var_cuda(self): if torch.cuda.is_available(): - # We're manually going to set the hyperparameters to - # something they shouldn't be - likelihood = GaussianLikelihood(noise_prior=SmoothedBoxPrior(exp(-3), exp(3), sigma=0.1)).cuda() - gp_model = ExactGPModel(train_x.cuda(), train_y.cuda(), likelihood).cuda() - mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, gp_model) - gp_model.covar_module.base_kernel.initialize(log_lengthscale=1) - gp_model.mean_module.initialize(constant=0) - likelihood.initialize(log_noise=1) - - # Find optimal model hyperparameters - gp_model.train() - likelihood.train() - optimizer = optim.Adam(gp_model.parameters(), lr=0.1) - optimizer.n_iter = 0 - for _ in range(50): - optimizer.zero_grad() - output = gp_model(train_x.cuda()) - loss = -mll(output, train_y.cuda()) - loss.backward() - optimizer.n_iter += 1 - optimizer.step() - - for param in gp_model.parameters(): - self.assertTrue(param.grad is not None) - self.assertGreater(param.grad.norm().item(), 0) - for param in likelihood.parameters(): - self.assertTrue(param.grad is not None) - self.assertGreater(param.grad.norm().item(), 0) - optimizer.step() - - # Test the model - gp_model.eval() - likelihood.eval() - test_function_predictions = likelihood(gp_model(test_x.cuda())) - mean_abs_error = torch.mean(torch.abs(test_y.cuda() - test_function_predictions.mean)) - - self.assertLess(mean_abs_error.item(), 0.05) + self.test_posterior_latent_gp_and_likelihood_fast_pred_var(cuda=True) if __name__ == "__main__": diff --git a/test/examples/test_white_noise_regression.py b/test/examples/test_white_noise_regression.py index 4421a7af3..333b150ff 100644 --- 
a/test/examples/test_white_noise_regression.py +++ b/test/examples/test_white_noise_regression.py @@ -1,26 +1,18 @@ #!/usr/bin/env python3 -from math import exp, pi - import os import random -import torch import unittest +from math import exp, pi + import gpytorch -from torch import optim -from gpytorch.kernels import RBFKernel, WhiteNoiseKernel, ScaleKernel +import torch +from gpytorch.distributions import MultivariateNormal +from gpytorch.kernels import RBFKernel, ScaleKernel, WhiteNoiseKernel from gpytorch.likelihoods import GaussianLikelihood from gpytorch.means import ConstantMean from gpytorch.priors import SmoothedBoxPrior -from gpytorch.distributions import MultivariateNormal - - -# Simple training data: let's try to learn a sine function -train_x = torch.linspace(0, 1, 11) -train_y = torch.sin(train_x * (2 * pi)) - -test_x = torch.linspace(0, 1, 51) -test_y = torch.sin(test_x * (2 * pi)) +from torch import optim class ExactGPModel(gpytorch.models.ExactGP): @@ -50,7 +42,17 @@ def tearDown(self): if hasattr(self, "rng_state"): torch.set_rng_state(self.rng_state) - def test_posterior_latent_gp_and_likelihood_without_optimization(self): + def _get_data(self, cuda=False): + device = torch.device("cuda") if cuda else torch.device("cpu") + # Simple training data: let's try to learn a sine function + train_x = torch.linspace(0, 1, 11, device=device) + train_y = torch.sin(train_x * (2 * pi)) + test_x = torch.linspace(0, 1, 51, device=device) + test_y = torch.sin(test_x * (2 * pi)) + return train_x, test_x, train_y, test_y + + def test_posterior_latent_gp_and_likelihood_without_optimization(self, cuda=False): + train_x, test_x, train_y, test_y = self._get_data(cuda=cuda) with gpytorch.settings.debug(False): # We're manually going to set the hyperparameters to be ridiculous likelihood = GaussianLikelihood(noise_prior=SmoothedBoxPrior(exp(-10), exp(10), sigma=0.25)) @@ -63,6 +65,10 @@ def test_posterior_latent_gp_and_likelihood_without_optimization(self): gp_model.mean_module.initialize(constant=0) likelihood.initialize(log_noise=-10) + if cuda: + gp_model.cuda() + likelihood.cuda() + # Compute posterior distribution gp_model.eval() likelihood.eval() @@ -75,12 +81,17 @@ def test_posterior_latent_gp_and_likelihood_without_optimization(self): self.assertLess(torch.norm(function_predictions.variance), 5e-3) # It shouldn't fit much else though - test_function_predictions = gp_model(torch.tensor([1.1], dtype=torch.float)) + test_function_predictions = gp_model(torch.tensor([1.1]).type_as(test_x)) self.assertLess(torch.norm(test_function_predictions.mean - 0), 1e-4) self.assertLess(torch.norm(test_function_predictions.variance - gp_model.covar_module.outputscale), 1e-4) - def test_posterior_latent_gp_and_likelihood_with_optimization(self): + def test_posterior_latent_gp_and_likelihood_without_optimization_cuda(self): + if torch.cuda.is_available(): + self.test_posterior_latent_gp_and_likelihood_without_optimization(cuda=True) + + def test_posterior_latent_gp_and_likelihood_with_optimization(self, cuda=False): + train_x, test_x, train_y, test_y = self._get_data(cuda=cuda) # We're manually going to set the hyperparameters to something they shouldn't be likelihood = GaussianLikelihood(noise_prior=SmoothedBoxPrior(exp(-3), exp(3), sigma=0.1)) gp_model = ExactGPModel(train_x, train_y, likelihood) @@ -89,9 +100,14 @@ def test_posterior_latent_gp_and_likelihood_with_optimization(self): gp_model.mean_module.initialize(constant=0) likelihood.initialize(log_noise=1) + if cuda: + gp_model.cuda() + 
likelihood.cuda() + # Find optimal model hyperparameters gp_model.train() likelihood.train() + optimizer = optim.Adam(list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.1) optimizer.n_iter = 0 with gpytorch.settings.debug(False): @@ -119,10 +135,14 @@ def test_posterior_latent_gp_and_likelihood_with_optimization(self): self.assertLess(mean_abs_error.squeeze().item(), 0.05) - def test_posterior_latent_gp_and_likelihood_fast_pred_var(self): + def test_posterior_latent_gp_and_likelihood_with_optimization_cuda(self): + if torch.cuda.is_available(): + self.test_posterior_latent_gp_and_likelihood_with_optimization(cuda=True) + + def test_posterior_latent_gp_and_likelihood_fast_pred_var(self, cuda=False): + train_x, test_x, train_y, test_y = self._get_data(cuda=cuda) with gpytorch.fast_pred_var(), gpytorch.settings.debug(False): - # We're manually going to set the hyperparameters to - # something they shouldn't be + # We're manually going to set the hyperparameters to something they shouldn't be likelihood = GaussianLikelihood(noise_prior=SmoothedBoxPrior(exp(-3), exp(3), sigma=0.1)) gp_model = ExactGPModel(train_x, train_y, likelihood) mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, gp_model) @@ -130,6 +150,10 @@ def test_posterior_latent_gp_and_likelihood_fast_pred_var(self): gp_model.mean_module.initialize(constant=0) likelihood.initialize(log_noise=1) + if cuda: + gp_model.cuda() + likelihood.cuda() + # Find optimal model hyperparameters gp_model.train() likelihood.train() @@ -167,46 +191,9 @@ def test_posterior_latent_gp_and_likelihood_fast_pred_var(self): self.assertLess(torch.max(var_diff / noise), 0.05) - def test_posterior_latent_gp_and_likelihood_with_optimization_cuda(self): + def test_posterior_latent_gp_and_likelihood_fast_pred_var_cuda(self): if torch.cuda.is_available(): - # We're manually going to set the hyperparameters to - # something they shouldn't be - likelihood = GaussianLikelihood(noise_prior=SmoothedBoxPrior(exp(-3), exp(3), sigma=0.1)).cuda() - gp_model = ExactGPModel(train_x.cuda(), train_y.cuda(), likelihood).cuda() - mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, gp_model) - gp_model.rbf_covar_module.initialize(log_lengthscale=1) - gp_model.mean_module.initialize(constant=0) - likelihood.initialize(log_noise=1) - - # Find optimal model hyperparameters - gp_model.train() - likelihood.train() - optimizer = optim.Adam(gp_model.parameters(), lr=0.1) - optimizer.n_iter = 0 - with gpytorch.settings.debug(False): - for _ in range(50): - optimizer.zero_grad() - output = gp_model(train_x.cuda()) - loss = -mll(output, train_y.cuda()) - loss.backward() - optimizer.n_iter += 1 - optimizer.step() - - for param in gp_model.parameters(): - self.assertTrue(param.grad is not None) - self.assertGreater(param.grad.norm().item(), 0) - for param in likelihood.parameters(): - self.assertTrue(param.grad is not None) - self.assertGreater(param.grad.norm().item(), 0) - optimizer.step() - - # Test the model - gp_model.eval() - likelihood.eval() - test_function_predictions = likelihood(gp_model(test_x.cuda())) - mean_abs_error = torch.mean(torch.abs(test_y.cuda() - test_function_predictions.mean)) - - self.assertLess(mean_abs_error.squeeze().item(), 0.05) + self.test_posterior_latent_gp_and_likelihood_fast_pred_var(cuda=True) if __name__ == "__main__": diff --git a/test/likelihoods/test_general_multitask_gaussian_likelihood.py b/test/likelihoods/test_general_multitask_gaussian_likelihood.py index 729a25cef..ef265bf51 100644 --- 
a/test/likelihoods/test_general_multitask_gaussian_likelihood.py +++ b/test/likelihoods/test_general_multitask_gaussian_likelihood.py @@ -8,10 +8,9 @@ import gpytorch import torch from gpytorch.kernels import MultitaskKernel, RBFKernel -from gpytorch.likelihoods import MultitaskGaussianLikelihood +from gpytorch.likelihoods import MultitaskGaussianLikelihoodKronecker from gpytorch.means import ConstantMean, MultitaskMean from gpytorch.distributions import MultitaskMultivariateNormal -from gpytorch.likelihoods.multitask_gaussian_likelihood import _eval_corr_matrix # Simple training data: let's try to learn a sine function @@ -54,7 +53,7 @@ def tearDown(self): torch.set_rng_state(self.rng_state) def test_multitask_low_rank_noise_covar(self): - likelihood = MultitaskGaussianLikelihood(num_tasks=2, rank=1) + likelihood = MultitaskGaussianLikelihoodKronecker(num_tasks=2, rank=1) model = MultitaskGPModel(train_x, train_y, likelihood) # Find optimal model hyperparameters model.train() @@ -74,7 +73,7 @@ def test_multitask_low_rank_noise_covar(self): # Again, note feeding duplicated x_data and indices indicating which task output = model(train_x) # TODO: Fix this view call!! - loss = -mll(output, train_y, train_x) + loss = -mll(output, train_y) loss.backward() optimizer.step() @@ -82,9 +81,13 @@ def test_multitask_low_rank_noise_covar(self): model.eval() likelihood.eval() - task_corr = _eval_corr_matrix(likelihood.task_noise_corr_factor, likelihood.task_noise_corr_diag) - noise_diag = likelihood.noise_covar.log_noise.squeeze().diag().exp().sqrt() - task_noise_covar = noise_diag.matmul(task_corr).matmul(noise_diag) + num_tasks = 2 + task_noise_covar_factor = likelihood.task_noise_covar_factor + noise = likelihood.noise + task_noise_covar = task_noise_covar_factor.matmul( + task_noise_covar_factor.transpose(-1, -2) + ) + noise * torch.eye(num_tasks) + self.assertGreater(task_noise_covar[0, 0, 1].item(), 0.05) From 864ce0c3596c7e10595b037ea98b06e491aae0ed Mon Sep 17 00:00:00 2001 From: balandat Date: Thu, 22 Nov 2018 08:35:27 -0800 Subject: [PATCH 11/12] Update docstrings, use Kronecker MTGLh in batch MT test case --- gpytorch/likelihoods/gaussian_likelihood.py | 2 + .../multitask_gaussian_likelihood.py | 78 +++++++------------ gpytorch/likelihoods/noise_models.py | 28 +++---- .../test_batch_multitask_gp_regression.py | 8 +- 4 files changed, 50 insertions(+), 66 deletions(-) diff --git a/gpytorch/likelihoods/gaussian_likelihood.py b/gpytorch/likelihoods/gaussian_likelihood.py index 18dffa30c..cf1ae6ff8 100644 --- a/gpytorch/likelihoods/gaussian_likelihood.py +++ b/gpytorch/likelihoods/gaussian_likelihood.py @@ -13,6 +13,8 @@ class _GaussianLikelihoodBase(Likelihood): + """Base class for Gaussian Likelihoods, supporting general heteroskedastic noise models. """ + def __init__(self, noise_covar): super().__init__() self.noise_covar = noise_covar diff --git a/gpytorch/likelihoods/multitask_gaussian_likelihood.py b/gpytorch/likelihoods/multitask_gaussian_likelihood.py index ab69c1fb4..1651b48a4 100644 --- a/gpytorch/likelihoods/multitask_gaussian_likelihood.py +++ b/gpytorch/likelihoods/multitask_gaussian_likelihood.py @@ -20,31 +20,23 @@ class _MultitaskGaussianLikelihoodBase(_GaussianLikelihoodBase): - """ - A convenient extension of the :class:`gpytorch.likelihoods.GaussianLikelihood` to the multitask setting that allows - for a full cross-task covariance structure for the noise. The fitted covariance matrix has rank `rank`. 
- If a strictly diagonal task noise covariance matrix is desired, then rank=0 should be set. (This option still - allows for a different `log_noise` parameter for each task.) - - Like the Gaussian likelihood, this object can be used with exact inference. - """ + """Base class for multi-task Gaussian Likelihoods, supporting general heteroskedastic noise models. """ def __init__(self, num_tasks, noise_covar, rank=0, task_correlation_prior=None, batch_size=1): """ Args: - num_tasks (int): Number of tasks. - - noise_covar (:obj:`gpytorch.module.Module`): A model for the noise covariance. This can be a - simple homoskedastic model, or a GP that is to be fitted on the observed measurement errors. - - rank (int): The rank of the task noise covariance matrix to fit. If `rank` is set to 0, - then a diagonal covariance matrix is fit. - - task_correlation_prior (:obj:`gpytorch.priors.Prior`): Prior to use over the task noise correlation matrix. - Only used when `rank` > 0. - - batch_size (int): Number of batches. - + num_tasks (int): + Number of tasks. + noise_covar (:obj:`gpytorch.module.Module`): + A model for the noise covariance. This can be a simple homoskedastic noise model, or a GP + that is to be fitted on the observed measurement errors. + rank (int): + The rank of the task noise covariance matrix to fit. If `rank` is set to 0, then a diagonal covariance + matrix is fit. + task_correlation_prior (:obj:`gpytorch.priors.Prior`): + Prior to use over the task noise correlation matrix. Only used when `rank` > 0. + batch_size (int): + Number of batches. """ super().__init__(noise_covar=noise_covar) if rank != 0: @@ -74,27 +66,12 @@ def _eval_corr_matrix(self): def forward(self, input, *params): """ - Adds the log task noises to the diagonal of the covariance matrix of the supplied - :obj:`gpytorch.distributions.MultivariateNormal` or - :obj:`gpytorch.distributions.MultitaskMultivariateNormal`, in case of - `rank` == 0. Otherwise, adds a rank `rank` covariance matrix to it. + Adds the task noises to the diagonal of the covariance matrix of the supplied + :obj:`gpytorch.distributions.MultivariateNormal` or :obj:`gpytorch.distributions.MultitaskMultivariateNormal`, + in case of `rank` == 0. Otherwise, adds a rank `rank` covariance matrix to it. - To accomplish this, we form a new :obj:`gpytorch.lazy.KroneckerProductLazyTensor` between :math:`I_{n}`, - an identity matrix with size equal to the data and a (not necessarily diagonal) matrix containing the task - noises :math:`D_{t}`. - - We also incorporate a shared `log_noise` parameter from the base - :class:`gpytorch.likelihoods.GaussianLikelihood` that we extend. - - The final covariance matrix after this method is then :math:`K + D_{t} \otimes I_{n} + \sigma^{2}I_{nt}`. - - Args: - input (:obj:`gpytorch.distributions.MultitaskMultivariateNormal`): Random variable whose covariance - matrix is a :obj:`gpytorch.lazy.LazyTensor` we intend to augment. - Returns: - :obj:`gpytorch.distributions.MultitaskMultivariateNormal`: A new random variable whose covariance - matrix is a :obj:`gpytorch.lazy.LazyTensor` with :math:`D_{t} \otimes I_{n}` and :math:`\sigma^{2}I_{nt}` - added. 
+ This scales the task correlations appropriately by the variances at the different points provided + by the noise variance model (evaluated at the provided params). """ mean, covar = input.mean, input.lazy_covariance_matrix batch_shape, n = covar.shape[:-2], covar.shape[-1] // self.num_tasks @@ -142,9 +119,12 @@ class MultitaskGaussianLikelihood(_MultitaskGaussianLikelihoodBase): A convenient extension of the :class:`gpytorch.likelihoods.GaussianLikelihood` to the multitask setting that allows for a full cross-task covariance structure for the noise. The fitted covariance matrix has rank `rank`. If a strictly diagonal task noise covariance matrix is desired, then rank=0 should be set. (This option still - allows for a different `log_noise` parameter for each task.) + allows for a different `log_noise` parameter for each task.) This likelihood assumes homoskedastic noise. Like the Gaussian likelihood, this object can be used with exact inference. + + Note: This currently does not yet support batched training and evaluation. If you need support for this, + use MultitaskGaussianLikelihoodKronecker for the time being. """ def __init__( @@ -218,9 +198,12 @@ class MultitaskGaussianLikelihoodKronecker(_MultitaskGaussianLikelihoodBase): A convenient extension of the :class:`gpytorch.likelihoods.GaussianLikelihood` to the multitask setting that allows for a full cross-task covariance structure for the noise. The fitted covariance matrix has rank `rank`. If a strictly diagonal task noise covariance matrix is desired, then rank=0 should be set. (This option still - allows for a different `log_noise` parameter for each task.) + allows for a different `noise` parameter for each task.) Like the Gaussian likelihood, this object can be used with exact inference. + + Note: This likelihood is scheduled to be deprecated and replaced by an improved version of + `MultitaskGaussianLikelihood`. Use this only for compatibility with batched multitask models. """ def __init__( @@ -284,16 +267,15 @@ def _eval_covar_matrix(self): def forward(self, input, *params): """ - Adds the log task noises to the diagonal of the covariance matrix of the supplied - :obj:`gpytorch.distributions.MultivariateNormal` or - :obj:`gpytorch.distributions.MultitaskMultivariateNormal`, in case of - `rank` == 0. Otherwise, adds a rank `rank` covariance matrix to it. + Adds the task noises to the diagonal of the covariance matrix of the supplied + :obj:`gpytorch.distributions.MultivariateNormal` or :obj:`gpytorch.distributions.MultitaskMultivariateNormal`, + in case of `rank` == 0. Otherwise, adds a rank `rank` covariance matrix to it. To accomplish this, we form a new :obj:`gpytorch.lazy.KroneckerProductLazyTensor` between :math:`I_{n}`, an identity matrix with size equal to the data and a (not necessarily diagonal) matrix containing the task noises :math:`D_{t}`. - We also incorporate a shared `raw_noise` parameter from the base + We also incorporate a shared `noise` parameter from the base :class:`gpytorch.likelihoods.GaussianLikelihood` that we extend. The final covariance matrix after this method is then :math:`K + D_{t} \otimes I_{n} + \sigma^{2}I_{nt}`. diff --git a/gpytorch/likelihoods/noise_models.py b/gpytorch/likelihoods/noise_models.py index 94f034bfe..96474a7c4 100644 --- a/gpytorch/likelihoods/noise_models.py +++ b/gpytorch/likelihoods/noise_models.py @@ -34,29 +34,29 @@ def _set_noise(self, value): def forward(self, *params, shape=None): """In the homoskedastic case, the parameters are only used to infer the required shape.
Here are the possible scenarios: - 1) non-batched noise, non-batched input, non-MT -> noise_diag shape is `n` - 2) non-batched noise, non-batched input, MT -> noise_diag shape is `nt` - 3) non-batched noise, batched input, non-MT -> noise_diag shape is `b x n` with b' the broadcasted batch shape - 4) non-batched noise, batched input, MT -> noise_diag shape is `b x nt` - 5) batched noise, non-batched input, non-MT -> noise_diag shape is `b x n` - 6) batched noise, non-batched input, MT -> noise_diag shape is `b x nt` - 7) batched noise, batched input, non-MT -> noise_diag shape is `b' x n` - 8) batched noise, batched input, MT -> noise_diag shape is `b' x nt` + - non-batched noise, non-batched input, non-MT -> noise_diag shape is `n` + - non-batched noise, non-batched input, MT -> noise_diag shape is `nt` + - non-batched noise, batched input, non-MT -> noise_diag shape is `b x n` with b' the broadcasted batch shape + - non-batched noise, batched input, MT -> noise_diag shape is `b x nt` + - batched noise, non-batched input, non-MT -> noise_diag shape is `b x n` + - batched noise, non-batched input, MT -> noise_diag shape is `b x nt` + - batched noise, batched input, non-MT -> noise_diag shape is `b' x n` + - batched noise, batched input, MT -> noise_diag shape is `b' x nt` where `n` is the number of evaluation points and `t` is the number of tasks (i.e. `num_tasks` of self.noise). - So bascially we should always have this be `b' x nt`, with `b'` appropriately broadcast from the noise parameter - and input batch shapes. `n` and the input batch shape we get either from shape arg or from the params input. - For this it should be sufficient if we take in a single `shape` arg, where the convention is that shape[:-1] - is the batch shape of the input, and shape[-1] is `n`. + So bascially the shape is always `b' x nt`, with `b'` appropriately broadcast from the noise parameter and input + batch shapes. `n` and the input batch shape are determined either from the shape arg or from the params input. + For this it is sufficient to take in a single `shape` arg, with the convention that shape[:-1] is the batch shape + of the input, and shape[-1] is `n`. 
""" if shape is None: p = params[0] if torch.is_tensor(params[0]) else params[0][0] shape = p.shape if len(p.shape) == 1 else p.shape[:-1] - batch_shape, n = shape[:-1], shape[-1] noise = self.noise + batch_shape, n = shape[:-1], shape[-1] noise_batch_shape = noise.shape[:-1] if noise.shape[-2] > 1 else torch.Size() num_tasks = noise.shape[-1] - noise = noise.unsqueeze(-2) batch_shape = _mul_broadcast_shape(noise_batch_shape, batch_shape) + noise = noise.unsqueeze(-2) if len(batch_shape) == 0: noise = noise.squeeze(0) noise_diag = noise.expand(batch_shape + torch.Size([n, num_tasks])).contiguous() diff --git a/test/examples/test_batch_multitask_gp_regression.py b/test/examples/test_batch_multitask_gp_regression.py index a51425de4..89223bcf7 100644 --- a/test/examples/test_batch_multitask_gp_regression.py +++ b/test/examples/test_batch_multitask_gp_regression.py @@ -9,7 +9,7 @@ from torch import optim from gpytorch.kernels import RBFKernel, MultitaskKernel from gpytorch.means import ConstantMean, MultitaskMean -from gpytorch.likelihoods import MultitaskGaussianLikelihood +from gpytorch.likelihoods import MultitaskGaussianLikelihoodKronecker from gpytorch.distributions import MultitaskMultivariateNormal @@ -69,7 +69,7 @@ def tearDown(self): def test_train_on_single_set_test_on_batch(self): # We're manually going to set the hyperparameters to something they shouldn't be - likelihood = MultitaskGaussianLikelihood( + likelihood = MultitaskGaussianLikelihoodKronecker( noise_prior=gpytorch.priors.NormalPrior(loc=torch.zeros(1), scale=torch.ones(1)), num_tasks=2 ) gp_model = ExactGPModel(train_x1, train_y1, likelihood) @@ -112,7 +112,7 @@ def test_train_on_single_set_test_on_batch(self): def test_train_on_batch_test_on_batch(self): # We're manually going to set the hyperparameters to something they shouldn't be - likelihood = MultitaskGaussianLikelihood( + likelihood = MultitaskGaussianLikelihoodKronecker( noise_prior=gpytorch.priors.NormalPrior(loc=torch.zeros(2), scale=torch.ones(2)), batch_size=2, num_tasks=2 ) gp_model = ExactGPModel(train_x12, train_y12, likelihood, batch_size=2) @@ -151,7 +151,7 @@ def test_train_on_batch_test_on_batch(self): def test_train_on_batch_test_on_batch_shared_hypers_over_batch(self): # We're manually going to set the hyperparameters to something they shouldn't be - likelihood = MultitaskGaussianLikelihood( + likelihood = MultitaskGaussianLikelihoodKronecker( noise_prior=gpytorch.priors.NormalPrior(loc=torch.zeros(2), scale=torch.ones(2)), batch_size=1, num_tasks=2 ) gp_model = ExactGPModel(train_x12, train_y12, likelihood, batch_size=1) From d6ca194fb9d1bb72c41e876d8dbfa2a1e36465ef Mon Sep 17 00:00:00 2001 From: balandat Date: Thu, 22 Nov 2018 08:42:49 -0800 Subject: [PATCH 12/12] Fix lint --- gpytorch/likelihoods/noise_models.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/gpytorch/likelihoods/noise_models.py b/gpytorch/likelihoods/noise_models.py index 96474a7c4..805570bba 100644 --- a/gpytorch/likelihoods/noise_models.py +++ b/gpytorch/likelihoods/noise_models.py @@ -43,10 +43,10 @@ def forward(self, *params, shape=None): - batched noise, batched input, non-MT -> noise_diag shape is `b' x n` - batched noise, batched input, MT -> noise_diag shape is `b' x nt` where `n` is the number of evaluation points and `t` is the number of tasks (i.e. `num_tasks` of self.noise). - So bascially the shape is always `b' x nt`, with `b'` appropriately broadcast from the noise parameter and input - batch shapes. 
`n` and the input batch shape are determined either from the shape arg or from the params input. - For this it is sufficient to take in a single `shape` arg, with the convention that shape[:-1] is the batch shape - of the input, and shape[-1] is `n`. + So basically the shape is always `b' x nt`, with `b'` appropriately broadcast from the noise parameter and + input batch shapes. `n` and the input batch shape are determined either from the shape arg or from the params + input. For this it is sufficient to take in a single `shape` arg, with the convention that shape[:-1] is the + batch shape of the input, and shape[-1] is `n`. """ if shape is None: p = params[0] if torch.is_tensor(params[0]) else params[0][0]
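
As a reading aid for the `MultitaskGaussianLikelihoodKronecker.forward` docstring above, the sketch below spells out the noise structure it describes, `K + D_t (x) I_n + sigma^2 I_nt`, with dense tensors and made-up sizes. This is plain PyTorch for illustration only, not the patch's code path, which builds the same structure lazily via `KroneckerProductLazyTensor`:

    import torch

    # Hypothetical sizes for illustration: n data points, t tasks.
    n, t = 5, 2
    K = torch.eye(n * t)                              # stand-in for the prior covariance over all n*t outputs
    factor = torch.randn(t, 1)                        # rank-1 factor of the task noise covariance
    D_t = factor @ factor.t() + 0.1 * torch.eye(t)    # task noise covariance D_t (low-rank plus diagonal)
    sigma2 = 0.05                                     # shared homoskedastic noise variance

    # Dense version of the structure from the docstring: K + D_t (x) I_n + sigma^2 * I_{nt}
    noise_covar = torch.kron(D_t, torch.eye(n)) + sigma2 * torch.eye(n * t)
    full_covar = K + noise_covar                      # covariance of the likelihood's output distribution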
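The `b' x nt` broadcasting convention in the HomoskedasticNoise forward docstring can likewise be checked with a small standalone re-implementation. Assumptions: `torch.broadcast_shapes` stands in for gpytorch's `_mul_broadcast_shape`, the noise parameter has shape `b x t`, and this is an illustrative sketch rather than the library code itself:

    import torch

    def homoskedastic_noise_diag(noise, shape):
        # Standalone sketch of the `b' x nt` rule described above (not the gpytorch code).
        # noise: `b x t` per-task noise variances (b == 1 for a non-batched noise parameter)
        # shape: input shape; shape[:-1] is the input batch shape, shape[-1] is `n`
        batch_shape, n = shape[:-1], shape[-1]
        noise_batch_shape = noise.shape[:-1] if noise.shape[-2] > 1 else torch.Size()
        num_tasks = noise.shape[-1]
        batch_shape = torch.broadcast_shapes(noise_batch_shape, batch_shape)
        noise = noise.unsqueeze(-2)
        if len(batch_shape) == 0:
            noise = noise.squeeze(0)
        noise_diag = noise.expand(batch_shape + torch.Size([n, num_tasks])).contiguous()
        return noise_diag.view(batch_shape + torch.Size([n * num_tasks]))

    noise = torch.tensor([[0.1, 0.2]])                                # non-batched noise, t = 2 tasks
    print(homoskedastic_noise_diag(noise, torch.Size([5])).shape)     # `nt`      -> torch.Size([10])
    print(homoskedastic_noise_diag(noise, torch.Size([3, 5])).shape)  # `b' x nt` -> torch.Size([3, 10])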