diff --git a/gpytorch/functions/__init__.py b/gpytorch/functions/__init__.py index e94f7e5aa..ad62644d9 100644 --- a/gpytorch/functions/__init__.py +++ b/gpytorch/functions/__init__.py @@ -64,7 +64,14 @@ def dsmm(sparse_mat, dense_mat): def exact_predictive_mean( - full_covar, full_mean, train_labels, num_train, likelihood, precomputed_cache=None, non_batch_train=False + full_covar, + full_mean, + train_inputs, + train_labels, + num_train, + likelihood, + precomputed_cache=None, + non_batch_train=False, ): """ Computes the posterior predictive mean of a GP @@ -73,6 +80,7 @@ def exact_predictive_mean( - full_covar ( (n+t) x (n+t) ) - the block prior covariance matrix of training and testing points [ K_XX, K_XX*; K_X*X, K_X*X* ] - full_mean (n + t) - the training and test prior means, stacked on top of each other + - train_inputs (:obj:`torch.tensor`) - The training data inputs - train_labels (n) - the training labels minus the training prior mean - noise (1) - the observed noise (from the likelihood) - precomputed_cache - speeds up subsequent computations (default: None) @@ -88,17 +96,20 @@ def exact_predictive_mean( full_covar = NonLazyTensor(full_covar) return full_covar.exact_predictive_mean( - full_mean, train_labels, num_train, likelihood, precomputed_cache, non_batch_train + full_mean, train_inputs, train_labels, num_train, likelihood, precomputed_cache, non_batch_train ) -def exact_predictive_covar(full_covar, num_train, likelihood, precomputed_cache=None, non_batch_train=False): +def exact_predictive_covar( + full_covar, train_inputs, num_train, likelihood, precomputed_cache=None, non_batch_train=False +): """ Computes the posterior predictive covariance of a GP Args: - full_covar ( (n+t) x (n+t) ) - the block prior covariance matrix of training and testing points [ K_XX, K_XX*; K_X*X, K_X*X* ] + - train_inputs (:obj:`torch.tensor`) - The training data inputs - num_train (int) - how many training points are there in the full covariance matrix - noise (1) - the observed noise (from the likelihood) - precomputed_cache - speeds up subsequent computations (default: None) @@ -113,7 +124,8 @@ def exact_predictive_covar(full_covar, num_train, likelihood, precomputed_cache= from ..lazy.non_lazy_tensor import NonLazyTensor full_covar = NonLazyTensor(full_covar) - return full_covar.exact_predictive_covar(num_train, likelihood, precomputed_cache, non_batch_train) + + return full_covar.exact_predictive_covar(train_inputs, num_train, likelihood, precomputed_cache, non_batch_train) def log_normal_cdf(x): diff --git a/gpytorch/kernels/__init__.py b/gpytorch/kernels/__init__.py index 0025bd8c8..591786d3e 100644 --- a/gpytorch/kernels/__init__.py +++ b/gpytorch/kernels/__init__.py @@ -1,12 +1,12 @@ #!/usr/bin/env python3 -from .kernel import Kernel, AdditiveKernel, ProductKernel from .additive_structure_kernel import AdditiveStructureKernel from .cosine_kernel import CosineKernel -from .grid_kernel import GridKernel from .grid_interpolation_kernel import GridInterpolationKernel +from .grid_kernel import GridKernel from .index_kernel import IndexKernel from .inducing_point_kernel import InducingPointKernel +from .kernel import AdditiveKernel, Kernel, ProductKernel from .lcm_kernel import LCMKernel from .linear_kernel import LinearKernel from .matern_kernel import MaternKernel @@ -18,6 +18,7 @@ from .spectral_mixture_kernel import SpectralMixtureKernel from .white_noise_kernel import WhiteNoiseKernel + __all__ = [ "Kernel", "AdditiveKernel", @@ -27,7 +28,6 @@ "GridInterpolationKernel", "IndexKernel", "InducingPointKernel", - 
"InducingPointKernelAddedLossTerm", "LCMKernel", "LinearKernel", "MaternKernel", diff --git a/gpytorch/kernels/white_noise_kernel.py b/gpytorch/kernels/white_noise_kernel.py index 8c453edd9..668382428 100644 --- a/gpytorch/kernels/white_noise_kernel.py +++ b/gpytorch/kernels/white_noise_kernel.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 import torch + from . import Kernel from ..lazy import DiagLazyTensor, ZeroLazyTensor @@ -55,4 +56,4 @@ def forward(self, x1, x2, **params): elif x1.size(-2) == x2.size(-2) and x1.size(-2) == self.variances.size(1) and torch.equal(x1, x2): return DiagLazyTensor(self.variances.view(self.variances.size(0), -1)) else: - return ZeroLazyTensor(x1.size(-3), x1.size(-2), x2.size(-2)) + return ZeroLazyTensor(x1.size(-3), x1.size(-2), x2.size(-2), dtype=x1.dtype, device=x1.device) diff --git a/gpytorch/lazy/interpolated_lazy_tensor.py b/gpytorch/lazy/interpolated_lazy_tensor.py index c03e01692..f36e045e0 100644 --- a/gpytorch/lazy/interpolated_lazy_tensor.py +++ b/gpytorch/lazy/interpolated_lazy_tensor.py @@ -363,7 +363,14 @@ def diag(self): return res def exact_predictive_mean( - self, full_mean, train_labels, num_train, likelihood, precomputed_cache=None, non_batch_train=False + self, + full_mean, + train_inputs, + train_labels, + num_train, + likelihood, + precomputed_cache=None, + non_batch_train=False, ): from ..distributions import MultivariateNormal @@ -382,7 +389,7 @@ def exact_predictive_mean( train_mean = full_mean.narrow(-1, 0, train_train_covar.size(-1)) - mvn = likelihood(MultivariateNormal(train_mean, train_train_covar)) + mvn = likelihood(MultivariateNormal(train_mean, train_train_covar), train_inputs) train_mean, train_train_covar = mvn.mean, mvn.lazy_covariance_matrix train_train_covar_inv_labels = train_train_covar.inv_matmul((train_labels - train_mean).unsqueeze(-1)) @@ -422,11 +429,15 @@ def _exact_predictive_covar_inv_quad_form_root(self, precomputed_cache, test_tra res = left_interp(test_interp_indices, test_interp_values, precomputed_cache) return res - def exact_predictive_covar(self, num_train, likelihood, precomputed_cache=None, non_batch_train=False): + def exact_predictive_covar( + self, train_inputs, num_train, likelihood, precomputed_cache=None, non_batch_train=False + ): from ..distributions import MultivariateNormal if not beta_features.fast_pred_var.on() and not beta_features.fast_pred_samples.on(): - return super(InterpolatedLazyTensor, self).exact_predictive_covar(num_train, likelihood, precomputed_cache) + return super(InterpolatedLazyTensor, self).exact_predictive_covar( + train_inputs, num_train, likelihood, precomputed_cache + ) n_test = self.size(-2) - num_train train_interp_indices = self.left_interp_indices.narrow(-2, 0, num_train) @@ -452,7 +463,7 @@ def exact_predictive_covar(self, num_train, likelihood, precomputed_cache=None, ) grv = MultivariateNormal(torch.zeros(1), train_train_covar) - train_train_covar = likelihood(grv).lazy_covariance_matrix + train_train_covar = likelihood(grv, train_inputs).lazy_covariance_matrix # Get probe vectors for inverse root num_probe_vectors = beta_features.fast_pred_var.num_probe_vectors() diff --git a/gpytorch/lazy/lazy_evaluated_kernel_tensor.py b/gpytorch/lazy/lazy_evaluated_kernel_tensor.py index bd1dc4742..af2f63585 100644 --- a/gpytorch/lazy/lazy_evaluated_kernel_tensor.py +++ b/gpytorch/lazy/lazy_evaluated_kernel_tensor.py @@ -44,6 +44,17 @@ def _quad_form_derivative(self, left_vecs, right_vecs): def _transpose_nonbatch(self): return self.__class__(self.kernel, self.x2, self.x1, 
**self.params) + def _batch_get_indices(self, batch_indices, left_indices, right_indices): + from ..kernels import Kernel + + x1 = self.x1[batch_indices, left_indices, :].unsqueeze(0) + x2 = self.x2[batch_indices, right_indices, :].unsqueeze(0) + res = super(Kernel, self.kernel).__call__(x1.transpose(-1, -2), x2.transpose(-1, -2)) + if isinstance(res, LazyTensor): + res = res.evaluate() + res = res.view(-1) + return res + def _get_indices(self, left_indices, right_indices): from ..kernels import Kernel @@ -166,25 +177,34 @@ def evaluate(self): return self.evaluate_kernel().evaluate() def exact_predictive_mean( - self, full_mean, train_labels, num_train, likelihood, precomputed_cache=None, non_batch_train=False + self, + full_mean, + train_inputs, + train_labels, + num_train, + likelihood, + precomputed_cache=None, + non_batch_train=False, ): if self.kernel.has_custom_exact_predictions: return self.evaluate_kernel().exact_predictive_mean( - full_mean, train_labels, num_train, likelihood, precomputed_cache, non_batch_train + full_mean, train_inputs, train_labels, num_train, likelihood, precomputed_cache, non_batch_train ) else: return super(LazyEvaluatedKernelTensor, self).exact_predictive_mean( - full_mean, train_labels, num_train, likelihood, precomputed_cache, non_batch_train + full_mean, train_inputs, train_labels, num_train, likelihood, precomputed_cache, non_batch_train ) - def exact_predictive_covar(self, num_train, likelihood, precomputed_cache=None, non_batch_train=False): + def exact_predictive_covar( + self, train_inputs, num_train, likelihood, precomputed_cache=None, non_batch_train=False + ): if self.kernel.has_custom_exact_predictions: return self.evaluate_kernel().exact_predictive_covar( - num_train, likelihood, precomputed_cache, non_batch_train + train_inputs, num_train, likelihood, precomputed_cache, non_batch_train ) else: return super(LazyEvaluatedKernelTensor, self).exact_predictive_covar( - num_train, likelihood, precomputed_cache, non_batch_train + train_inputs, num_train, likelihood, precomputed_cache, non_batch_train ) def repeat(self, *sizes): diff --git a/gpytorch/lazy/lazy_tensor.py b/gpytorch/lazy/lazy_tensor.py index 5872c87b7..c503b5658 100644 --- a/gpytorch/lazy/lazy_tensor.py +++ b/gpytorch/lazy/lazy_tensor.py @@ -505,7 +505,14 @@ def evaluate_kernel(self): return self.representation_tree()(*self.representation()) def exact_predictive_mean( - self, full_mean, train_labels, num_train, likelihood, precomputed_cache=None, non_batch_train=False + self, + full_mean, + train_inputs, + train_labels, + num_train, + likelihood, + precomputed_cache=None, + non_batch_train=False, ): """ Computes the posterior predictive covariance of a GP @@ -514,6 +521,7 @@ def exact_predictive_mean( Args: full_mean (:obj:`torch.tensor`): the training and test prior means, stacked on top of each other + train_inputs (:obj:`torch.tensor`): The training data inputs train_labels (:obj:`torch.tensor`): the training labels minus the training prior mean noise (:obj:`torch.tensor`): the observed noise (from the likelihood) precomputed_cache (optional): speeds up subsequent computations (default: None) @@ -537,7 +545,8 @@ def exact_predictive_mean( if non_batch_train and train_mean.dim() == 2: train_mean = train_mean[0] train_labels = train_labels[0] - mvn = likelihood(MultivariateNormal(train_mean, train_train_covar)) + mvn = likelihood(MultivariateNormal(train_mean, train_train_covar), train_inputs) + train_mean, train_train_covar = mvn.mean, mvn.lazy_covariance_matrix train_labels_offset = 
train_labels - train_mean @@ -563,13 +572,16 @@ def exact_predictive_mean( return res, precomputed_cache.detach() - def exact_predictive_covar(self, num_train, likelihood, precomputed_cache=None, non_batch_train=False): + def exact_predictive_covar( + self, train_inputs, num_train, likelihood, precomputed_cache=None, non_batch_train=False + ): """ Computes the posterior predictive covariance of a GP Assumes that self is the block prior covariance matrix of training and testing points [ K_XX, K_XX*; K_X*X, K_X*X* ] Args: + train_inputs (:obj:`torch.tensor`): The training data inputs num_train (int): The number of training points in the full covariance matrix noise (scalar): The observed noise (from the likelihood) precomputed_cache (optional): speeds up subsequent computations (default: None) @@ -589,7 +601,9 @@ def exact_predictive_covar(self, num_train, likelihood, precomputed_cache=None, test_train_covar = self[num_train:, :num_train] test_test_covar = self[num_train:, num_train:] - train_train_covar = likelihood(MultivariateNormal(torch.zeros(1), train_train_covar)).lazy_covariance_matrix + train_train_covar = likelihood( + MultivariateNormal(torch.zeros(1), train_train_covar), train_inputs + ).lazy_covariance_matrix if not beta_features.fast_pred_var.on(): from .matmul_lazy_tensor import MatmulLazyTensor diff --git a/gpytorch/likelihoods/__init__.py b/gpytorch/likelihoods/__init__.py index da966d001..d0634be70 100644 --- a/gpytorch/likelihoods/__init__.py +++ b/gpytorch/likelihoods/__init__.py @@ -1,15 +1,25 @@ #!/usr/bin/env python3 from .likelihood import Likelihood -from .gaussian_likelihood import GaussianLikelihood -from .multitask_gaussian_likelihood import MultitaskGaussianLikelihood from .bernoulli_likelihood import BernoulliLikelihood +from .gaussian_likelihood import GaussianLikelihood, _GaussianLikelihoodBase +from .multitask_gaussian_likelihood import ( + MultitaskGaussianLikelihood, + MultitaskGaussianLikelihoodKronecker, + _MultitaskGaussianLikelihoodBase, +) +from .noise_models import HeteroskedasticNoise from .softmax_likelihood import SoftmaxLikelihood + __all__ = [ - "Likelihood", + "_GaussianLikelihoodBase", + "_MultitaskGaussianLikelihoodBase", + "BernoulliLikelihood", "GaussianLikelihood", + "HeteroskedasticNoise", + "Likelihood", "MultitaskGaussianLikelihood", - "BernoulliLikelihood", + "MultitaskGaussianLikelihoodKronecker", "SoftmaxLikelihood", ] diff --git a/gpytorch/likelihoods/gaussian_likelihood.py b/gpytorch/likelihoods/gaussian_likelihood.py index ff60f3dd2..cf1ae6ff8 100644 --- a/gpytorch/likelihoods/gaussian_likelihood.py +++ b/gpytorch/likelihoods/gaussian_likelihood.py @@ -1,64 +1,89 @@ #!/usr/bin/env python3 import math -import torch + +from torch.nn.functional import softplus + +from .. import settings from ..distributions import MultivariateNormal -from ..functions import add_diag -from ..likelihoods import Likelihood from ..lazy import DiagLazyTensor -from .. import settings +from ..likelihoods import Likelihood from ..utils.deprecation import _deprecate_kwarg -from ..utils.transforms import _get_inv_param_transform -from torch.nn.functional import softplus +from .noise_models import HomoskedasticNoise + + +class _GaussianLikelihoodBase(Likelihood): + """Base class for Gaussian Likelihoods, supporting general heteroskedastic noise models. 
""" + + def __init__(self, noise_covar): + super().__init__() + self.noise_covar = noise_covar + + def forward(self, input, *params): + if not isinstance(input, MultivariateNormal): + raise ValueError("Gaussian likelihoods require a MultivariateNormal input") + mean, covar = input.mean, input.lazy_covariance_matrix + if len(params) > 0: + # we can infer the shape from the params + shape = None + else: + # here shape[:-1] is the batch shape requested, and shape[-1] is `n`, the number of points + shape = mean.shape if len(mean.shape) == 1 else mean.shape[:-1] + noise_covar = self.noise_covar(*params, shape=shape) + full_covar = covar + noise_covar + return input.__class__(mean, full_covar) + def variational_log_probability(self, input, target): + raise NotImplementedError -class GaussianLikelihood(Likelihood): - r""" - """ +class GaussianLikelihood(_GaussianLikelihoodBase): def __init__(self, noise_prior=None, batch_size=1, param_transform=softplus, inv_param_transform=None, **kwargs): noise_prior = _deprecate_kwarg(kwargs, "log_noise_prior", "noise_prior", noise_prior) - super(GaussianLikelihood, self).__init__() - self._param_transform = param_transform - self._inv_param_transform = _get_inv_param_transform(param_transform, inv_param_transform) - self.register_parameter(name="raw_noise", parameter=torch.nn.Parameter(torch.zeros(batch_size, 1))) - if noise_prior is not None: - self.register_prior("noise_prior", noise_prior, lambda: self.noise, lambda v: self._set_noise(v)) + noise_covar = HomoskedasticNoise( + noise_prior=noise_prior, + batch_size=batch_size, + param_transform=param_transform, + inv_param_transform=inv_param_transform, + ) + super().__init__(noise_covar=noise_covar) + + def _param_transform(self, value): + return self.noise_covar._param_transform(value) + + def _inv_param_transform(self, value): + return self.noise_covar._inv_param_transform(value) @property def noise(self): - return self._param_transform(self.raw_noise) + return self.noise_covar.noise @noise.setter def noise(self, value): - self._set_noise(value) - - def _set_noise(self, value): - self.initialize(raw_noise=self._inv_param_transform(value)) + self.noise_covar.noise = value - def forward(self, input): - if not isinstance(input, MultivariateNormal): - raise ValueError("GaussianLikelihood requires a MultivariateNormal input") - mean, covar = input.mean, input.lazy_covariance_matrix - noise = self.noise - if covar.ndimension() == 2: - if settings.debug.on() and noise.size(0) > 1: - raise RuntimeError("With batch_size > 1, expected a batched MultivariateNormal distribution.") - noise = noise.squeeze(0) + @property + def raw_noise(self): + return self.noise_covar.raw_noise - return input.__class__(mean, add_diag(covar, noise)) + @raw_noise.setter + def raw_noise(self, value): + self.noise_covar.raw_noise = value def variational_log_probability(self, input, target): mean, variance = input.mean, input.variance - log_noise = self.log_noise + noise = self.noise_covar.noise + + if mean.dim() > target.dim(): + target = target.unsqueeze(-1) if variance.ndimension() == 1: - if settings.debug.on() and log_noise.size(0) > 1: + if settings.debug.on() and noise.size(0) > 1: raise RuntimeError("With batch_size > 1, expected a batched MultivariateNormal distribution.") - log_noise = log_noise.squeeze(0) + noise = noise.squeeze(0) - res = -0.5 * ((target - mean) ** 2 + variance) / self.noise - res += -0.5 * log_noise - 0.5 * math.log(2 * math.pi) + res = -0.5 * ((target - mean) ** 2 + variance) / noise + res += -0.5 * 
noise.log() - 0.5 * math.log(2 * math.pi) return res.sum(-1) def pyro_sample_y(self, variational_dist_f, y_obs, sample_shape, name_prefix=""): diff --git a/gpytorch/likelihoods/multitask_gaussian_likelihood.py b/gpytorch/likelihoods/multitask_gaussian_likelihood.py index afb3d4245..1651b48a4 100644 --- a/gpytorch/likelihoods/multitask_gaussian_likelihood.py +++ b/gpytorch/likelihoods/multitask_gaussian_likelihood.py @@ -1,29 +1,137 @@ #!/usr/bin/env python3 import torch -from ..functions import add_diag -from ..lazy import DiagLazyTensor, KroneckerProductLazyTensor, RootLazyTensor -from ..likelihoods import GaussianLikelihood +from torch.nn.functional import softplus + from .. import settings +from ..functions import add_diag +from ..lazy import ( + BlockDiagLazyTensor, + DiagLazyTensor, + KroneckerProductLazyTensor, + MatmulLazyTensor, + NonLazyTensor, + RootLazyTensor, +) +from ..likelihoods import Likelihood, _GaussianLikelihoodBase from ..utils.deprecation import _deprecate_kwarg -from torch.nn.functional import softplus +from ..utils.transforms import _get_inv_param_transform +from .noise_models import MultitaskHomoskedasticNoise + + +class _MultitaskGaussianLikelihoodBase(_GaussianLikelihoodBase): + """Base class for multi-task Gaussian Likelihoods, supporting general heteroskedastic noise models. """ + + def __init__(self, num_tasks, noise_covar, rank=0, task_correlation_prior=None, batch_size=1): + """ + Args: + num_tasks (int): + Number of tasks. + noise_covar (:obj:`gpytorch.module.Module`): + A model for the noise covariance. This can be a simple homoskedastic noise model, or a GP + that is to be fitted on the observed measurement errors. + rank (int): + The rank of the task noise covariance matrix to fit. If `rank` is set to 0, then a diagonal covariance + matrix is fit. + task_correlation_prior (:obj:`gpytorch.priors.Prior`): + Prior to use over the task noise correlation matrix. Only used when `rank` > 0. + batch_size (int): + Number of batches. + """ + super().__init__(noise_covar=noise_covar) + if rank != 0: + self.register_parameter( + name="task_noise_corr_factor", parameter=torch.nn.Parameter(torch.randn(batch_size, num_tasks, rank)) + ) + self.register_parameter( + name="task_noise_corr_diag", parameter=torch.nn.Parameter(torch.ones(batch_size, num_tasks)) + ) + if task_correlation_prior is not None: + self.register_prior( + "MultitaskErrorCorrelationPrior", task_correlation_prior, lambda: self._eval_corr_matrix + ) + elif task_correlation_prior is not None: + raise ValueError("Can only specify task_correlation_prior if rank>0") + self.num_tasks = num_tasks + self.rank = rank + + def _eval_corr_matrix(self): + corr_factor = self.task_noise_corr_factor.squeeze(0) + corr_diag = self.task_noise_corr_diag.squeeze(0) + M = corr_factor.matmul(corr_factor.transpose(-1, -2)) + idx = torch.arange(M.shape[-1], dtype=torch.long, device=M.device) + M[..., idx, idx] += corr_diag + sem_inv = 1 / torch.diagonal(M, dim1=-2, dim2=-1).sqrt().unsqueeze(-1) + return M * sem_inv.matmul(sem_inv.transpose(-1, -2)) + + def forward(self, input, *params): + """ + Adds the task noises to the diagonal of the covariance matrix of the supplied + :obj:`gpytorch.distributions.MultivariateNormal` or :obj:`gpytorch.distributions.MultitaskMultivariateNormal`, + in case of `rank` == 0. Otherwise, adds a rank `rank` covariance matrix to it. 
+ + This scales the task correlations appropriately by the variances at the different points provided + by the noise variance model (evaluated at the provided params) + """ + mean, covar = input.mean, input.lazy_covariance_matrix + batch_shape, n = covar.shape[:-2], covar.shape[-1] // self.num_tasks + + if len(batch_shape) > 1: + raise NotImplementedError("Batch shapes with dim > 1 not yet supported for MultiTask Likelihoods") + + # compute the noise covariance + if len(params) > 0: + shape = None + else: + shape = mean.shape if len(mean.shape) == 1 else mean.shape[:-1] + noise_covar = self.noise_covar(*params, shape=shape) + + if hasattr(self, "task_noise_corr_factor"): + # if rank > 0, compute the task correlation matrix + # TODO: This is inefficient, change repeat so it can repeat LazyTensors w/ multiple batch dimensions + task_corr = self._eval_corr_matrix() + exp_shape = batch_shape + torch.Size([n]) + task_corr.shape[-2:] + if len(batch_shape) == 1: + task_corr = task_corr.unsqueeze(-3) + task_corr_exp = NonLazyTensor(task_corr.expand(exp_shape)) + noise_sem = noise_covar.sqrt() + task_covar_blocks = MatmulLazyTensor(MatmulLazyTensor(noise_sem, task_corr_exp), noise_sem) + else: + # otherwise tasks are uncorrelated + task_covar_blocks = noise_covar + + if len(batch_shape) == 1: + # TODO: Properly support general batch shapes in BlockDiagLazyTensor (no shape arithmetic) + tcb_eval = task_covar_blocks.evaluate() + task_covar = BlockDiagLazyTensor( + NonLazyTensor(tcb_eval.view(-1, *tcb_eval.shape[-2:])), num_blocks=tcb_eval.shape[0] + ) + else: + task_covar = BlockDiagLazyTensor(task_covar_blocks) + return input.__class__(mean, covar + task_covar) + + def variational_log_probability(self, input, target): + raise NotImplementedError("Variational inference with Multitask Gaussian likelihood is not yet supported") -class MultitaskGaussianLikelihood(GaussianLikelihood): +class MultitaskGaussianLikelihood(_MultitaskGaussianLikelihoodBase): """ A convenient extension of the :class:`gpytorch.likelihoods.GaussianLikelihood` to the multitask setting that allows for a full cross-task covariance structure for the noise. The fitted covariance matrix has rank `rank`. If a strictly diagonal task noise covariance matrix is desired, then rank=0 should be set. (This option still - allows for a different `log_noise` parameter for each task.) + allows for a different `log_noise` parameter for each task.) This likelihood assumes homoskedastic noise. Like the Gaussian likelihood, this object can be used with exact inference. + + Note: This does not yet support batched training and evaluation. If you need support for this, + use MultitaskGaussianLikelihoodKronecker for the time being. """ def __init__( self, num_tasks, rank=0, - task_prior=None, + task_correlation_prior=None, batch_size=1, noise_prior=None, param_transform=softplus, @@ -37,18 +145,94 @@ def __init__( rank (int): The rank of the task noise covariance matrix to fit. If `rank` is set to 0, then a diagonal covariance matrix is fit. - task_prior (:obj:`gpytorch.priors.Prior`): Prior to use over the task noise covariance matrix if - `rank` > 0, or a prior over the log of just the diagonal elements, if `rank` == 0. + task_correlation_prior (:obj:`gpytorch.priors.Prior`): Prior to use over the task noise correlation matrix. + Only used when `rank` > 0. 
""" - noise_prior = _deprecate_kwarg(kwargs, "log_noise_prior", "noise_prior", noise_prior) - super(MultitaskGaussianLikelihood, self).__init__( - batch_size=batch_size, + task_correlation_prior = _deprecate_kwarg( + kwargs, "task_prior", "task_correlation_prior", task_correlation_prior + ) + noise_covar = MultitaskHomoskedasticNoise( + num_tasks=num_tasks, noise_prior=noise_prior, + batch_size=batch_size, param_transform=param_transform, inv_param_transform=inv_param_transform, ) + super().__init__( + num_tasks=num_tasks, + noise_covar=noise_covar, + rank=rank, + task_correlation_prior=task_correlation_prior, + batch_size=batch_size, + ) + self._param_transform = param_transform + self._inv_param_transform = _get_inv_param_transform(param_transform, inv_param_transform) + self.register_parameter(name="raw_noise", parameter=torch.nn.Parameter(torch.zeros(batch_size, 1))) + + @property + def noise(self): + return self._param_transform(self.raw_noise) + + @noise.setter + def noise(self, value): + self._set_noise(value) + def _set_noise(self, value): + self.initialize(raw_noise=self._inv_param_transform(value)) + + def forward(self, input, *params): + mvn = super().forward(input, *params) + mean, covar = mvn.mean, mvn.lazy_covariance_matrix + noise = self.noise + if covar.ndimension() == 2: + if settings.debug.on() and noise.size(0) > 1: + raise RuntimeError("With batch_size > 1, expected a batched MultitaskMultivariateNormal distribution.") + noise = noise.squeeze(0) + covar = add_diag(covar, noise) + return input.__class__(mean, covar) + + +class MultitaskGaussianLikelihoodKronecker(_MultitaskGaussianLikelihoodBase): + """ + A convenient extension of the :class:`gpytorch.likelihoods.GaussianLikelihood` to the multitask setting that allows + for a full cross-task covariance structure for the noise. The fitted covariance matrix has rank `rank`. + If a strictly diagonal task noise covariance matrix is desired, then rank=0 should be set. (This option still + allows for a different `noise` parameter for each task.) + + Like the Gaussian likelihood, this object can be used with exact inference. + + Note: This Likelihood is scheduled to be deprecated and replaced by an improved version of + `MultitaskGaussianLikelihood`. Use this only for compatibility with batched Multitask models. + """ + + def __init__( + self, + num_tasks, + rank=0, + task_prior=None, + batch_size=1, + noise_prior=None, + param_transform=softplus, + inv_param_transform=None, + **kwargs + ): + """ + Args: + num_tasks (int): Number of tasks. + + rank (int): The rank of the task noise covariance matrix to fit. If `rank` is set to 0, + then a diagonal covariance matrix is fit. + + task_prior (:obj:`gpytorch.priors.Prior`): Prior to use over the task noise covariance matrix if + `rank` > 0, or a prior over the log of just the diagonal elements, if `rank` == 0. 
+ + """ + noise_prior = _deprecate_kwarg(kwargs, "log_noise_prior", "noise_prior", noise_prior) + super(Likelihood, self).__init__() + self._param_transform = param_transform + self._inv_param_transform = _get_inv_param_transform(param_transform, inv_param_transform) + self.register_parameter(name="raw_noise", parameter=torch.nn.Parameter(torch.zeros(batch_size, 1))) if rank == 0: self.register_parameter( name="raw_task_noises", parameter=torch.nn.Parameter(torch.zeros(batch_size, num_tasks)) @@ -62,25 +246,36 @@ def __init__( if task_prior is not None: self.register_prior("MultitaskErrorCovariancePrior", task_prior, self._eval_covar_matrix) self.num_tasks = num_tasks + self.rank = rank - def _eval_covar_matrix(self, task_noise_covar_factor, raw_noise): - num_tasks = task_noise_covar_factor.size(0) - noise = self._param_transform(raw_noise) - D = noise * torch.eye(num_tasks, dtype=noise.dtype, device=noise.device) - return task_noise_covar_factor.matmul(task_noise_covar_factor.transpose(-1, -2)) + D + @property + def noise(self): + return self._param_transform(self.raw_noise) - def forward(self, input): + @noise.setter + def noise(self, value): + self._set_noise(value) + + def _set_noise(self, value): + self.initialize(raw_noise=self._inv_param_transform(value)) + + def _eval_covar_matrix(self): + covar_factor = self.task_noise_covar_factor + noise = self.noise + D = noise * torch.eye(self.num_tasks, dtype=noise.dtype, device=noise.device) + return covar_factor.matmul(covar_factor.transpose(-1, -2)) + D + + def forward(self, input, *params): """ - Adds the log task noises to the diagonal of the covariance matrix of the supplied - :obj:`gpytorch.distributions.MultivariateNormal` or - :obj:`gpytorch.distributions.MultitaskMultivariateNormal`, in case of - `rank` == 0. Otherwise, adds a rank `rank` covariance matrix to it. + Adds the task noises to the diagonal of the covariance matrix of the supplied + :obj:`gpytorch.distributions.MultivariateNormal` or :obj:`gpytorch.distributions.MultitaskMultivariateNormal`, + in case of `rank` == 0. Otherwise, adds a rank `rank` covariance matrix to it. To accomplish this, we form a new :obj:`gpytorch.lazy.KroneckerProductLazyTensor` between :math:`I_{n}`, an identity matrix with size equal to the data and a (not necessarily diagonal) matrix containing the task noises :math:`D_{t}`. - We also incorporate a shared `raw_noise` parameter from the base + We also incorporate a shared `noise` parameter from the base :class:`gpytorch.likelihoods.GaussianLikelihood` that we extend. The final covariance matrix after this method is then :math:`K + D_{t} \otimes I_{n} + \sigma^{2}I_{nt}`. @@ -95,15 +290,16 @@ def forward(self, input): """ mean, covar = input.mean, input.lazy_covariance_matrix - if hasattr(self, "raw_task_noises"): - noises = self._param_transform(self.raw_task_noises) + if self.rank == 0: + task_noises = self._param_transform(self.raw_task_noises) if covar.ndimension() == 2: - if settings.debug.on() and noises.size(0) > 1: + if settings.debug.on() and task_noises.size(0) > 1: raise RuntimeError( "With batch_size > 1, expected a batched MultitaskMultivariateNormal distribution." 
) - noises = noises.squeeze(0) - task_var_lt = DiagLazyTensor(noises) + task_noises = task_noises.squeeze(0) + task_var_lt = DiagLazyTensor(task_noises) + device = task_noises.device else: task_noise_covar_factor = self.task_noise_covar_factor if covar.ndimension() == 2: @@ -113,13 +309,12 @@ def forward(self, input): ) task_noise_covar_factor = task_noise_covar_factor.squeeze(0) task_var_lt = RootLazyTensor(task_noise_covar_factor) + device = task_noise_covar_factor.device if covar.ndimension() == 2: - eye_lt = DiagLazyTensor(torch.ones(covar.size(-1) // self.num_tasks, device=self.log_noise.device)) + eye_lt = DiagLazyTensor(torch.ones(covar.size(-1) // self.num_tasks, device=device)) else: - eye_lt = DiagLazyTensor( - torch.ones(covar.size(0), covar.size(-1) // self.num_tasks, device=self.log_noise.device) - ) + eye_lt = DiagLazyTensor(torch.ones(covar.size(0), covar.size(-1) // self.num_tasks, device=device)) # Make sure the batch sizes are going to match if task_var_lt.size(0) == 1: task_var_lt = task_var_lt.repeat(eye_lt.size(0), 1, 1) @@ -135,6 +330,3 @@ def forward(self, input): covar = add_diag(covar, noise) return input.__class__(mean, covar) - - def variational_log_probability(self, input, target): - raise NotImplementedError("Variational inference with Multitask Gaussian likelihood is not yet supported") diff --git a/gpytorch/likelihoods/noise_models.py b/gpytorch/likelihoods/noise_models.py new file mode 100644 index 000000000..805570bba --- /dev/null +++ b/gpytorch/likelihoods/noise_models.py @@ -0,0 +1,108 @@ +#!/usr/bin/env python3 + +import torch +from torch.nn import Parameter +from torch.nn.functional import softplus + +from ..distributions import MultivariateNormal +from ..lazy import DiagLazyTensor +from ..module import Module +from ..utils.broadcasting import _mul_broadcast_shape +from ..utils.transforms import _get_inv_param_transform + + +class _HomoskedasticNoiseBase(Module): + def __init__(self, noise_prior=None, batch_size=1, param_transform=softplus, inv_param_transform=None, num_tasks=1): + super().__init__() + self._param_transform = param_transform + self._inv_param_transform = _get_inv_param_transform(param_transform, inv_param_transform) + self.register_parameter(name="raw_noise", parameter=Parameter(torch.zeros(batch_size, num_tasks))) + if noise_prior is not None: + self.register_prior("noise_prior", noise_prior, lambda: self.noise, lambda v: self._set_noise(v)) + + @property + def noise(self): + return self._param_transform(self.raw_noise) + + @noise.setter + def noise(self, value): + self._set_noise(value) + + def _set_noise(self, value): + self.initialize(raw_noise=self._inv_param_transform(value)) + + def forward(self, *params, shape=None): + """In the homoskedastic case, the parameters are only used to infer the required shape. 
+ Here are the possible scenarios: + - non-batched noise, non-batched input, non-MT -> noise_diag shape is `n` + - non-batched noise, non-batched input, MT -> noise_diag shape is `nt` + - non-batched noise, batched input, non-MT -> noise_diag shape is `b x n` with b' the broadcasted batch shape + - non-batched noise, batched input, MT -> noise_diag shape is `b x nt` + - batched noise, non-batched input, non-MT -> noise_diag shape is `b x n` + - batched noise, non-batched input, MT -> noise_diag shape is `b x nt` + - batched noise, batched input, non-MT -> noise_diag shape is `b' x n` + - batched noise, batched input, MT -> noise_diag shape is `b' x nt` + where `n` is the number of evaluation points and `t` is the number of tasks (i.e. `num_tasks` of self.noise). + So basically the shape is always `b' x nt`, with `b'` appropriately broadcast from the noise parameter and + input batch shapes. `n` and the input batch shape are determined either from the shape arg or from the params + input. For this it is sufficient to take in a single `shape` arg, with the convention that shape[:-1] is the + batch shape of the input, and shape[-1] is `n`. + """ + if shape is None: + p = params[0] if torch.is_tensor(params[0]) else params[0][0] + shape = p.shape if len(p.shape) == 1 else p.shape[:-1] + noise = self.noise + batch_shape, n = shape[:-1], shape[-1] + noise_batch_shape = noise.shape[:-1] if noise.shape[-2] > 1 else torch.Size() + num_tasks = noise.shape[-1] + batch_shape = _mul_broadcast_shape(noise_batch_shape, batch_shape) + noise = noise.unsqueeze(-2) + if len(batch_shape) == 0: + noise = noise.squeeze(0) + noise_diag = noise.expand(batch_shape + torch.Size([n, num_tasks])).contiguous() + if num_tasks == 1: + noise_diag = noise_diag.view(*batch_shape, n) + return DiagLazyTensor(noise_diag) + + +class HomoskedasticNoise(_HomoskedasticNoiseBase): + def __init__(self, noise_prior=None, batch_size=1, param_transform=softplus, inv_param_transform=None): + super().__init__( + noise_prior=noise_prior, + batch_size=batch_size, + param_transform=param_transform, + inv_param_transform=inv_param_transform, + num_tasks=1, + ) + + +class MultitaskHomoskedasticNoise(_HomoskedasticNoiseBase): + def __init__(self, num_tasks, noise_prior=None, batch_size=1, param_transform=softplus, inv_param_transform=None): + super().__init__( + noise_prior=noise_prior, + batch_size=batch_size, + param_transform=param_transform, + inv_param_transform=inv_param_transform, + num_tasks=num_tasks, + ) + + +class HeteroskedasticNoise(Module): + def __init__(self, noise_model, noise_indices=None, noise_transform=torch.exp): + super().__init__() + self.noise_model = noise_model + self._noise_transform = noise_transform + self._noise_indices = noise_indices + self._noise_transform = noise_transform + + def forward(self, *params, batch_shape=None, shape=None): + if len(params) == 1 and not torch.is_tensor(params[0]): + output = self.noise_model(*params[0]) + else: + output = self.noise_model(*params) + if not isinstance(output, MultivariateNormal): + raise NotImplementedError("Currently only noise models that return a MultivariateNormal are supported") + # note: this also works with MultitaskMultivariateNormal, where this + # will return a batched DiagLazyTensor of size n x num_tasks x num_tasks + noise_diag = output.mean if self._noise_indices is None else output.mean[..., self._noise_indices] + return DiagLazyTensor(self._noise_transform(noise_diag)) diff --git a/gpytorch/mlls/exact_marginal_log_likelihood.py 
b/gpytorch/mlls/exact_marginal_log_likelihood.py index 4f3faa4f7..36dfd15d3 100644 --- a/gpytorch/mlls/exact_marginal_log_likelihood.py +++ b/gpytorch/mlls/exact_marginal_log_likelihood.py @@ -2,7 +2,7 @@ import torch from .marginal_log_likelihood import MarginalLogLikelihood -from ..likelihoods import GaussianLikelihood +from ..likelihoods import _GaussianLikelihoodBase from ..distributions import MultivariateNormal @@ -15,16 +15,16 @@ def __init__(self, likelihood, model): - likelihood: (Likelihood) - the likelihood for the model - model: (Module) - the exact GP model """ - if not isinstance(likelihood, GaussianLikelihood): + if not isinstance(likelihood, _GaussianLikelihoodBase): raise RuntimeError("Likelihood must be Gaussian for exact inference") super(ExactMarginalLogLikelihood, self).__init__(likelihood, model) - def forward(self, output, target): + def forward(self, output, target, *params): if not isinstance(output, MultivariateNormal): raise RuntimeError("ExactMarginalLogLikelihood can only operate on Gaussian random variables") # Get the log prob of the marginal distribution - output = self.likelihood(output) + output = self.likelihood(output, *params) res = output.log_prob(target) # Add terms for SGPR / when inducing points are learned diff --git a/gpytorch/models/exact_gp.py b/gpytorch/models/exact_gp.py index d8ac46819..c91726467 100644 --- a/gpytorch/models/exact_gp.py +++ b/gpytorch/models/exact_gp.py @@ -4,7 +4,7 @@ import torch from ..functions import exact_predictive_mean, exact_predictive_covar from ..distributions import MultivariateNormal, MultitaskMultivariateNormal -from ..likelihoods import GaussianLikelihood +from ..likelihoods import _GaussianLikelihoodBase from .. import settings from .gp import GP @@ -15,8 +15,8 @@ def __init__(self, train_inputs, train_targets, likelihood): train_inputs = (train_inputs,) if train_inputs is not None and not all(torch.is_tensor(train_input) for train_input in train_inputs): raise RuntimeError("Train inputs must be a tensor, or a list/tuple of tensors") - if not isinstance(likelihood, GaussianLikelihood): - raise RuntimeError("ExactGP can only handle GaussianLikelihood") + if not isinstance(likelihood, _GaussianLikelihoodBase): + raise RuntimeError("ExactGP can only handle Gaussian likelihoods") super(ExactGP, self).__init__() if train_inputs is not None: @@ -72,7 +72,7 @@ def __call__(self, *args, **kwargs): "train_inputs, train_targets cannot be None in training mode. " "Call .eval() for prior predictions, or call .set_train_data() to add training data." 
) - if settings.debug.on(): + if settings.check_training_data.on(): if not all(torch.equal(train_input, input) for train_input, input in zip(train_inputs, inputs)): raise RuntimeError("You must train on the training inputs!") res = super(ExactGP, self).__call__(*inputs, **kwargs) @@ -150,6 +150,7 @@ def __call__(self, *args, **kwargs): predictive_mean, mean_cache = exact_predictive_mean( full_covar=full_covar, full_mean=full_mean, + train_inputs=train_inputs, train_labels=train_targets, num_train=num_train, likelihood=self.likelihood, @@ -158,6 +159,7 @@ def __call__(self, *args, **kwargs): ) predictive_covar, covar_cache = exact_predictive_covar( full_covar=full_covar, + train_inputs=train_inputs, num_train=num_train, likelihood=self.likelihood, precomputed_cache=self.covar_cache, diff --git a/gpytorch/module.py b/gpytorch/module.py index f603e319c..3f0461c36 100644 --- a/gpytorch/module.py +++ b/gpytorch/module.py @@ -1,19 +1,20 @@ #!/usr/bin/env python3 +import itertools +import warnings from collections import OrderedDict import torch from torch import nn from torch.distributions import Distribution -import itertools + from .lazy import LazyTensor from .utils.deprecation import DeprecationError -import warnings class Module(nn.Module): def __init__(self): - super(Module, self).__init__() + super().__init__() self._added_loss_terms = OrderedDict() self._priors = OrderedDict() @@ -69,49 +70,27 @@ def initialize(self, **kwargs): kwargs: (param_name, value) - parameter to initialize Value can take the form of a tensor, a float, or an int """ - from .kernels import ( - CosineKernel, - IndexKernel, - MaternKernel, - PeriodicKernel, - RBFKernel, - ScaleKernel, - SpectralMixtureKernel, - ) - - from .likelihoods import GaussianLikelihood, MultitaskGaussianLikelihood - - modules_with_log_params = [ - CosineKernel, - IndexKernel, - MaternKernel, - PeriodicKernel, - RBFKernel, - ScaleKernel, - SpectralMixtureKernel, - GaussianLikelihood, - MultitaskGaussianLikelihood, - ] + from .utils.log_deprecation import MODULES_WITH_LOG_PARAMS for name, val in kwargs.items(): if isinstance(val, int): val = float(val) - if any([isinstance(self, mod_type) for mod_type in modules_with_log_params]) and 'log_' in name: - base_name = name.split('log_')[1] - name = 'raw_' + base_name + if any(isinstance(self, mod_type) for mod_type in MODULES_WITH_LOG_PARAMS) and "log_" in name: + base_name = name.split("log_")[1] + name = "raw_" + base_name if not torch.is_tensor(val): val = self._inv_param_transform(torch.tensor(val).exp()).item() else: val = self._inv_param_transform(val.exp()) - if name not in self._parameters: + if not hasattr(self, name): raise AttributeError("Unknown parameter {p} for {c}".format(p=name, c=self.__class__.__name__)) if torch.is_tensor(val): self.__getattr__(name).data.copy_(val) elif isinstance(val, float): self.__getattr__(name).data.fill_(val) else: - raise AttributeError("Type {t} not valid to initialize parameter {p}".format(t=type(val), p=name)) + raise AttributeError("Type {t} not valid for initializing parameter {p}".format(t=type(val), p=name)) # Ensure value is contained in support of prior (if present) prior_name = "_".join([name, "prior"]) @@ -178,7 +157,7 @@ def register_parameter(self, name, parameter, prior=None): ) if "_parameters" not in self.__dict__: raise AttributeError("Cannot assign parameter before Module.__init__() call") - super(Module, self).register_parameter(name, parameter) + super().register_parameter(name, parameter) def register_prior(self, name, prior, 
param_or_closure, setting_closure=None): """ @@ -252,56 +231,25 @@ def variational_parameters(self): def _load_from_state_dict( self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs ): - from .kernels import ( - CosineKernel, - IndexKernel, - MaternKernel, - PeriodicKernel, - RBFKernel, - ScaleKernel, - SpectralMixtureKernel, - ) - - from .likelihoods import GaussianLikelihood, MultitaskGaussianLikelihood + from .utils.log_deprecation import LOG_DEPRECATION_MSG, MODULES_WITH_LOG_PARAMS local_name_params = itertools.chain(self._parameters.items(), self._buffers.items()) local_state = {k: v.data for k, v in local_name_params if v is not None} - modules_with_log_params = [ - CosineKernel, - IndexKernel, - MaternKernel, - PeriodicKernel, - RBFKernel, - ScaleKernel, - SpectralMixtureKernel, - GaussianLikelihood, - MultitaskGaussianLikelihood, - ] - - if not any([isinstance(self, mod_type) for mod_type in modules_with_log_params]): - return super(Module, self)._load_from_state_dict( - state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs - ) - - super(Module, self)._load_from_state_dict( + super()._load_from_state_dict( state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs ) + if not any(isinstance(self, mod_type) for mod_type in MODULES_WITH_LOG_PARAMS): + return # Load log space parameters and throw deprecation warnings. for name, param in local_state.items(): - if 'raw_' in name: - base_name = name.split('raw_')[1] + if "raw_" in name: + base_name = name.split("raw_")[1] log_name = "log_" + base_name log_key = prefix + log_name if log_key in state_dict and log_key not in local_state: - warnings.warn( - "The '{log_name}' parameter is deprecated in favor of '{name}' because we no longer ensure " - "positiveness with torch.exp for improved stability reasons and will be removed in a future " - "release. To solve this issue, just save this model " - "again.".format(log_name=log_name, name=name), - DeprecationWarning, - ) + warnings.warn(LOG_DEPRECATION_MSG.format(log_name=log_name, name=name), DeprecationWarning) input_param = state_dict[log_key] if isinstance(input_param, nn.Parameter): input_param = input_param.data @@ -315,42 +263,21 @@ def _load_from_state_dict( unexpected_keys.remove(prefix + log_name) def __getattr__(self, name): - from .kernels import ( - CosineKernel, - IndexKernel, - MaternKernel, - PeriodicKernel, - RBFKernel, - ScaleKernel, - SpectralMixtureKernel, - ) - - from .likelihoods import GaussianLikelihood, MultitaskGaussianLikelihood - - modules_with_log_params = [ - CosineKernel, - IndexKernel, - MaternKernel, - PeriodicKernel, - RBFKernel, - ScaleKernel, - SpectralMixtureKernel, - GaussianLikelihood, - MultitaskGaussianLikelihood, - ] - - if not any([isinstance(self, mod_type) for mod_type in modules_with_log_params]) or 'log_' not in name: - return super(Module, self).__getattr__(name) - else: - base_name = name.split('log_')[1] # e.g. 
log_lengthscale -> lengthscale - raw_name = 'raw_' + base_name - warnings.warn( - "The '{log_name}' parameter is deprecated in favor of '{name}' because we no longer ensure " - "positiveness with torch.exp for improved stability reasons and will be removed in a future " - "release.".format(log_name=name, name=raw_name), - DeprecationWarning, - ) - return super(Module, self).__getattribute__(base_name).log() # Get real param value and transform to log + try: + return super().__getattr__(name) + except AttributeError as e: + from .utils.log_deprecation import LOG_DEPRECATION_MSG, MODULES_WITH_LOG_PARAMS + + if any(isinstance(self, mod_type) for mod_type in MODULES_WITH_LOG_PARAMS) and "log_" in name: + base_name = name.split("log_")[1] # e.g. log_lengthscale -> lengthscale + raw_name = "raw_" + base_name + warnings.warn(LOG_DEPRECATION_MSG.format(log_name=name, name=raw_name), DeprecationWarning) + return super().__getattribute__(base_name).log() # Get real param value and transform to log + else: + try: + return super().__getattribute__(name) + except AttributeError: + raise e def _extract_named_added_loss_terms(module, memo=None, prefix=""): diff --git a/gpytorch/settings.py b/gpytorch/settings.py index 3c2bcd328..2de721c3b 100644 --- a/gpytorch/settings.py +++ b/gpytorch/settings.py @@ -47,6 +47,18 @@ def __exit__(self, *args): return False +class check_training_data(_feature_flag): + """ + Check whether the correct training data is supplied in Exact GP training mode + Pros: fewer data checks, fewer warning messages + Cons: possibility of supplying incorrect data, model accidentally in wrong mode + + Note: If using a Heteroskedastic Noise model, this will need to be disabled + """ + + _state = True + + class debug(_feature_flag): """ Whether or not to perform "safety" checks on the supplied data. diff --git a/gpytorch/utils/grid.py b/gpytorch/utils/grid.py index 6c37c788b..e10f374a1 100644 --- a/gpytorch/utils/grid.py +++ b/gpytorch/utils/grid.py @@ -1,6 +1,5 @@ #!/usr/bin/env python3 - import math import torch @@ -48,7 +47,7 @@ def choose_grid_size(train_inputs, ratio=1.0): def create_data_from_grid(grid): grid_size = grid.size(-2) grid_dim = grid.size(-1) - grid_data = torch.zeros(int(pow(grid_size, grid_dim)), grid_dim) + grid_data = torch.zeros(int(pow(grid_size, grid_dim)), grid_dim, device=grid.device) prev_points = None for i in range(grid_dim): for j in range(grid_size): diff --git a/gpytorch/utils/log_deprecation.py b/gpytorch/utils/log_deprecation.py new file mode 100644 index 000000000..b7f786f89 --- /dev/null +++ b/gpytorch/utils/log_deprecation.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python3 + +from ..kernels import ( + CosineKernel, + IndexKernel, + MaternKernel, + PeriodicKernel, + RBFKernel, + ScaleKernel, + SpectralMixtureKernel, +) +from ..likelihoods import GaussianLikelihood, MultitaskGaussianLikelihood + + +MODULES_WITH_LOG_PARAMS = [ + CosineKernel, + IndexKernel, + MaternKernel, + PeriodicKernel, + RBFKernel, + ScaleKernel, + SpectralMixtureKernel, + GaussianLikelihood, + MultitaskGaussianLikelihood, +] + +LOG_DEPRECATION_MSG = ( + "The '{log_name}' parameter is deprecated in favor of '{name}' because we no longer ensure " + "positiveness with torch.exp for improved stability reasons and will be removed in a future " + "release." 
+) diff --git a/test/examples/test_batch_gp_regression.py b/test/examples/test_batch_gp_regression.py index 804773591..49cf27baf 100644 --- a/test/examples/test_batch_gp_regression.py +++ b/test/examples/test_batch_gp_regression.py @@ -1,16 +1,17 @@ #!/usr/bin/env python3 +import math import os import random -import math -import torch import unittest + import gpytorch -from torch import optim +import torch +from gpytorch.distributions import MultivariateNormal from gpytorch.kernels import RBFKernel, ScaleKernel -from gpytorch.means import ConstantMean from gpytorch.likelihoods import GaussianLikelihood -from gpytorch.distributions import MultivariateNormal +from gpytorch.means import ConstantMean +from torch import optim # Batch training test: Let's learn hyperparameters on a sine dataset, but test on a sine dataset and a cosine dataset @@ -120,7 +121,7 @@ def test_train_on_batch_test_on_batch(self): for _ in range(50): optimizer.zero_grad() output = gp_model(train_x12) - loss = -mll(output, train_y12).sum() + loss = -mll(output, train_y12, train_x12).sum() loss.backward() optimizer.step() @@ -159,7 +160,7 @@ def test_train_on_batch_test_on_batch_shared_hypers_over_batch(self): for _ in range(50): optimizer.zero_grad() output = gp_model(train_x12) - loss = -mll(output, train_y12).sum() + loss = -mll(output, train_y12, train_x12).sum() loss.backward() optimizer.step() diff --git a/test/examples/test_batch_multitask_gp_regression.py b/test/examples/test_batch_multitask_gp_regression.py index a51425de4..89223bcf7 100644 --- a/test/examples/test_batch_multitask_gp_regression.py +++ b/test/examples/test_batch_multitask_gp_regression.py @@ -9,7 +9,7 @@ from torch import optim from gpytorch.kernels import RBFKernel, MultitaskKernel from gpytorch.means import ConstantMean, MultitaskMean -from gpytorch.likelihoods import MultitaskGaussianLikelihood +from gpytorch.likelihoods import MultitaskGaussianLikelihoodKronecker from gpytorch.distributions import MultitaskMultivariateNormal @@ -69,7 +69,7 @@ def tearDown(self): def test_train_on_single_set_test_on_batch(self): # We're manually going to set the hyperparameters to something they shouldn't be - likelihood = MultitaskGaussianLikelihood( + likelihood = MultitaskGaussianLikelihoodKronecker( noise_prior=gpytorch.priors.NormalPrior(loc=torch.zeros(1), scale=torch.ones(1)), num_tasks=2 ) gp_model = ExactGPModel(train_x1, train_y1, likelihood) @@ -112,7 +112,7 @@ def test_train_on_single_set_test_on_batch(self): def test_train_on_batch_test_on_batch(self): # We're manually going to set the hyperparameters to something they shouldn't be - likelihood = MultitaskGaussianLikelihood( + likelihood = MultitaskGaussianLikelihoodKronecker( noise_prior=gpytorch.priors.NormalPrior(loc=torch.zeros(2), scale=torch.ones(2)), batch_size=2, num_tasks=2 ) gp_model = ExactGPModel(train_x12, train_y12, likelihood, batch_size=2) @@ -151,7 +151,7 @@ def test_train_on_batch_test_on_batch(self): def test_train_on_batch_test_on_batch_shared_hypers_over_batch(self): # We're manually going to set the hyperparameters to something they shouldn't be - likelihood = MultitaskGaussianLikelihood( + likelihood = MultitaskGaussianLikelihoodKronecker( noise_prior=gpytorch.priors.NormalPrior(loc=torch.zeros(2), scale=torch.ones(2)), batch_size=1, num_tasks=2 ) gp_model = ExactGPModel(train_x12, train_y12, likelihood, batch_size=1) diff --git a/test/examples/test_grid_gp_regression.py b/test/examples/test_grid_gp_regression.py index ca8e9c9e6..6e53e1b8d 100644 --- 
a/test/examples/test_grid_gp_regression.py +++ b/test/examples/test_grid_gp_regression.py @@ -1,11 +1,12 @@ #!/usr/bin/env python3 -import gpytorch -import torch import math -import unittest import os import random +import unittest + +import gpytorch +import torch from torch import optim @@ -52,19 +53,26 @@ def tearDown(self): if hasattr(self, "rng_state"): torch.set_rng_state(self.rng_state) - def test_grid_gp_mean_abs_error(self): + def test_grid_gp_mean_abs_error(self, cuda=False): + device = torch.device("cuda") if cuda else torch.device("cpu") grid_bounds = [(0, 1), (0, 2)] grid_size = 25 - grid = torch.zeros(grid_size, len(grid_bounds)) + grid = torch.zeros(grid_size, len(grid_bounds), device=device) for i in range(len(grid_bounds)): grid_diff = float(grid_bounds[i][1] - grid_bounds[i][0]) / (grid_size - 2) - grid[:, i] = torch.linspace(grid_bounds[i][0] - grid_diff, grid_bounds[i][1] + grid_diff, grid_size) + grid[:, i] = torch.linspace( + grid_bounds[i][0] - grid_diff, grid_bounds[i][1] + grid_diff, grid_size, device=device + ) - train_x, train_y, test_x, test_y = make_data(grid) + train_x, train_y, test_x, test_y = make_data(grid, cuda=cuda) likelihood = gpytorch.likelihoods.GaussianLikelihood() gp_model = GridGPRegressionModel(grid, train_x, train_y, likelihood) mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, gp_model) + if cuda: + gp_model.cuda() + likelihood.cuda() + # Optimize the model gp_model.train() likelihood.train() @@ -72,7 +80,7 @@ def test_grid_gp_mean_abs_error(self): optimizer = optim.Adam(list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.1) optimizer.n_iter = 0 with gpytorch.settings.debug(False): - for _ in range(25): + for _ in range(20): optimizer.zero_grad() output = gp_model(train_x) loss = -mll(output, train_y) @@ -96,6 +104,10 @@ def test_grid_gp_mean_abs_error(self): self.assertLess(mean_abs_error.squeeze().item(), 0.3) + def test_grid_gp_mean_abs_error_cuda(self): + if torch.cuda.is_available(): + self.test_grid_gp_mean_abs_error(cuda=True) + if __name__ == "__main__": unittest.main() diff --git a/test/examples/test_kronecker_multitask_ski_gp_regression.py b/test/examples/test_kronecker_multitask_ski_gp_regression.py index 151c43429..6e5d611e1 100644 --- a/test/examples/test_kronecker_multitask_ski_gp_regression.py +++ b/test/examples/test_kronecker_multitask_ski_gp_regression.py @@ -1,28 +1,16 @@ #!/usr/bin/env python3 -from math import pi - import os import random -import torch import unittest +from math import pi + import gpytorch -from gpytorch.kernels import RBFKernel, MultitaskKernel, GridInterpolationKernel -from gpytorch.means import ConstantMean, MultitaskMean -from gpytorch.likelihoods import MultitaskGaussianLikelihood +import torch from gpytorch.distributions import MultitaskMultivariateNormal - - -# Simple training data: let's try to learn a sine function -train_x = torch.linspace(0, 1, 100) - -# y1 function is sin(2*pi*x) with noise N(0, 0.04) -train_y1 = torch.sin(train_x * (2 * pi)) + torch.randn(train_x.size()) * 0.1 -# y2 function is cos(2*pi*x) with noise N(0, 0.04) -train_y2 = torch.cos(train_x * (2 * pi)) + torch.randn(train_x.size()) * 0.1 - -# Create a train_y which interleaves the two -train_y = torch.stack([train_y1, train_y2], -1) +from gpytorch.kernels import GridInterpolationKernel, MultitaskKernel, RBFKernel +from gpytorch.likelihoods import MultitaskGaussianLikelihood +from gpytorch.means import ConstantMean, MultitaskMean class MultitaskGPModel(gpytorch.models.ExactGP): @@ -51,9 +39,25 @@ def 
tearDown(self): if hasattr(self, "rng_state"): torch.set_rng_state(self.rng_state) - def test_multitask_gp_mean_abs_error(self): + def _get_data(self, cuda=False): + # Simple training data: let's try to learn a sine function + train_x = torch.linspace(0, 1, 100, device=torch.device("cuda") if cuda else torch.device("cpu")) + # y1 function is sin(2*pi*x) with noise N(0, 0.04) + train_y1 = torch.sin(train_x * (2 * pi)) + torch.randn_like(train_x) * 0.1 + # y2 function is cos(2*pi*x) with noise N(0, 0.04) + train_y2 = torch.cos(train_x * (2 * pi)) + torch.randn_like(train_x) * 0.1 + # Create a train_y which interleaves the two + train_y = torch.stack([train_y1, train_y2], -1) + return train_x, train_y + + def test_multitask_gp_mean_abs_error(self, cuda=False): + train_x, train_y = self._get_data(cuda=cuda) likelihood = MultitaskGaussianLikelihood(num_tasks=2) model = MultitaskGPModel(train_x, train_y, likelihood) + + if cuda: + model.cuda() + # Find optimal model hyperparameters model.train() likelihood.train() @@ -79,7 +83,8 @@ def test_multitask_gp_mean_abs_error(self): # Test the model model.eval() likelihood.eval() - test_x = torch.linspace(0, 1, 51) + + test_x = torch.linspace(0, 1, 51, device=torch.device("cuda") if cuda else torch.device("cpu")) test_y1 = torch.sin(test_x * (2 * pi)) test_y2 = torch.cos(test_x * (2 * pi)) test_preds = likelihood(model(test_x)).mean @@ -89,6 +94,10 @@ def test_multitask_gp_mean_abs_error(self): self.assertLess(mean_abs_error_task_1.squeeze().item(), 0.05) self.assertLess(mean_abs_error_task_2.squeeze().item(), 0.05) + def test_multitask_gp_mean_abs_error_cuda(self): + if torch.cuda.is_available(): + self.test_multitask_gp_mean_abs_error(cuda=True) + if __name__ == "__main__": unittest.main() diff --git a/test/examples/test_simple_gp_regression.py b/test/examples/test_simple_gp_regression.py index 8e2784173..d7b56b043 100644 --- a/test/examples/test_simple_gp_regression.py +++ b/test/examples/test_simple_gp_regression.py @@ -1,26 +1,18 @@ #!/usr/bin/env python3 -from math import exp, pi - import os import random -import torch import unittest +from math import exp, pi + import gpytorch -from torch import optim +import torch +from gpytorch.distributions import MultivariateNormal from gpytorch.kernels import RBFKernel, ScaleKernel from gpytorch.likelihoods import GaussianLikelihood from gpytorch.means import ConstantMean from gpytorch.priors import SmoothedBoxPrior -from gpytorch.distributions import MultivariateNormal - - -# Simple training data: let's try to learn a sine function -train_x = torch.linspace(0, 1, 11) -train_y = torch.sin(train_x * (2 * pi)) - -test_x = torch.linspace(0, 1, 51) -test_y = torch.sin(test_x * (2 * pi)) +from torch import optim class ExactGPModel(gpytorch.models.ExactGP): @@ -48,7 +40,17 @@ def tearDown(self): if hasattr(self, "rng_state"): torch.set_rng_state(self.rng_state) - def test_prior(self): + def _get_data(self, cuda=False): + device = torch.device("cuda") if cuda else torch.device("cpu") + # Simple training data: let's try to learn a sine function + train_x = torch.linspace(0, 1, 11, device=device) + train_y = torch.sin(train_x * (2 * pi)) + test_x = torch.linspace(0, 1, 51, device=device) + test_y = torch.sin(test_x * (2 * pi)) + return train_x, test_x, train_y, test_y + + def test_prior(self, cuda=False): + train_x, test_x, train_y, test_y = self._get_data(cuda=cuda) # We're manually going to set the hyperparameters to be ridiculous likelihood = GaussianLikelihood(noise_prior=SmoothedBoxPrior(exp(-3), exp(3), 
sigma=0.1)) gp_model = ExactGPModel(None, None, likelihood) @@ -60,6 +62,10 @@ def test_prior(self): gp_model.covar_module.base_kernel.initialize(log_lengthscale=0) likelihood.initialize(log_noise=0) + if cuda: + gp_model.cuda() + likelihood.cuda() + # Compute posterior distribution gp_model.eval() likelihood.eval() @@ -71,12 +77,21 @@ def test_prior(self): self.assertLess(torch.norm(function_predictions.mean - 1.5), 1e-3) self.assertLess(torch.norm(function_predictions.variance - correct_variance), 1e-3) - def test_posterior_latent_gp_and_likelihood_without_optimization(self): + def test_prior_cuda(self): + if torch.cuda.is_available(): + self.test_prior(cuda=True) + + def test_posterior_latent_gp_and_likelihood_without_optimization(self, cuda=False): + train_x, test_x, train_y, test_y = self._get_data(cuda=cuda) # We're manually going to set the hyperparameters to be ridiculous likelihood = GaussianLikelihood() gp_model = ExactGPModel(train_x, train_y, likelihood) gp_model.covar_module.base_kernel.initialize(raw_lengthscale=-15) - likelihood.initialize(raw_noise=-15) + likelihood.initialize(log_noise=-15) + + if cuda: + gp_model.cuda() + likelihood.cuda() # Compute posterior distribution gp_model.eval() @@ -91,12 +106,17 @@ def test_posterior_latent_gp_and_likelihood_without_optimization(self): self.assertLess(torch.norm(function_predictions.variance), 1e-3) # It shouldn't fit much else though - test_function_predictions = gp_model(torch.tensor([1.1])) + test_function_predictions = gp_model(torch.tensor([1.1]).type_as(test_x)) self.assertLess(torch.norm(test_function_predictions.mean - 0), 1e-4) self.assertLess(torch.norm(test_function_predictions.variance - gp_model.covar_module.outputscale), 1e-4) - def test_posterior_latent_gp_and_likelihood_with_optimization(self): + def test_posterior_latent_gp_and_likelihood_without_optimization_cuda(self): + if torch.cuda.is_available(): + self.test_posterior_latent_gp_and_likelihood_without_optimization(cuda=True) + + def test_posterior_latent_gp_and_likelihood_with_optimization(self, cuda=False): + train_x, test_x, train_y, test_y = self._get_data(cuda=cuda) # We're manually going to set the hyperparameters to something they shouldn't be likelihood = GaussianLikelihood(noise_prior=SmoothedBoxPrior(exp(-3), exp(3), sigma=0.1)) gp_model = ExactGPModel(train_x, train_y, likelihood) @@ -105,6 +125,10 @@ def test_posterior_latent_gp_and_likelihood_with_optimization(self): gp_model.mean_module.initialize(constant=0) likelihood.initialize(log_noise=1) + if cuda: + gp_model.cuda() + likelihood.cuda() + # Find optimal model hyperparameters gp_model.train() likelihood.train() @@ -135,7 +159,12 @@ def test_posterior_latent_gp_and_likelihood_with_optimization(self): self.assertLess(mean_abs_error.item(), 0.05) - def test_posterior_latent_gp_and_likelihood_fast_pred_var(self): + def test_posterior_latent_gp_and_likelihood_with_optimization_cuda(self): + if torch.cuda.is_available(): + self.test_posterior_latent_gp_and_likelihood_with_optimization(cuda=True) + + def test_posterior_latent_gp_and_likelihood_fast_pred_var(self, cuda=False): + train_x, test_x, train_y, test_y = self._get_data(cuda=cuda) with gpytorch.fast_pred_var(), gpytorch.settings.debug(False): # We're manually going to set the hyperparameters to # something they shouldn't be @@ -146,6 +175,10 @@ def test_posterior_latent_gp_and_likelihood_fast_pred_var(self): gp_model.mean_module.initialize(constant=0) likelihood.initialize(log_noise=1) + if cuda: + gp_model.cuda() + likelihood.cuda() + # 
Find optimal model hyperparameters gp_model.train() likelihood.train() @@ -175,53 +208,17 @@ def test_posterior_latent_gp_and_likelihood_fast_pred_var(self): # Now bump up the likelihood to something huge # This will make it easy to calculate the variance - likelihood.raw_noise.data.fill_(3) + likelihood.noise_covar.raw_noise.data.fill_(3) test_function_predictions = likelihood(gp_model(train_x)) - noise = likelihood.noise + noise = likelihood.noise_covar.noise var_diff = (test_function_predictions.variance - noise).abs() self.assertLess(torch.max(var_diff / noise), 0.05) - def test_posterior_latent_gp_and_likelihood_with_optimization_cuda(self): + def test_posterior_latent_gp_and_likelihood_fast_pred_var_cuda(self): if torch.cuda.is_available(): - # We're manually going to set the hyperparameters to - # something they shouldn't be - likelihood = GaussianLikelihood(noise_prior=SmoothedBoxPrior(exp(-3), exp(3), sigma=0.1)).cuda() - gp_model = ExactGPModel(train_x.cuda(), train_y.cuda(), likelihood).cuda() - mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, gp_model) - gp_model.covar_module.base_kernel.initialize(log_lengthscale=1) - gp_model.mean_module.initialize(constant=0) - likelihood.initialize(log_noise=1) - - # Find optimal model hyperparameters - gp_model.train() - likelihood.train() - optimizer = optim.Adam(gp_model.parameters(), lr=0.1) - optimizer.n_iter = 0 - for _ in range(50): - optimizer.zero_grad() - output = gp_model(train_x.cuda()) - loss = -mll(output, train_y.cuda()) - loss.backward() - optimizer.n_iter += 1 - optimizer.step() - - for param in gp_model.parameters(): - self.assertTrue(param.grad is not None) - self.assertGreater(param.grad.norm().item(), 0) - for param in likelihood.parameters(): - self.assertTrue(param.grad is not None) - self.assertGreater(param.grad.norm().item(), 0) - optimizer.step() - - # Test the model - gp_model.eval() - likelihood.eval() - test_function_predictions = likelihood(gp_model(test_x.cuda())) - mean_abs_error = torch.mean(torch.abs(test_y.cuda() - test_function_predictions.mean)) - - self.assertLess(mean_abs_error.item(), 0.05) + self.test_posterior_latent_gp_and_likelihood_fast_pred_var(cuda=True) if __name__ == "__main__": diff --git a/test/examples/test_white_noise_regression.py b/test/examples/test_white_noise_regression.py index 4421a7af3..333b150ff 100644 --- a/test/examples/test_white_noise_regression.py +++ b/test/examples/test_white_noise_regression.py @@ -1,26 +1,18 @@ #!/usr/bin/env python3 -from math import exp, pi - import os import random -import torch import unittest +from math import exp, pi + import gpytorch -from torch import optim -from gpytorch.kernels import RBFKernel, WhiteNoiseKernel, ScaleKernel +import torch +from gpytorch.distributions import MultivariateNormal +from gpytorch.kernels import RBFKernel, ScaleKernel, WhiteNoiseKernel from gpytorch.likelihoods import GaussianLikelihood from gpytorch.means import ConstantMean from gpytorch.priors import SmoothedBoxPrior -from gpytorch.distributions import MultivariateNormal - - -# Simple training data: let's try to learn a sine function -train_x = torch.linspace(0, 1, 11) -train_y = torch.sin(train_x * (2 * pi)) - -test_x = torch.linspace(0, 1, 51) -test_y = torch.sin(test_x * (2 * pi)) +from torch import optim class ExactGPModel(gpytorch.models.ExactGP): @@ -50,7 +42,17 @@ def tearDown(self): if hasattr(self, "rng_state"): torch.set_rng_state(self.rng_state) - def test_posterior_latent_gp_and_likelihood_without_optimization(self): + def _get_data(self, 
cuda=False): + device = torch.device("cuda") if cuda else torch.device("cpu") + # Simple training data: let's try to learn a sine function + train_x = torch.linspace(0, 1, 11, device=device) + train_y = torch.sin(train_x * (2 * pi)) + test_x = torch.linspace(0, 1, 51, device=device) + test_y = torch.sin(test_x * (2 * pi)) + return train_x, test_x, train_y, test_y + + def test_posterior_latent_gp_and_likelihood_without_optimization(self, cuda=False): + train_x, test_x, train_y, test_y = self._get_data(cuda=cuda) with gpytorch.settings.debug(False): # We're manually going to set the hyperparameters to be ridiculous likelihood = GaussianLikelihood(noise_prior=SmoothedBoxPrior(exp(-10), exp(10), sigma=0.25)) @@ -63,6 +65,10 @@ def test_posterior_latent_gp_and_likelihood_without_optimization(self): gp_model.mean_module.initialize(constant=0) likelihood.initialize(log_noise=-10) + if cuda: + gp_model.cuda() + likelihood.cuda() + # Compute posterior distribution gp_model.eval() likelihood.eval() @@ -75,12 +81,17 @@ def test_posterior_latent_gp_and_likelihood_without_optimization(self): self.assertLess(torch.norm(function_predictions.variance), 5e-3) # It shouldn't fit much else though - test_function_predictions = gp_model(torch.tensor([1.1], dtype=torch.float)) + test_function_predictions = gp_model(torch.tensor([1.1]).type_as(test_x)) self.assertLess(torch.norm(test_function_predictions.mean - 0), 1e-4) self.assertLess(torch.norm(test_function_predictions.variance - gp_model.covar_module.outputscale), 1e-4) - def test_posterior_latent_gp_and_likelihood_with_optimization(self): + def test_posterior_latent_gp_and_likelihood_without_optimization_cuda(self): + if torch.cuda.is_available(): + self.test_posterior_latent_gp_and_likelihood_without_optimization(cuda=True) + + def test_posterior_latent_gp_and_likelihood_with_optimization(self, cuda=False): + train_x, test_x, train_y, test_y = self._get_data(cuda=cuda) # We're manually going to set the hyperparameters to something they shouldn't be likelihood = GaussianLikelihood(noise_prior=SmoothedBoxPrior(exp(-3), exp(3), sigma=0.1)) gp_model = ExactGPModel(train_x, train_y, likelihood) @@ -89,9 +100,14 @@ def test_posterior_latent_gp_and_likelihood_with_optimization(self): gp_model.mean_module.initialize(constant=0) likelihood.initialize(log_noise=1) + if cuda: + gp_model.cuda() + likelihood.cuda() + # Find optimal model hyperparameters gp_model.train() likelihood.train() + optimizer = optim.Adam(list(gp_model.parameters()) + list(likelihood.parameters()), lr=0.1) optimizer.n_iter = 0 with gpytorch.settings.debug(False): @@ -119,10 +135,14 @@ def test_posterior_latent_gp_and_likelihood_with_optimization(self): self.assertLess(mean_abs_error.squeeze().item(), 0.05) - def test_posterior_latent_gp_and_likelihood_fast_pred_var(self): + def test_posterior_latent_gp_and_likelihood_with_optimization_cuda(self): + if torch.cuda.is_available(): + self.test_posterior_latent_gp_and_likelihood_with_optimization(cuda=True) + + def test_posterior_latent_gp_and_likelihood_fast_pred_var(self, cuda=False): + train_x, test_x, train_y, test_y = self._get_data(cuda=cuda) with gpytorch.fast_pred_var(), gpytorch.settings.debug(False): - # We're manually going to set the hyperparameters to - # something they shouldn't be + # We're manually going to set the hyperparameters to something they shouldn't be likelihood = GaussianLikelihood(noise_prior=SmoothedBoxPrior(exp(-3), exp(3), sigma=0.1)) gp_model = ExactGPModel(train_x, train_y, likelihood) mll = 
gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, gp_model) @@ -130,6 +150,10 @@ def test_posterior_latent_gp_and_likelihood_fast_pred_var(self): gp_model.mean_module.initialize(constant=0) likelihood.initialize(log_noise=1) + if cuda: + gp_model.cuda() + likelihood.cuda() + # Find optimal model hyperparameters gp_model.train() likelihood.train() @@ -167,46 +191,9 @@ def test_posterior_latent_gp_and_likelihood_fast_pred_var(self): self.assertLess(torch.max(var_diff / noise), 0.05) - def test_posterior_latent_gp_and_likelihood_with_optimization_cuda(self): + def test_posterior_latent_gp_and_likelihood_fast_pred_var_cuda(self): if torch.cuda.is_available(): - # We're manually going to set the hyperparameters to - # something they shouldn't be - likelihood = GaussianLikelihood(noise_prior=SmoothedBoxPrior(exp(-3), exp(3), sigma=0.1)).cuda() - gp_model = ExactGPModel(train_x.cuda(), train_y.cuda(), likelihood).cuda() - mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, gp_model) - gp_model.rbf_covar_module.initialize(log_lengthscale=1) - gp_model.mean_module.initialize(constant=0) - likelihood.initialize(log_noise=1) - - # Find optimal model hyperparameters - gp_model.train() - likelihood.train() - optimizer = optim.Adam(gp_model.parameters(), lr=0.1) - optimizer.n_iter = 0 - with gpytorch.settings.debug(False): - for _ in range(50): - optimizer.zero_grad() - output = gp_model(train_x.cuda()) - loss = -mll(output, train_y.cuda()) - loss.backward() - optimizer.n_iter += 1 - optimizer.step() - - for param in gp_model.parameters(): - self.assertTrue(param.grad is not None) - self.assertGreater(param.grad.norm().item(), 0) - for param in likelihood.parameters(): - self.assertTrue(param.grad is not None) - self.assertGreater(param.grad.norm().item(), 0) - optimizer.step() - - # Test the model - gp_model.eval() - likelihood.eval() - test_function_predictions = likelihood(gp_model(test_x.cuda())) - mean_abs_error = torch.mean(torch.abs(test_y.cuda() - test_function_predictions.mean)) - - self.assertLess(mean_abs_error.squeeze().item(), 0.05) + self.test_posterior_latent_gp_and_likelihood_fast_pred_var(cuda=True) if __name__ == "__main__": diff --git a/test/likelihoods/test_general_multitask_gaussian_likelihood.py b/test/likelihoods/test_general_multitask_gaussian_likelihood.py index 05ded52a3..ef265bf51 100644 --- a/test/likelihoods/test_general_multitask_gaussian_likelihood.py +++ b/test/likelihoods/test_general_multitask_gaussian_likelihood.py @@ -8,7 +8,7 @@ import gpytorch import torch from gpytorch.kernels import MultitaskKernel, RBFKernel -from gpytorch.likelihoods import MultitaskGaussianLikelihood +from gpytorch.likelihoods import MultitaskGaussianLikelihoodKronecker from gpytorch.means import ConstantMean, MultitaskMean from gpytorch.distributions import MultitaskMultivariateNormal @@ -53,7 +53,7 @@ def tearDown(self): torch.set_rng_state(self.rng_state) def test_multitask_low_rank_noise_covar(self): - likelihood = MultitaskGaussianLikelihood(num_tasks=2, rank=1) + likelihood = MultitaskGaussianLikelihoodKronecker(num_tasks=2, rank=1) model = MultitaskGPModel(train_x, train_y, likelihood) # Find optimal model hyperparameters model.train() @@ -83,10 +83,10 @@ def test_multitask_low_rank_noise_covar(self): num_tasks = 2 task_noise_covar_factor = likelihood.task_noise_covar_factor - log_noise = likelihood.log_noise + noise = likelihood.noise task_noise_covar = task_noise_covar_factor.matmul( task_noise_covar_factor.transpose(-1, -2) - ) + log_noise.exp() * torch.eye(num_tasks) + ) + 
noise * torch.eye(num_tasks) self.assertGreater(task_noise_covar[0, 0, 1].item(), 0.05)
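
A recurring pattern in the rewritten tests above: the original CPU test body takes a cuda=False keyword, builds all data on the chosen device, moves the model and likelihood with .cuda() when requested, and a thin *_cuda wrapper re-runs the same body on the GPU only when one is available. A minimal sketch of that pattern (the data and assertions here are placeholders, not part of the diff):

import unittest

import torch


class ExamplePatternTest(unittest.TestCase):
    def test_mean_abs_error(self, cuda=False):
        device = torch.device("cuda") if cuda else torch.device("cpu")
        # All data is created directly on the target device ...
        train_x = torch.linspace(0, 1, 11, device=device)
        train_y = torch.sin(train_x * 6.28)
        # ... and the model/likelihood would be moved with .cuda() when cuda=True.
        # Here we only check the device plumbing itself.
        self.assertEqual(train_x.device.type, "cuda" if cuda else "cpu")
        self.assertEqual(train_y.device, train_x.device)

    def test_mean_abs_error_cuda(self):
        # The GPU variant just re-runs the CPU body; it is a silent no-op
        # on machines without CUDA.
        if torch.cuda.is_available():
            self.test_mean_abs_error(cuda=True)


if __name__ == "__main__":
    unittest.main()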
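The module-level training data in the simple-regression and multitask tests is replaced by a per-test _get_data(cuda=...) helper so tensors are built on the requested device; torch.randn_like inherits device and dtype from train_x, and ad-hoc probe points are cast with .type_as(test_x) instead of being constructed as bare CPU tensors. A sketch of both idioms, assuming nothing beyond plain PyTorch:

from math import pi

import torch


def get_data(cuda=False):
    device = torch.device("cuda") if cuda else torch.device("cpu")
    # Simple training data: a noisy sine, generated directly on `device`.
    train_x = torch.linspace(0, 1, 100, device=device)
    train_y = torch.sin(train_x * (2 * pi)) + torch.randn_like(train_x) * 0.1
    test_x = torch.linspace(0, 1, 51, device=device)
    return train_x, train_y, test_x


train_x, train_y, test_x = get_data(cuda=False)
# A probe point built this way follows test_x's dtype and device, so the same
# line works unchanged in the CUDA variant of the test.
probe = torch.tensor([1.1]).type_as(test_x)
assert probe.dtype == test_x.dtype and probe.device == test_x.device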
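Several of the rewritten tests optimize the model and likelihood hyperparameters jointly by handing both parameter lists to a single Adam optimizer and minimizing the negative exact marginal log likelihood. A sketch of that training loop; fit is a hypothetical helper name, and the model and likelihood are assumed to be an already-constructed ExactGP and GaussianLikelihood on the right device:

import gpytorch
from torch import optim


def fit(gp_model, likelihood, train_x, train_y, n_iter=20, lr=0.1):
    # Put both modules in training mode and optimize their parameters together.
    gp_model.train()
    likelihood.train()
    optimizer = optim.Adam(list(gp_model.parameters()) + list(likelihood.parameters()), lr=lr)
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, gp_model)
    for _ in range(n_iter):
        optimizer.zero_grad()
        output = gp_model(train_x)
        loss = -mll(output, train_y)  # maximize the marginal log likelihood
        loss.backward()
        optimizer.step()
    return gp_model, likelihood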
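The fast-pred-var assertions now reach the Gaussian noise through the likelihood's noise_covar submodule (noise_covar.raw_noise for the unconstrained parameter, noise_covar.noise for the transformed, positive value) rather than through attributes on the likelihood itself. A sketch of that access path, assuming a stock GaussianLikelihood that stores its noise this way, as the updated test does:

from gpytorch.likelihoods import GaussianLikelihood

likelihood = GaussianLikelihood()
# Fill the raw (pre-transform) noise parameter directly, as the test does ...
likelihood.noise_covar.raw_noise.data.fill_(3)
# ... then read the transformed, strictly positive noise back out.
noise = likelihood.noise_covar.noise
print(noise)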
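In the general multitask likelihood test, the low-rank-noise assertions now target MultitaskGaussianLikelihoodKronecker instead of MultitaskGaussianLikelihood. A sketch of constructing it as the test does; the class name and the task_noise_covar_factor attribute are taken from the diff and assumed to be importable/available:

from gpytorch.likelihoods import MultitaskGaussianLikelihoodKronecker

# Two tasks whose noise is modeled as a rank-1 inter-task factor plus a
# shared homoskedastic term.
likelihood = MultitaskGaussianLikelihoodKronecker(num_tasks=2, rank=1)
print(likelihood.task_noise_covar_factor.shape)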
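The final assertion rebuilds the learned inter-task noise covariance as a low-rank factor times its transpose plus the shared noise on the diagonal, and checks that the learned off-diagonal entry is positive. A small self-contained sketch of the same arithmetic, with stand-in values for the learned quantities:

import torch

num_tasks = 2
# Stand-ins for likelihood.task_noise_covar_factor and likelihood.noise.
task_noise_covar_factor = torch.randn(1, num_tasks, 1)
noise = torch.tensor(0.1)
# Low-rank-plus-diagonal reconstruction: F F^T + noise * I.
task_noise_covar = task_noise_covar_factor.matmul(
    task_noise_covar_factor.transpose(-1, -2)
) + noise * torch.eye(num_tasks)
# The test asserts task_noise_covar[0, 0, 1].item() > 0.05 on the trained model.
print(task_noise_covar[0, 0, 1].item())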