11from math import pi
2+ from typing import Optional
23
34import torch
45
@@ -12,7 +13,7 @@ def mean_absolute_error(
1213 test_y : torch .Tensor ,
1314):
1415 """
15- Mean Absolute Error .
16+ Mean absolute error .
1617 """
1718 combine_dim = - 2 if isinstance (pred_dist , MultitaskMultivariateNormal ) else - 1
1819 return torch .abs (pred_dist .mean - test_y ).mean (dim = combine_dim )
@@ -24,7 +25,7 @@ def mean_squared_error(
2425 squared : bool = True ,
2526):
2627 """
27- Mean Squared Error .
28+ Mean squared error .
2829 """
2930 combine_dim = - 2 if isinstance (pred_dist , MultitaskMultivariateNormal ) else - 1
3031 res = torch .square (pred_dist .mean - test_y ).mean (dim = combine_dim )
@@ -33,29 +34,59 @@ def mean_squared_error(
3334 return res
3435
3536
def standardized_mean_squared_error(
    pred_dist: MultivariateNormal,
    test_y: torch.Tensor,
):
    """Standardized mean squared error.

    Standardizes the mean squared error by the variance of the test data.
    """
    mse = mean_squared_error(pred_dist, test_y, squared=True)
    # Dividing by the (whole-tensor) variance of the targets makes the
    # metric scale-free across datasets.
    return mse / test_y.var()
47+
def negative_log_predictive_density(
    pred_dist: MultivariateNormal,
    test_y: torch.Tensor,
):
    """Negative log predictive density.

    Computes the negative predictive log density normalized by the size of the test data.
    """
    # Multitask predictions carry a trailing task dimension, so the data
    # dimension sits at -2 rather than -1.
    if isinstance(pred_dist, MultitaskMultivariateNormal):
        num_data = test_y.shape[-2]
    else:
        num_data = test_y.shape[-1]
    return -pred_dist.log_prob(test_y) / num_data
4258
4359
def _pointwise_gaussian_nll(target, mean, var):
    # Per-point negative log density of a Gaussian with the given mean and
    # variance: 0.5*log(2*pi*var) + (target - mean)^2 / (2*var).
    return 0.5 * torch.log(2 * pi * var) + torch.square(target - mean) / (2 * var)


def mean_standardized_log_loss(
    pred_dist: MultivariateNormal,
    test_y: torch.Tensor,
    train_y: Optional[torch.Tensor] = None,
):
    """
    Mean standardized log loss.

    Averages the pointwise Gaussian negative log loss of the predictive
    distribution over the test points. When ``train_y`` is supplied, the loss
    of the trivial model -- a Gaussian using the training data's mean and
    variance -- is subtracted, yielding the *standardized* log loss of p.23 of
    Rasmussen and Williams (2006). Without ``train_y``, the plain mean log
    loss is returned.
    """
    # Multitask predictions reduce over the data dimension at -2; single-task
    # over the last dimension.
    combine_dim = -2 if isinstance(pred_dist, MultitaskMultivariateNormal) else -1

    res = _pointwise_gaussian_nll(test_y, pred_dist.mean, pred_dist.variance).mean(dim=combine_dim)

    if train_y is not None:
        # NOTE(review): the trivial model's variance is taken over the whole
        # training tensor (``var()`` with no dim), while its mean is reduced
        # over ``combine_dim`` -- confirm this asymmetry is intended for the
        # multitask case.
        trivial_nll = _pointwise_gaussian_nll(test_y, train_y.mean(dim=combine_dim), train_y.var())
        res = res - trivial_nll.mean(dim=combine_dim)

    return res
5990
6091
6192def quantile_coverage_error (
0 commit comments