
Commit a6c5b02

RSid8 and gpleiss authored
Added Piecewise Polynomial Kernel (#1738)
Co-authored-by: Geoff Pleiss <[email protected]>
1 parent 1db1744 commit a6c5b02

6 files changed: +234 -6 lines changed

.gitignore

Lines changed: 1 addition & 1 deletion

@@ -7,7 +7,7 @@
 
 # VS Code settings stuff
 .vscode
-
+.pylintrc
 # Project specific
 gpytorch/libfft
 .pytest_cache

docs/source/kernels.rst

Lines changed: 6 additions & 0 deletions

@@ -53,6 +53,12 @@ Standard Kernels
 .. autoclass:: PeriodicKernel
    :members:
 
+:hidden:`PiecewisePolynomialKernel`
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: PiecewisePolynomialKernel
+   :members:
+
 :hidden:`PolynomialKernel`
 ~~~~~~~~~~~~~~~~~~~~~~~~~~
gpytorch/kernels/__init__.py

Lines changed: 2 additions & 0 deletions

@@ -18,6 +18,7 @@
 from .multitask_kernel import MultitaskKernel
 from .newton_girard_additive_kernel import NewtonGirardAdditiveKernel
 from .periodic_kernel import PeriodicKernel
+from .piecewise_polynomial_kernel import PiecewisePolynomialKernel
 from .polynomial_kernel import PolynomialKernel
 from .polynomial_kernel_grad import PolynomialKernelGrad
 from .product_structure_kernel import ProductStructureKernel
@@ -50,6 +51,7 @@
     "MultitaskKernel",
     "NewtonGirardAdditiveKernel",
     "PeriodicKernel",
+    "PiecewisePolynomialKernel",
     "PolynomialKernel",
     "PolynomialKernelGrad",
     "ProductKernel",
gpytorch/kernels/piecewise_polynomial_kernel.py

Lines changed: 118 additions & 0 deletions

import torch

from .kernel import Kernel


class PiecewisePolynomialKernel(Kernel):
    r"""
    Computes a covariance matrix based on the Piecewise Polynomial kernel
    between inputs :math:`\mathbf{x_1}` and :math:`\mathbf{x_2}`:

    .. math::

        \begin{align}
            r &= \left\Vert \mathbf{x_1} - \mathbf{x_2} \right\Vert \\
            j &= \lfloor \frac{D}{2} \rfloor + q + 1 \\
            K_{\text{ppD, 0}}(\mathbf{x_1}, \mathbf{x_2}) &= (1-r)^j_+ , \\
            K_{\text{ppD, 1}}(\mathbf{x_1}, \mathbf{x_2}) &= (1-r)^{j+1}_+ ((j + 1)r + 1), \\
            K_{\text{ppD, 2}}(\mathbf{x_1}, \mathbf{x_2}) &= (1-r)^{j+2}_+ (1 + (j+2)r +
                \frac{j^2 + 4j + 3}{3}r^2), \\
            K_{\text{ppD, 3}}(\mathbf{x_1}, \mathbf{x_2}) &= (1-r)^{j+3}_+
                (1 + (j+3)r + \frac{6j^2 + 36j + 45}{15}r^2 +
                \frac{j^3 + 9j^2 + 23j + 15}{15}r^3) \\
        \end{align}

    where :math:`K_{\text{ppD, q}}` is positive semidefinite in :math:`\mathbb{R}^{D}` and
    :math:`q` is the smoothness coefficient. See `Rasmussen and Williams (2006)`_ Equation 4.21.

    .. note:: This kernel does not have an `outputscale` parameter. To add a scaling parameter,
        decorate this kernel with a :class:`gpytorch.kernels.ScaleKernel`.

    :param q: (Default: 2) The smoothness parameter.
    :type q: int (0, 1, 2 or 3)
    :param ard_num_dims: (Default: `None`) Set this if you want a separate lengthscale for each
        input dimension. It should be `d` if :attr:`x1` is a `... x n x d` matrix.
    :type ard_num_dims: int, optional
    :param batch_shape: (Default: `None`) Set this if you want a separate lengthscale for each
        batch of input data. It should be `torch.Size([b1, b2])` for a `b1 x b2 x n x m` kernel output.
    :type batch_shape: torch.Size, optional
    :param active_dims: (Default: `None`) Set this if you want to compute the covariance of only
        a few input dimensions. The ints correspond to the indices of the dimensions.
    :type active_dims: Tuple(int)
    :param lengthscale_prior: (Default: `None`) Set this if you want to apply a prior to the
        lengthscale parameter.
    :type lengthscale_prior: ~gpytorch.priors.Prior, optional
    :param lengthscale_constraint: (Default: `Positive`) Set this if you want to apply a
        constraint to the lengthscale parameter.
    :type lengthscale_constraint: ~gpytorch.constraints.Positive, optional
    :param eps: (Default: 1e-6) The minimum value that the lengthscale can take (prevents
        divide by zero errors).
    :type eps: float, optional

    :var torch.Tensor lengthscale: The lengthscale parameter. Size/shape of parameter depends on the
        :attr:`ard_num_dims` and :attr:`batch_shape` arguments.

    .. _Rasmussen and Williams (2006):
        http://www.gaussianprocess.org/gpml/

    Example:
        >>> x = torch.randn(10, 5)
        >>> # Non-batch option
        >>> covar_module = gpytorch.kernels.ScaleKernel(
        ...     gpytorch.kernels.PiecewisePolynomialKernel(q=2))
        >>> # Non-batch: ARD (different lengthscale for each input dimension)
        >>> covar_module = gpytorch.kernels.ScaleKernel(
        ...     gpytorch.kernels.PiecewisePolynomialKernel(q=2, ard_num_dims=5)
        ... )
        >>> covar = covar_module(x)  # Output: LazyTensor of size (10 x 10)
        >>> batch_x = torch.randn(2, 10, 5)
        >>> # Batch: different lengthscale for each batch
        >>> covar_module = gpytorch.kernels.ScaleKernel(
        ...     gpytorch.kernels.PiecewisePolynomialKernel(q=2, batch_shape=torch.Size([2]))
        ... )
        >>> covar = covar_module(batch_x)  # Output: LazyTensor of size (2 x 10 x 10)
    """

    has_lengthscale = True

    def __init__(self, q=2, **kwargs):
        super(PiecewisePolynomialKernel, self).__init__(**kwargs)
        if q not in {0, 1, 2, 3}:
            raise ValueError("q expected to be 0, 1, 2 or 3")
        self.q = q

    def fmax(self, r, j, q):
        # (1 - r)_+^{j + q}: the truncation that gives the kernel compact support
        return torch.max(torch.tensor(0.0), 1 - r).pow(j + q)

    def get_cov(self, r, j, q):
        # polynomial factor of K_{ppD, q}; see Rasmussen and Williams (2006), Eq. 4.21
        if q == 0:
            return 1
        if q == 1:
            return (j + 1) * r + 1
        if q == 2:
            return 1 + (j + 2) * r + ((j ** 2 + 4 * j + 3) / 3.0) * r ** 2
        if q == 3:
            return (
                1
                + (j + 3) * r
                + ((6 * j ** 2 + 36 * j + 45) / 15.0) * r ** 2
                + ((j ** 3 + 9 * j ** 2 + 23 * j + 15) / 15.0) * r ** 3
            )

    def forward(self, x1, x2, last_dim_is_batch=False, diag=False, **params):
        x1_ = x1.div(self.lengthscale)
        x2_ = x2.div(self.lengthscale)
        if last_dim_is_batch is True:
            D = x1.shape[1]
        else:
            D = x1.shape[-1]
        j = torch.floor(torch.tensor(D / 2.0)) + self.q + 1
        if last_dim_is_batch and diag:
            r = self.covar_dist(x1_, x2_, last_dim_is_batch=True, diag=True)
        elif diag:
            r = self.covar_dist(x1_, x2_, diag=True)
        elif last_dim_is_batch:
            r = self.covar_dist(x1_, x2_, last_dim_is_batch=True)
        else:
            r = self.covar_dist(x1_, x2_)
        cov_matrix = self.fmax(r, j, self.q) * self.get_cov(r, j, self.q)
        return cov_matrix
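
For context, here is a minimal end-to-end sketch of the new kernel inside a gpytorch regression model. The model class, data, and hyperparameters are illustrative assumptions, not part of this commit; only PiecewisePolynomialKernel and ScaleKernel come from the diff above:

    import torch
    import gpytorch

    class GPModel(gpytorch.models.ExactGP):
        def __init__(self, train_x, train_y, likelihood):
            super().__init__(train_x, train_y, likelihood)
            self.mean_module = gpytorch.means.ConstantMean()
            # ScaleKernel adds the outputscale that PiecewisePolynomialKernel lacks
            self.covar_module = gpytorch.kernels.ScaleKernel(
                gpytorch.kernels.PiecewisePolynomialKernel(q=2)
            )

        def forward(self, x):
            return gpytorch.distributions.MultivariateNormal(
                self.mean_module(x), self.covar_module(x)
            )

    train_x = torch.linspace(0, 1, 20)
    train_y = torch.sin(2 * 3.14159 * train_x) + 0.1 * torch.randn(20)
    likelihood = gpytorch.likelihoods.GaussianLikelihood()
    model = GPModel(train_x, train_y, likelihood)

    # For D = 1 and q = 2 the exponent is j = floor(1/2) + 2 + 1 = 3, so any
    # pair of points whose lengthscale-scaled distance is >= 1 contributes an
    # exactly-zero covariance entry.
    model.eval()
    likelihood.eval()
    with torch.no_grad():
        pred = likelihood(model(torch.linspace(0, 1, 5)))

Because of that compact support, the covariance matrix becomes sparse once points sit more than one lengthscale apart, which is the main practical attraction of this kernel family.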
test/kernels/test_piecewise_polynomial_kernel.py

Lines changed: 103 additions & 0 deletions

#!/usr/bin/env python3

import math
import unittest

import torch

from gpytorch.kernels import PiecewisePolynomialKernel
from gpytorch.test.base_kernel_test_case import BaseKernelTestCase


class TestPiecewisePolynomialKernel(unittest.TestCase, BaseKernelTestCase):
    def create_kernel_no_ard(self, **kwargs):
        return PiecewisePolynomialKernel(q=2, **kwargs)

    def test_computes_piecewise_polynomial_kernel(self):
        a = torch.tensor([[4, 1], [2, 2], [8, 0]], dtype=torch.float)
        b = torch.tensor([[0, 0], [2, 1], [1, 0]], dtype=torch.float)
        kernel = PiecewisePolynomialKernel(q=0)
        kernel.eval()

        def test_r(a, b):
            # the kernel computes distances on lengthscale-scaled inputs
            return torch.cdist(a.div(kernel.lengthscale), b.div(kernel.lengthscale))

        def test_get_cov(r, j, q):
            if q == 0:
                return 1
            if q == 1:
                return (j + 1) * r + 1
            if q == 2:
                return 1 + (j + 2) * r + ((j ** 2 + 4 * j + 3) / 3.0) * r ** 2
            if q == 3:
                return (
                    1
                    + (j + 3) * r
                    + ((6 * j ** 2 + 36 * j + 45) / 15.0) * r ** 2
                    + ((j ** 3 + 9 * j ** 2 + 23 * j + 15) / 15.0) * r ** 3
                )

        def test_fmax(r, j, q):
            return torch.max(torch.tensor(0.0), 1 - r).pow(j + q)

        # j = floor(D / 2) + q + 1, matching the kernel's forward pass
        j = math.floor(a.shape[-1] / 2.0) + kernel.q + 1
        r = test_r(a, b)
        actual = test_fmax(r, j, kernel.q) * test_get_cov(r, j, kernel.q)
        res = kernel(a, b).evaluate()
        self.assertLess(torch.norm(res - actual), 1e-5)

        # diag
        actual = actual.diag()
        res = kernel(a, b).diag()
        self.assertLess(torch.norm(res - actual), 1e-5)

        # batch_dims
        actual = torch.zeros(2, 3, 3)
        for i in range(2):
            actual[i] = kernel(a[:, i].unsqueeze(-1), b[:, i].unsqueeze(-1)).evaluate()

        res = kernel(a, b, last_dim_is_batch=True).evaluate()
        self.assertLess(torch.norm(res - actual), 1e-5)

        # batch_dims + diag
        res = kernel(a, b, last_dim_is_batch=True).diag()
        actual = torch.cat([actual[i].diag().unsqueeze(0) for i in range(actual.size(0))])
        self.assertLess(torch.norm(res - actual), 1e-5)

    def test_piecewise_polynomial_kernel_batch(self):
        a = torch.tensor([[4, 2, 8], [1, 2, 3]], dtype=torch.float).view(2, 3, 1)
        b = torch.tensor([[0, 2, 1], [-1, 2, 0]], dtype=torch.float).view(2, 3, 1)
        kernel = PiecewisePolynomialKernel(q=0, batch_shape=torch.Size([2]))
        kernel.eval()

        def test_r(a, b):
            # the kernel computes distances on lengthscale-scaled inputs
            return torch.cdist(a.div(kernel.lengthscale), b.div(kernel.lengthscale))

        def test_get_cov(r, j, q):
            if q == 0:
                return 1
            if q == 1:
                return (j + 1) * r + 1
            if q == 2:
                return 1 + (j + 2) * r + ((j ** 2 + 4 * j + 3) / 3.0) * r ** 2
            if q == 3:
                return (
                    1
                    + (j + 3) * r
                    + ((6 * j ** 2 + 36 * j + 45) / 15.0) * r ** 2
                    + ((j ** 3 + 9 * j ** 2 + 23 * j + 15) / 15.0) * r ** 3
                )

        def test_fmax(r, j, q):
            return torch.max(torch.tensor(0.0), 1 - r).pow(j + q)

        # j = floor(D / 2) + q + 1, matching the kernel's forward pass
        j = math.floor(a.shape[-1] / 2.0) + kernel.q + 1
        r = test_r(a, b)
        actual = test_fmax(r, j, kernel.q) * test_get_cov(r, j, kernel.q)
        res = kernel(a, b).evaluate()
        self.assertLess(torch.norm(res - actual), 1e-5)


if __name__ == "__main__":
    unittest.main()
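
A side note on the reference values in these tests: for the chosen inputs, every pairwise distance is at least 1 even before division by the default lengthscale (softplus-initialized, roughly 0.693, so scaling only increases the distances), which means the (1 - r)_+ factor truncates every entry to zero and res and actual are compared as all-zero tensors. A small check of that compact-support behavior (a sketch, not part of the commit):

    import torch
    from gpytorch.kernels import PiecewisePolynomialKernel

    kernel = PiecewisePolynomialKernel(q=0)
    a = torch.tensor([[4.0, 1.0], [2.0, 2.0], [8.0, 0.0]])
    b = torch.tensor([[0.0, 0.0], [2.0, 1.0], [1.0, 0.0]])
    # every pair is at least distance 1 apart, so (1 - r)_+ == 0 everywhere
    print(kernel(a, b).evaluate())  # tensor of zeros, shape (3, 3)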

test/kernels/test_polynomial_kernel.py

Lines changed: 4 additions & 5 deletions

@@ -25,16 +25,15 @@ def test_computes_quadratic_kernel(self):
 
         res = kernel(a, b).evaluate()
         self.assertLess(torch.norm(res - actual), 1e-5)
-
         # diag
         res = kernel(a, b).diag()
         actual = actual.diag()
         self.assertLess(torch.norm(res - actual), 1e-5)
 
         # batch_dims
         actual = torch.zeros(2, 3, 3)
-        for l in range(2):
-            actual[l] = kernel(a[:, l].unsqueeze(-1), b[:, l].unsqueeze(-1)).evaluate()
+        for i in range(2):
+            actual[i] = kernel(a[:, i].unsqueeze(-1), b[:, i].unsqueeze(-1)).evaluate()
 
         res = kernel(a, b, last_dim_is_batch=True).evaluate()
         self.assertLess(torch.norm(res - actual), 1e-5)
@@ -65,8 +64,8 @@ def test_computes_cubic_kernel(self):
 
         # batch_dims
         actual = torch.zeros(2, 3, 3)
-        for l in range(2):
-            actual[l] = kernel(a[:, l].unsqueeze(-1), b[:, l].unsqueeze(-1)).evaluate()
+        for i in range(2):
+            actual[i] = kernel(a[:, i].unsqueeze(-1), b[:, i].unsqueeze(-1)).evaluate()
 
         res = kernel(a, b, last_dim_is_batch=True).evaluate()
         self.assertLess(torch.norm(res - actual), 1e-5)
