Fix Remove bandwidth parameter and add gamma to EigenPro (#36)

Alex7Li · rth · commit 86a233924533 · 2019-10-16T09:58:08.000+02:00
diff --git a/.gitignore b/.gitignore
@@ -74,4 +74,4 @@ target/
 
 # Pycharm
 .idea
-venv/
+venv/
diff --git a/benchmarks/_bench/eigenpro_plot_mnist.py b/benchmarks/_bench/eigenpro_plot_mnist.py
@@ -30,23 +30,15 @@
 
 train_sizes = [500, 1000, 2000, 5000, 10000, 20000, 40000, 60000]
 
-bandwidth = 5.0
-
+gamma = 0.02
 # Fit models to data
 for train_size in train_sizes:
     for name, estimator in [
         (
             "EigenPro",
-            EigenProClassifier(
-                n_epoch=2, bandwidth=bandwidth, random_state=rng
-            ),
-        ),
-        (
-            "SupportVector",
-            SVC(
-                C=5, gamma=1.0 / (2 * bandwidth * bandwidth), random_state=rng
-            ),
+            EigenProClassifier(n_epoch=2, gamma=gamma, random_state=rng),
         ),
+        ("SupportVector", SVC(C=5, gamma=gamma, random_state=rng)),
     ]:
         stime = time()
         estimator.fit(x_train[:train_size], y_train[:train_size])
diff --git a/benchmarks/_bench/eigenpro_plot_noisy_mnist.py b/benchmarks/_bench/eigenpro_plot_noisy_mnist.py
@@ -35,17 +35,16 @@
 
 train_sizes = [500, 1000, 2000, 5000, 10000, 20000, 40000, 60000]
 
-bandwidth = 5.0
+gamma = 0.02
+
 # Fit models to data
 for train_size in train_sizes:
     for name, estimator in [
         (
             "EigenPro",
-            EigenProClassifier(
-                n_epoch=2, bandwidth=bandwidth, random_state=rng
-            ),
+            EigenProClassifier(n_epoch=2, gamma=gamma, random_state=rng),
         ),
-        ("SupportVector", SVC(C=5, gamma=1.0 / (2 * bandwidth * bandwidth))),
+        ("SupportVector", SVC(C=5, gamma=gamma)),
     ]:
         stime = time()
         estimator.fit(x_train[:train_size], y_train[:train_size])
diff --git a/benchmarks/_bench/eigenpro_plot_synthetic.py b/benchmarks/_bench/eigenpro_plot_synthetic.py
@@ -35,20 +35,20 @@
 
 train_sizes = [2000, 5000, 10000, 20000, 50000]
 
-bandwidth = 10.0
+gamma = 0.005
 for train_size in train_sizes:
     for name, estimator in [
         (
             "EigenPro",
             EigenProClassifier(
                 n_epoch=3,
-                bandwidth=bandwidth,
+                gamma=gamma,
                 n_components=30,
                 subsample_size=1000,
                 random_state=rng,
             ),
         ),
-        ("SupportVector", SVC(C=5, gamma=1.0 / (2 * bandwidth * bandwidth))),
+        ("SupportVector", SVC(C=5, gamma=gamma)),
     ]:
         stime = time()
         estimator.fit(x_train[:train_size], y_train[:train_size])
diff --git a/doc/api.rst b/doc/api.rst
@@ -16,13 +16,6 @@ Kernel approximation
 EigenPro
 ========
 
-.. currentmodule:: doc
-
-.. toctree::
-   modules/eigenpro
-
-.. currentmodule:: sklearn_extra
-
 .. autosummary::
    :toctree: generated/
    :template: class.rst
diff --git a/doc/modules/eigenpro.rst b/doc/modules/eigenpro.rst
@@ -25,7 +25,7 @@ Notably, on the full MNIST training and testing using EigenPro are
 approximately 2 times and 5 times faster than that using SVC, respectively.
 
 .. |mnist| image:: ../images/eigenpro_mnist.png
-    :target: ../auto_examples/eigenpro/plot_mnist.html
+    :target: ../auto_examples/eigenpro/eigenpro_mnist.html
     :scale: 70
 
 .. centered:: |mnist|
@@ -36,8 +36,8 @@ We see that EigenPro has a significant advantage over SVC
 on this noisy MNIST. Training and testing using EigenPro are
 both 10 to 20 times faster than they are when using SVC.
 
-.. |mnist_noisy| image:: ../images/eigenpro_noisy_mnist.png
-    :target: ../auto_examples/eigenpro/plot_noisy_mnist.html
+.. |mnist_noisy| image:: ../images/eigenpro_mnist_noisy.png
+    :target: ../auto_examples/eigenpro/eigenpro_mnist_noisy.html
     :scale: 70
 
 .. centered:: |mnist_noisy|
@@ -48,7 +48,7 @@ with 400 synthetic features. Again, EigenPro demonstrates 10~20 times
 acceleration on training and testing without loss of accuracy.
 
 .. |synthetic| image:: ../images/eigenpro_synthetic.png
-    :target: ../auto_examples/eigenpro/plot_synthetic.html
+    :target: ../auto_examples/eigenpro/eigenpro_synthetic.html
     :scale: 70
 
 .. centered:: |synthetic|
diff --git a/doc/user_guide.rst b/doc/user_guide.rst
@@ -2,15 +2,16 @@
 
 .. _user_guide:
 
-.. toctree::
-     :numbered:
-
-     modules/eigenpro.rst
 
 ==========
 User guide
 ==========
 
+.. toctree::
+     :numbered:
+
+     modules/eigenpro.rst
+
 .. _k_medoids:
 
 K-Medoids
diff --git a/examples/eigenpro/README.txt b/examples/eigenpro/README.txt
@@ -3,4 +3,4 @@
 Eigenpro
 ========
 
-Examples concerning the :mod:`sklearn.fast_kernel` module.
+Examples concerning the :mod:`sklearn_extra.kernel_methods.eigenpro` module.
diff --git a/examples/eigenpro/plot_eigenpro_synthetic.py b/examples/eigenpro/plot_eigenpro_synthetic.py
@@ -9,7 +9,7 @@
 of EigenPro after two epochs.
 While EigenPro is slower on low dimensional datasets, as
 the number of features exceeds 500, it begins to outperform
-SVM in terms of both time and training error.
+SVM and shows more stability.
 """
 print(__doc__)
 
@@ -35,8 +35,8 @@
 svc_pred_times = []
 svc_err = []
 
-feature_counts = [15, 50, 150, 500, 1500]
-bandwidth = 8.0
+feature_counts = [20, 50, 150, 500, 1500]
+gamma = 0.008
 
 # Fit models to data
 for n_features in feature_counts:
@@ -54,16 +54,10 @@
         (
             "EigenPro",
             EigenProClassifier(
-                n_epoch=2,
-                bandwidth=bandwidth,
-                n_components=400,
-                random_state=rng,
+                n_epoch=2, gamma=gamma, n_components=400, random_state=rng
             ),
         ),
-        (
-            "SupportVector",
-            SVC(gamma=1.0 / (2 * bandwidth * bandwidth), random_state=rng),
-        ),
+        ("SupportVector", SVC(gamma=gamma, random_state=rng)),
     ]:
         stime = time()
         estimator.fit(x_train, y_train)
diff --git a/sklearn_extra/kernel_methods/_eigenpro.py b/sklearn_extra/kernel_methods/_eigenpro.py
@@ -22,8 +22,7 @@ def __init__(
         n_components=1000,
         subsample_size="auto",
         kernel="rbf",
-        bandwidth=5,
-        gamma=None,
+        gamma="scale",
         degree=3,
         coef0=1,
         kernel_params=None,
@@ -34,7 +33,6 @@ def __init__(
         self.n_components = n_components
         self.subsample_size = subsample_size
         self.kernel = kernel
-        self.bandwidth = bandwidth
         self.gamma = gamma
         self.degree = degree
         self.coef0 = coef0
@@ -66,25 +64,23 @@ def _kernel(self, X, Y):
                 params = self.kernel_params or {}
             else:
                 params = {
-                    "gamma": np.float32(
-                        0.5 / (self.bandwidth * self.bandwidth)
-                    ),
+                    "gamma": self.gamma_,
                     "degree": self.degree,
                     "coef0": self.coef0,
                 }
             return pairwise_kernels(
                 X, Y, metric=self.kernel, filter_params=True, **params
             )
         distance = euclidean_distances(X, Y, squared=True)
-        bandwidth = np.float32(self.bandwidth)
+        bandwidth = np.float32(1.0 / np.sqrt(2.0 * self.gamma_))
         if self.kernel == "rbf":
-            distance = distance / (-2.0 * bandwidth * bandwidth)
+            distance = -self.gamma_ * distance
             K = np.exp(distance)
         elif self.kernel == "laplace":
             d = np.maximum(distance, 0)
             K = np.exp(-np.sqrt(d) / bandwidth)
         else:  # self.kernel == "cauchy":
-            K = 1 / (1 + distance / (bandwidth * bandwidth))
+            K = 1 / (1 + 2.0 * self.gamma_ * distance)
         return K
 
     def _nystrom_svd(self, X, n_components):
@@ -247,6 +243,10 @@ def _initialize_params(self, X, Y, random_state):
         pinx = random_state.choice(n, sample_size, replace=False).astype(
             "int32"
         )
+        if self.gamma == "scale":
+            self.gamma_ = np.float32(1.0 / (X.var() * d))
+        else:
+            self.gamma_ = self.gamma
         max_S, beta, E, Lambda = self._setup(
             X[pinx], n_components, mG, alpha=0.95
         )
@@ -294,9 +294,9 @@ def validate_parameters(self):
             raise ValueError(
                 "batch_size should be positive, was " + str(self.batch_size)
             )
-        if self.bandwidth <= 0:
+        if self.gamma != "scale" and self.gamma <= 0:
             raise ValueError(
-                "bandwidth should be positive, was " + str(self.bandwidth)
+                "gamma should be positive, was " + str(self.gamma)
             )
 
     def _raw_fit(self, X, Y):
@@ -369,7 +369,9 @@ def _raw_predict(self, X):
         Y : {float, array}, shape = [n_samples, n_targets]
             Predicted targets.
         """
-        check_is_fitted(self, ["bs_", "centers_", "coef_", "was_1D_"])
+        check_is_fitted(
+            self, ["bs_", "centers_", "coef_", "was_1D_", "gamma_"]
+        )
         X = np.asarray(X, dtype=np.float64)
 
         if len(X.shape) == 1:
@@ -428,11 +430,11 @@ class EigenProRegressor(BaseEigenPro, RegressorMixin):
         rbf, laplace, and cauchy kernels. If a callable is given, it should
         accept two arguments and return a floating point number.
 
-    bandwidth : float, default=5
-        Bandwidth to use with the given kernel. For kernels that use gamma,
-        gamma = .5/(bandwidth^2). Interpretation of the default value is left to
-        the kernel; see the documentation for sklearn.metrics.pairwise.
-        Ignored by other kernels.
+    gamma : float or 'scale', default='scale'
+        Kernel coefficient. If 'scale', gamma = 1/(n_features*X.var()).
+        Interpretation of the default value is left to the kernel;
+        see the documentation for sklearn.metrics.pairwise.
+        For kernels that use bandwidth, bandwidth = 1/sqrt(2*gamma).
 
     degree : float, default=3
         Degree of the polynomial kernel. Ignored by other kernels.
@@ -466,11 +468,11 @@ class EigenProRegressor(BaseEigenPro, RegressorMixin):
     >>> rng = np.random.RandomState(1)
     >>> x_train = rng.randn(n_samples, n_features)
     >>> y_train = rng.randn(n_samples, n_targets)
-    >>> rgs = EigenProRegressor(n_epoch=3, bandwidth=1, subsample_size=50)
+    >>> rgs = EigenProRegressor(n_epoch=3, gamma=.5, subsample_size=50)
     >>> rgs.fit(x_train, y_train)
-    EigenProRegressor(bandwidth=1, batch_size='auto', coef0=1, degree=3, gamma=None,
-                      kernel='rbf', kernel_params=None, n_components=1000,
-                      n_epoch=3, random_state=None, subsample_size=50)
+    EigenProRegressor(batch_size='auto', coef0=1, degree=3, gamma=0.5, kernel='rbf',
+                      kernel_params=None, n_components=1000, n_epoch=3,
+                      random_state=None, subsample_size=50)
     >>> y_pred = rgs.predict(x_train)
     >>> loss = np.mean(np.square(y_train - y_pred))
     """
@@ -482,8 +484,7 @@ def __init__(
         n_components=1000,
         subsample_size="auto",
         kernel="rbf",
-        bandwidth=5,
-        gamma=None,
+        gamma="scale",
         degree=3,
         coef0=1,
         kernel_params=None,
@@ -495,7 +496,6 @@ def __init__(
             n_components=n_components,
             subsample_size=subsample_size,
             kernel=kernel,
-            bandwidth=bandwidth,
             gamma=gamma,
             degree=degree,
             coef0=coef0,
@@ -543,17 +543,11 @@ class EigenProClassifier(BaseEigenPro, ClassifierMixin):
         rbf, laplace, and cauchy kernels. If a callable is given, it should
         accept two arguments and return a floating point number.
 
-    bandwidth : float, default=5
-        Bandwidth to use with the given kernel. For kernels that use gamma,
-        gamma = .5/(bandwidth^2). Interpretation of the default value is left to
-        the kernel; see the documentation for sklearn.metrics.pairwise.
-        Ignored by other kernels.
-
-    gamma : float, default=None
-        Gamma parameter for the RBF, polynomial, exponential chi2
-        and sigmoid kernels. Interpretation of the default value is left
-        to the kernel; see the documentation for
-        sklearn.metrics.pairwise. Ignored by other kernels.
+    gamma : float or 'scale', default=0.02
+        Kernel coefficient. If 'scale', gamma = 1/(n_features*X.var()).
+        Interpretation of the default value is left to the kernel;
+        see the documentation for sklearn.metrics.pairwise.
+        For kernels that use bandwidth, bandwidth = 1/sqrt(2*gamma).
 
     degree : float, default=3
         Degree of the polynomial kernel. Ignored by other kernels.
@@ -588,12 +582,11 @@ class EigenProClassifier(BaseEigenPro, ClassifierMixin):
     >>> rng = np.random.RandomState(1)
     >>> x_train = rng.randn(n_samples, n_features)
     >>> y_train = rng.randint(n_targets, size=n_samples)
-    >>> rgs = EigenProClassifier(n_epoch=3, bandwidth=1, subsample_size=50)
+    >>> rgs = EigenProClassifier(n_epoch=3, gamma=.01, subsample_size=50)
     >>> rgs.fit(x_train, y_train)
-    EigenProClassifier(bandwidth=1, batch_size='auto', coef0=1, degree=3,
-                       gamma=None, kernel='rbf', kernel_params=None,
-                       n_components=1000, n_epoch=3, random_state=None,
-                       subsample_size=50)
+    EigenProClassifier(batch_size='auto', coef0=1, degree=3, gamma=0.01,
+                       kernel='rbf', kernel_params=None, n_components=1000,
+                       n_epoch=3, random_state=None, subsample_size=50)
     >>> y_pred = rgs.predict(x_train)
     >>> loss = np.mean(y_train != y_pred)
     """
@@ -605,8 +598,7 @@ def __init__(
         n_components=1000,
         subsample_size="auto",
         kernel="rbf",
-        bandwidth=5,
-        gamma=None,
+        gamma=0.02,
         degree=3,
         coef0=1,
         kernel_params=None,
@@ -618,7 +610,6 @@ def __init__(
             n_components=n_components,
             subsample_size=subsample_size,
             kernel=kernel,
-            bandwidth=bandwidth,
             gamma=gamma,
             degree=degree,
             coef0=coef0,
diff --git a/sklearn_extra/kernel_methods/tests/test_eigenpro.py b/sklearn_extra/kernel_methods/tests/test_eigenpro.py

-Original file line number
+Diff line change
 # Pycharm
 .idea
 -venv/
 +venv/