Skip to content

Commit 86a2339

Browse files
Alex7Lirth
authored and committed
Fix Remove bandwidth parameter and add gamma to EigenPro (#36)
1 parent 12fb111 commit 86a2339

File tree

11 files changed

+81
-113
lines changed

11 files changed

+81
-113
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,4 +74,4 @@ target/
7474

7575
# Pycharm
7676
.idea
77-
venv/
77+
venv/

benchmarks/_bench/eigenpro_plot_mnist.py

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -30,23 +30,15 @@
3030

3131
train_sizes = [500, 1000, 2000, 5000, 10000, 20000, 40000, 60000]
3232

33-
bandwidth = 5.0
34-
33+
gamma = 0.02
3534
# Fit models to data
3635
for train_size in train_sizes:
3736
for name, estimator in [
3837
(
3938
"EigenPro",
40-
EigenProClassifier(
41-
n_epoch=2, bandwidth=bandwidth, random_state=rng
42-
),
43-
),
44-
(
45-
"SupportVector",
46-
SVC(
47-
C=5, gamma=1.0 / (2 * bandwidth * bandwidth), random_state=rng
48-
),
39+
EigenProClassifier(n_epoch=2, gamma=gamma, random_state=rng),
4940
),
41+
("SupportVector", SVC(C=5, gamma=gamma, random_state=rng)),
5042
]:
5143
stime = time()
5244
estimator.fit(x_train[:train_size], y_train[:train_size])

benchmarks/_bench/eigenpro_plot_noisy_mnist.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -35,17 +35,16 @@
3535

3636
train_sizes = [500, 1000, 2000, 5000, 10000, 20000, 40000, 60000]
3737

38-
bandwidth = 5.0
38+
gamma = 0.02
39+
3940
# Fit models to data
4041
for train_size in train_sizes:
4142
for name, estimator in [
4243
(
4344
"EigenPro",
44-
EigenProClassifier(
45-
n_epoch=2, bandwidth=bandwidth, random_state=rng
46-
),
45+
EigenProClassifier(n_epoch=2, gamma=gamma, random_state=rng),
4746
),
48-
("SupportVector", SVC(C=5, gamma=1.0 / (2 * bandwidth * bandwidth))),
47+
("SupportVector", SVC(C=5, gamma=gamma)),
4948
]:
5049
stime = time()
5150
estimator.fit(x_train[:train_size], y_train[:train_size])

benchmarks/_bench/eigenpro_plot_synthetic.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,20 +35,20 @@
3535

3636
train_sizes = [2000, 5000, 10000, 20000, 50000]
3737

38-
bandwidth = 10.0
38+
gamma = 0.005
3939
for train_size in train_sizes:
4040
for name, estimator in [
4141
(
4242
"EigenPro",
4343
EigenProClassifier(
4444
n_epoch=3,
45-
bandwidth=bandwidth,
45+
gamma=gamma,
4646
n_components=30,
4747
subsample_size=1000,
4848
random_state=rng,
4949
),
5050
),
51-
("SupportVector", SVC(C=5, gamma=1.0 / (2 * bandwidth * bandwidth))),
51+
("SupportVector", SVC(C=5, gamma=gamma)),
5252
]:
5353
stime = time()
5454
estimator.fit(x_train[:train_size], y_train[:train_size])

doc/api.rst

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,6 @@ Kernel approximation
1616
EigenPro
1717
========
1818

19-
.. currentmodule:: doc
20-
21-
.. toctree::
22-
modules/eigenpro
23-
24-
.. currentmodule:: sklearn_extra
25-
2619
.. autosummary::
2720
:toctree: generated/
2821
:template: class.rst

doc/modules/eigenpro.rst

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ Notably, on the full MNIST training and testing using EigenPro are
2525
approximately 2 times and 5 times faster than that using SVC, respectively.
2626

2727
.. |mnist| image:: ../images/eigenpro_mnist.png
28-
:target: ../auto_examples/eigenpro/plot_mnist.html
28+
:target: ../auto_examples/eigenpro/eigenpro_mnist.html
2929
:scale: 70
3030

3131
.. centered:: |mnist|
@@ -36,8 +36,8 @@ We see that EigenPro has a significant advantage over SVC
3636
on this noisy MNIST. Training and testing using EigenPro are
3737
both 10 to 20 times faster than they are when using SVC.
3838

39-
.. |mnist_noisy| image:: ../images/eigenpro_noisy_mnist.png
40-
:target: ../auto_examples/eigenpro/plot_noisy_mnist.html
39+
.. |mnist_noisy| image:: ../images/eigenpro_mnist_noisy.png
40+
:target: ../auto_examples/eigenpro/eigenpro_mnist_noisy.html
4141
:scale: 70
4242

4343
.. centered:: |mnist_noisy|
@@ -48,7 +48,7 @@ with 400 synthetic features. Again, EigenPro demonstrates 10~20 times
4848
acceleration on training and testing without loss of accuracy.
4949

5050
.. |synthetic| image:: ../images/eigenpro_synthetic.png
51-
:target: ../auto_examples/eigenpro/plot_synthetic.html
51+
:target: ../auto_examples/eigenpro/eigenpro_synthetic.html
5252
:scale: 70
5353

5454
.. centered:: |synthetic|

doc/user_guide.rst

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,16 @@
22

33
.. _user_guide:
44

5-
.. toctree::
6-
:numbered:
7-
8-
modules/eigenpro.rst
95

106
==========
117
User guide
128
==========
139

10+
.. toctree::
11+
:numbered:
12+
13+
modules/eigenpro.rst
14+
1415
.. _k_medoids:
1516

1617
K-Medoids

examples/eigenpro/README.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,4 @@
33
Eigenpro
44
========
55

6-
Examples concerning the :mod:`sklearn.fast_kernel` module.
6+
Examples concerning the :mod:`sklearn_extra.kernel_methods.eigenpro` module.

examples/eigenpro/plot_eigenpro_synthetic.py

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
of EigenPro after two epochs.
1010
While EigenPro is slower on low dimensional datasets, as
1111
the number of features exceeds 500, it begins to outperform
12-
SVM in terms of both time and training error.
12+
SVM and shows more stability.
1313
"""
1414
print(__doc__)
1515

@@ -35,8 +35,8 @@
3535
svc_pred_times = []
3636
svc_err = []
3737

38-
feature_counts = [15, 50, 150, 500, 1500]
39-
bandwidth = 8.0
38+
feature_counts = [20, 50, 150, 500, 1500]
39+
gamma = 0.008
4040

4141
# Fit models to data
4242
for n_features in feature_counts:
@@ -54,16 +54,10 @@
5454
(
5555
"EigenPro",
5656
EigenProClassifier(
57-
n_epoch=2,
58-
bandwidth=bandwidth,
59-
n_components=400,
60-
random_state=rng,
57+
n_epoch=2, gamma=gamma, n_components=400, random_state=rng
6158
),
6259
),
63-
(
64-
"SupportVector",
65-
SVC(gamma=1.0 / (2 * bandwidth * bandwidth), random_state=rng),
66-
),
60+
("SupportVector", SVC(gamma=gamma, random_state=rng)),
6761
]:
6862
stime = time()
6963
estimator.fit(x_train, y_train)

sklearn_extra/kernel_methods/_eigenpro.py

Lines changed: 34 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,7 @@ def __init__(
2222
n_components=1000,
2323
subsample_size="auto",
2424
kernel="rbf",
25-
bandwidth=5,
26-
gamma=None,
25+
gamma="scale",
2726
degree=3,
2827
coef0=1,
2928
kernel_params=None,
@@ -34,7 +33,6 @@ def __init__(
3433
self.n_components = n_components
3534
self.subsample_size = subsample_size
3635
self.kernel = kernel
37-
self.bandwidth = bandwidth
3836
self.gamma = gamma
3937
self.degree = degree
4038
self.coef0 = coef0
@@ -66,25 +64,23 @@ def _kernel(self, X, Y):
6664
params = self.kernel_params or {}
6765
else:
6866
params = {
69-
"gamma": np.float32(
70-
0.5 / (self.bandwidth * self.bandwidth)
71-
),
67+
"gamma": self.gamma_,
7268
"degree": self.degree,
7369
"coef0": self.coef0,
7470
}
7571
return pairwise_kernels(
7672
X, Y, metric=self.kernel, filter_params=True, **params
7773
)
7874
distance = euclidean_distances(X, Y, squared=True)
79-
bandwidth = np.float32(self.bandwidth)
75+
bandwidth = np.float32(1.0 / np.sqrt(2.0 * self.gamma_))
8076
if self.kernel == "rbf":
81-
distance = distance / (-2.0 * bandwidth * bandwidth)
77+
distance = -self.gamma_ * distance
8278
K = np.exp(distance)
8379
elif self.kernel == "laplace":
8480
d = np.maximum(distance, 0)
8581
K = np.exp(-np.sqrt(d) / bandwidth)
8682
else: # self.kernel == "cauchy":
87-
K = 1 / (1 + distance / (bandwidth * bandwidth))
83+
K = 1 / (1 + 2.0 * self.gamma_ * distance)
8884
return K
8985

9086
def _nystrom_svd(self, X, n_components):
@@ -247,6 +243,10 @@ def _initialize_params(self, X, Y, random_state):
247243
pinx = random_state.choice(n, sample_size, replace=False).astype(
248244
"int32"
249245
)
246+
if self.gamma == "scale":
247+
self.gamma_ = np.float32(1.0 / (X.var() * d))
248+
else:
249+
self.gamma_ = self.gamma
250250
max_S, beta, E, Lambda = self._setup(
251251
X[pinx], n_components, mG, alpha=0.95
252252
)
@@ -294,9 +294,9 @@ def validate_parameters(self):
294294
raise ValueError(
295295
"batch_size should be positive, was " + str(self.batch_size)
296296
)
297-
if self.bandwidth <= 0:
297+
if self.gamma != "scale" and self.gamma <= 0:
298298
raise ValueError(
299-
"bandwidth should be positive, was " + str(self.bandwidth)
299+
"gamma should be positive, was " + str(self.gamma)
300300
)
301301

302302
def _raw_fit(self, X, Y):
@@ -369,7 +369,9 @@ def _raw_predict(self, X):
369369
Y : {float, array}, shape = [n_samples, n_targets]
370370
Predicted targets.
371371
"""
372-
check_is_fitted(self, ["bs_", "centers_", "coef_", "was_1D_"])
372+
check_is_fitted(
373+
self, ["bs_", "centers_", "coef_", "was_1D_", "gamma_"]
374+
)
373375
X = np.asarray(X, dtype=np.float64)
374376

375377
if len(X.shape) == 1:
@@ -428,11 +430,11 @@ class EigenProRegressor(BaseEigenPro, RegressorMixin):
428430
rbf, laplace, and cauchy kernels. If a callable is given, it should
429431
accept two arguments and return a floating point number.
430432
431-
bandwidth : float, default=5
432-
Bandwidth to use with the given kernel. For kernels that use gamma,
433-
gamma = .5/(bandwidth^2). Interpretation of the default value is left to
434-
the kernel; see the documentation for sklearn.metrics.pairwise.
435-
Ignored by other kernels.
433+
gamma : float or 'scale', default='scale'
434+
Kernel coefficient. If 'scale', gamma = 1/(n_features*X.var()).
435+
Interpretation of the default value is left to the kernel;
436+
see the documentation for sklearn.metrics.pairwise.
437+
For kernels that use bandwidth, bandwidth = 1/sqrt(2*gamma).
436438
437439
degree : float, default=3
438440
Degree of the polynomial kernel. Ignored by other kernels.
@@ -466,11 +468,11 @@ class EigenProRegressor(BaseEigenPro, RegressorMixin):
466468
>>> rng = np.random.RandomState(1)
467469
>>> x_train = rng.randn(n_samples, n_features)
468470
>>> y_train = rng.randn(n_samples, n_targets)
469-
>>> rgs = EigenProRegressor(n_epoch=3, bandwidth=1, subsample_size=50)
471+
>>> rgs = EigenProRegressor(n_epoch=3, gamma=.5, subsample_size=50)
470472
>>> rgs.fit(x_train, y_train)
471-
EigenProRegressor(bandwidth=1, batch_size='auto', coef0=1, degree=3, gamma=None,
472-
kernel='rbf', kernel_params=None, n_components=1000,
473-
n_epoch=3, random_state=None, subsample_size=50)
473+
EigenProRegressor(batch_size='auto', coef0=1, degree=3, gamma=0.5, kernel='rbf',
474+
kernel_params=None, n_components=1000, n_epoch=3,
475+
random_state=None, subsample_size=50)
474476
>>> y_pred = rgs.predict(x_train)
475477
>>> loss = np.mean(np.square(y_train - y_pred))
476478
"""
@@ -482,8 +484,7 @@ def __init__(
482484
n_components=1000,
483485
subsample_size="auto",
484486
kernel="rbf",
485-
bandwidth=5,
486-
gamma=None,
487+
gamma="scale",
487488
degree=3,
488489
coef0=1,
489490
kernel_params=None,
@@ -495,7 +496,6 @@ def __init__(
495496
n_components=n_components,
496497
subsample_size=subsample_size,
497498
kernel=kernel,
498-
bandwidth=bandwidth,
499499
gamma=gamma,
500500
degree=degree,
501501
coef0=coef0,
@@ -543,17 +543,11 @@ class EigenProClassifier(BaseEigenPro, ClassifierMixin):
543543
rbf, laplace, and cauchy kernels. If a callable is given, it should
544544
accept two arguments and return a floating point number.
545545
546-
bandwidth : float, default=5
547-
Bandwidth to use with the given kernel. For kernels that use gamma,
548-
gamma = .5/(bandwidth^2). Interpretation of the default value is left to
549-
the kernel; see the documentation for sklearn.metrics.pairwise.
550-
Ignored by other kernels.
551-
552-
gamma : float, default=None
553-
Gamma parameter for the RBF, polynomial, exponential chi2
554-
and sigmoid kernels. Interpretation of the default value is left
555-
to the kernel; see the documentation for
556-
sklearn.metrics.pairwise. Ignored by other kernels.
546+
gamma : float or 'scale', default=0.02
547+
Kernel coefficient. If 'scale', gamma = 1/(n_features*X.var()).
548+
Interpretation of the default value is left to the kernel;
549+
see the documentation for sklearn.metrics.pairwise.
550+
For kernels that use bandwidth, bandwidth = 1/sqrt(2*gamma).
557551
558552
degree : float, default=3
559553
Degree of the polynomial kernel. Ignored by other kernels.
@@ -588,12 +582,11 @@ class EigenProClassifier(BaseEigenPro, ClassifierMixin):
588582
>>> rng = np.random.RandomState(1)
589583
>>> x_train = rng.randn(n_samples, n_features)
590584
>>> y_train = rng.randint(n_targets, size=n_samples)
591-
>>> rgs = EigenProClassifier(n_epoch=3, bandwidth=1, subsample_size=50)
585+
>>> rgs = EigenProClassifier(n_epoch=3, gamma=.01, subsample_size=50)
592586
>>> rgs.fit(x_train, y_train)
593-
EigenProClassifier(bandwidth=1, batch_size='auto', coef0=1, degree=3,
594-
gamma=None, kernel='rbf', kernel_params=None,
595-
n_components=1000, n_epoch=3, random_state=None,
596-
subsample_size=50)
587+
EigenProClassifier(batch_size='auto', coef0=1, degree=3, gamma=0.01,
588+
kernel='rbf', kernel_params=None, n_components=1000,
589+
n_epoch=3, random_state=None, subsample_size=50)
597590
>>> y_pred = rgs.predict(x_train)
598591
>>> loss = np.mean(y_train != y_pred)
599592
"""
@@ -605,8 +598,7 @@ def __init__(
605598
n_components=1000,
606599
subsample_size="auto",
607600
kernel="rbf",
608-
bandwidth=5,
609-
gamma=None,
601+
gamma=0.02,
610602
degree=3,
611603
coef0=1,
612604
kernel_params=None,
@@ -618,7 +610,6 @@ def __init__(
618610
n_components=n_components,
619611
subsample_size=subsample_size,
620612
kernel=kernel,
621-
bandwidth=bandwidth,
622613
gamma=gamma,
623614
degree=degree,
624615
coef0=coef0,

0 commit comments

Comments
 (0)