
Commit 7904318

Merge remote-tracking branch 'fastfood/master' into fastfood

2 parents 8694508 + a77ca4f

File tree: 8 files changed, +1474 −0 lines changed
Lines changed: 97 additions & 0 deletions
@@ -0,0 +1,97 @@
"""
===================================================================
Recognizing hand-written digits using Fastfood kernel approximation
===================================================================

This example shows how the Fastfood kernel approximation compares to a dual
and a primal support vector classifier. It is based on the
plot_digits_classification example of scikit-learn. The idea behind Fastfood
is to map the data into an approximate kernel feature space and then run a
linear classifier on the mapped data.

"""
print(__doc__)

# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org>
#         Modified By: Felix Maximilian Möller
# License: Simplified BSD

# Standard scientific Python imports
import numpy as np
import pylab as pl

# Import datasets, classifiers and performance metrics
from sklearn import datasets, svm, metrics
from sklearn.kernel_approximation import Fastfood

# The digits dataset
digits = datasets.load_digits()

# The data that we are interested in is made of 8x8 images of digits;
# let's have a look at the first 4 images, stored in the `images`
# attribute of the dataset. If we were working from image files, we
# could load them using pylab.imread. For these images we know which
# digit they represent: it is given in the 'target' of the dataset.
for index, (image, label) in enumerate(list(zip(digits.images, digits.target))[:4]):
    pl.subplot(2, 4, index + 1)
    pl.axis('off')
    pl.imshow(image, cmap=pl.cm.gray_r, interpolation='nearest')
    pl.title('Training: %i' % label)

# To apply a classifier on this data, we need to flatten the images, to
# turn the data into a (samples, features) matrix:
n_samples = len(digits.images)
data = digits.images.reshape((n_samples, -1))
gamma = .001
sigma = np.sqrt(1 / (2 * gamma))
number_of_features_to_generate = 1000
train__idx = range(n_samples // 2)
test__idx = range(n_samples // 2, n_samples)
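
# Note: the RBF kernel exp(-gamma * ||x - y||^2) corresponds to
# sigma = sqrt(1 / (2 * gamma)), so the Fastfood map below approximates
# the same kernel that the dual SVC uses via its gamma parameter.
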
# map the data into the approximate kernel feature space
rbf_transform = Fastfood(sigma=sigma, n_components=number_of_features_to_generate)
data_transformed_train = rbf_transform.fit_transform(data[train__idx])
data_transformed_test = rbf_transform.transform(data[test__idx])
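
# fit_transform draws the random feature directions on the training split;
# transform reuses exactly the same directions, so train and test samples
# land in the same randomized feature space.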

# Create the classifiers: a kernelized (dual) SVC and two linear (primal) SVCs
classifier = svm.SVC(gamma=gamma)
linear_classifier = svm.LinearSVC()
linear_classifier_transformation = svm.LinearSVC()

# We learn the digits on the first half of the dataset
classifier.fit(data[train__idx], digits.target[train__idx])
linear_classifier.fit(data[train__idx], digits.target[train__idx])

# Fit the linear classifier on the mapped data.
linear_classifier_transformation.fit(data_transformed_train, digits.target[train__idx])

# Now predict the value of the digit on the second half:
expected = digits.target[test__idx]
predicted = classifier.predict(data[test__idx])
predicted_linear = linear_classifier.predict(data[test__idx])
predicted_linear_transformed = linear_classifier_transformation.predict(data_transformed_test)
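
# With enough random features, the linear classifier on the mapped data is
# expected to approach the accuracy of the kernelized SVC (compare the
# reports below).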
print "Classification report for dual classifier %s:\n%s\n" % (
77+
classifier, metrics.classification_report(expected, predicted))
78+
print "Classification report for primal linear classifier %s:\n%s\n" % (
79+
linear_classifier, metrics.classification_report(expected, predicted_linear))
80+
print "Classification report for primal transformation classifier %s:\n%s\n" % (
81+
linear_classifier_transformation, metrics.classification_report(expected, predicted_linear_transformed))
82+
83+
print "Confusion matrix for dual classifier:\n%s" % metrics.confusion_matrix(expected, predicted)
84+
print "Confusion matrix for primal linear classifier:\n%s" % metrics.confusion_matrix(expected, predicted_linear)
85+
print "Confusion matrix for for primal transformation classifier:\n%s" % metrics.confusion_matrix(expected, predicted_linear_transformed)
86+
87+
# assert_almost_equal(metrics.classification_report(expected, predicted),
88+
# metrics.classification_report(expected, predicted_linear_transformed),
89+
# decimal=1)
90+
91+
for index, (image, prediction) in enumerate(zip(digits.images[test__idx], predicted)[:4]):
92+
pl.subplot(2, 4, index + 5)
93+
pl.axis('off')
94+
pl.imshow(image, cmap=pl.cm.gray_r, interpolation='nearest')
95+
pl.title('Prediction: %i' % prediction)
96+
97+
pl.show()

examples/plot_kernel_approximation.py

Lines changed: 234 additions & 0 deletions
@@ -0,0 +1,234 @@
"""
==================================================
Explicit feature map approximation for RBF kernels
==================================================

An example illustrating the approximation of the feature map
of an RBF kernel.

.. currentmodule:: sklearn.kernel_approximation

It shows how to use :class:`Fastfood`, :class:`RBFSampler` and :class:`Nystroem` to
approximate the feature map of an RBF kernel for classification with an SVM on
the digits dataset. Results using a linear SVM in the original space, a linear
SVM using the approximate mappings and using a kernelized SVM are compared.
Timings and accuracy for varying numbers of Monte Carlo samples (in the case
of :class:`RBFSampler`, which uses random Fourier features) and different-sized
subsets of the training set (for :class:`Nystroem`) for the approximate mapping
are shown.

Please note that the dataset here is not large enough to show the benefits
of kernel approximation, as the exact SVM is still reasonably fast.

Sampling more dimensions clearly leads to better classification results, but
comes at a greater cost. This means there is a tradeoff between runtime and
accuracy, given by the parameter n_components. Note that solving the Linear
SVM and also the approximate kernel SVM could be greatly accelerated by using
stochastic gradient descent via :class:`sklearn.linear_model.SGDClassifier`.
This is not easily possible for the case of the kernelized SVM.

The second plot visualizes the decision surfaces of the RBF kernel SVM and
the linear SVM with approximate kernel maps.
The plot shows decision surfaces of the classifiers projected onto
the first two principal components of the data. This visualization should
be taken with a grain of salt since it is just an interesting slice through
the decision surface in 64 dimensions. In particular note that
a datapoint (represented as a dot) will not necessarily be classified
into the region in which it appears to lie, since it does not lie exactly
on the plane spanned by the first two principal components.

The usage of :class:`Fastfood`, :class:`RBFSampler` and :class:`Nystroem` is described in detail
in :ref:`kernel_approximation`.

"""
print(__doc__)

# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org>
#         Andreas Mueller <[email protected]>
# License: BSD 3 clause

# Standard scientific Python imports
import matplotlib.pyplot as plt
import numpy as np
from time import time

# Import datasets, classifiers and performance metrics
from sklearn import datasets, svm, pipeline
from sklearn.kernel_approximation import (RBFSampler,
                                          Nystroem, Fastfood)
from sklearn.decomposition import PCA

# The digits dataset
digits = datasets.load_digits(n_class=9)

# To apply a classifier on this data, we need to flatten the images, to
# turn the data into a (samples, features) matrix:
n_samples = len(digits.data)
data = digits.data / 16.
data -= data.mean(axis=0)
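# (digits pixel values range from 0 to 16, so dividing by 16 scales them to
# [0, 1]; mean-centering the features helps the linear solvers)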

# We learn the digits on the first half of the dataset
data_train, targets_train = data[:n_samples // 2], digits.target[:n_samples // 2]

# and keep the second half for testing:
data_test, targets_test = data[n_samples // 2:], digits.target[n_samples // 2:]

# fix model parameters:
GAMMA = .2
SIGMA = np.sqrt(1 / (2 * GAMMA))

# Create a classifier: a support vector classifier
kernel_svm = svm.SVC(gamma=GAMMA)
linear_svm = svm.LinearSVC()

# create pipelines from kernel approximation and linear svm
feature_map_fastfood = Fastfood(sigma=SIGMA, tradeoff_mem_accuracy='mem',
                                random_state=1)
feature_map_fourier = RBFSampler(gamma=GAMMA, random_state=1)
feature_map_nystroem = Nystroem(gamma=GAMMA, random_state=1)
fastfood_approx_svm = pipeline.Pipeline([("feature_map", feature_map_fastfood),
                                         ("svm", svm.LinearSVC())])

fourier_approx_svm = pipeline.Pipeline([("feature_map", feature_map_fourier),
                                        ("svm", svm.LinearSVC())])

nystroem_approx_svm = pipeline.Pipeline([("feature_map", feature_map_nystroem),
                                         ("svm", svm.LinearSVC())])

# fit and predict using linear and kernel svm:

kernel_svm_time = time()
kernel_svm.fit(data_train, targets_train)
kernel_svm_score = kernel_svm.score(data_test, targets_test)
kernel_svm_time = time() - kernel_svm_time

linear_svm_time = time()
linear_svm.fit(data_train, targets_train)
linear_svm_score = linear_svm.score(data_test, targets_test)
linear_svm_time = time() - linear_svm_time

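# Sweep the dimensionality of the approximate feature maps: n_components
# runs from 30 to 390 in steps of 30. The double-underscore name
# feature_map__n_components is sklearn's Pipeline convention for setting
# the n_components parameter of the "feature_map" step.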
sample_sizes = 30 * np.arange(1, 14)
fastfood_scores = []
fourier_scores = []
nystroem_scores = []
fastfood_times = []
fourier_times = []
nystroem_times = []

for D in sample_sizes:
    fastfood_approx_svm.set_params(feature_map__n_components=D)
    fourier_approx_svm.set_params(feature_map__n_components=D)
    nystroem_approx_svm.set_params(feature_map__n_components=D)
    start = time()
    fastfood_approx_svm.fit(data_train, targets_train)
    fastfood_times.append(time() - start)

    start = time()
    nystroem_approx_svm.fit(data_train, targets_train)
    nystroem_times.append(time() - start)

    start = time()
    fourier_approx_svm.fit(data_train, targets_train)
    fourier_times.append(time() - start)

    fastfood_score = fastfood_approx_svm.score(data_test, targets_test)
    fourier_score = fourier_approx_svm.score(data_test, targets_test)
    nystroem_score = nystroem_approx_svm.score(data_test, targets_test)
    fastfood_scores.append(fastfood_score)
    nystroem_scores.append(nystroem_score)
    fourier_scores.append(fourier_score)

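# Note: each timing above covers the full pipeline fit, i.e. drawing and
# applying the feature map plus training the LinearSVC on the mapped data.
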
# plot the results:
plt.figure(figsize=(8, 8))
accuracy = plt.subplot(211)
# second subplot for timings
timescale = plt.subplot(212)

accuracy.plot(sample_sizes, nystroem_scores, label="Nystroem approx. kernel")
timescale.plot(sample_sizes, nystroem_times, '--',
               label='Nystroem approx. kernel')

accuracy.plot(sample_sizes, fourier_scores, label="Fourier approx. kernel")
timescale.plot(sample_sizes, fourier_times, '--',
               label='Fourier approx. kernel')

accuracy.plot(sample_sizes, fastfood_scores, label="Fastfood approx. kernel")
timescale.plot(sample_sizes, fastfood_times, '--',
               label='Fastfood approx. kernel')

# horizontal lines for exact rbf and linear kernels:
accuracy.plot([sample_sizes[0], sample_sizes[-1]],
              [linear_svm_score, linear_svm_score], label="linear svm")
timescale.plot([sample_sizes[0], sample_sizes[-1]],
               [linear_svm_time, linear_svm_time], '--', label='linear svm')

accuracy.plot([sample_sizes[0], sample_sizes[-1]],
              [kernel_svm_score, kernel_svm_score], label="rbf svm")
timescale.plot([sample_sizes[0], sample_sizes[-1]],
               [kernel_svm_time, kernel_svm_time], '--', label='rbf svm')

# vertical line for dataset dimensionality = 64
accuracy.plot([64, 64], [0.7, 1], label="n_features")

# legends and labels
accuracy.set_title("Classification accuracy")
timescale.set_title("Training times for dataset size of " + str(n_samples)
                    + " with dimensionality of " + str(np.size(data, 1)))
accuracy.set_xlim(sample_sizes[0], sample_sizes[-1])
accuracy.set_xticks(())
accuracy.set_ylim(np.min(fourier_scores), 1)
timescale.set_xlabel("Sampling steps = transformed feature dimension")
accuracy.set_ylabel("Classification accuracy")
timescale.set_ylabel("Training time in seconds")
accuracy.legend(loc='best')
timescale.legend(loc='best')

# visualize the decision surface, projected down to the first
# two principal components of the dataset
pca = PCA(n_components=8).fit(data_train)

X = pca.transform(data_train)

# Generate grid along first two principal components
multiples = np.arange(-2, 2, 0.1)
# steps along first component
first = multiples[:, np.newaxis] * pca.components_[0, :]
# steps along second component
second = multiples[:, np.newaxis] * pca.components_[1, :]
# combine
grid = first[np.newaxis, :, :] + second[:, np.newaxis, :]
flat_grid = grid.reshape(-1, data.shape[1])

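# grid has shape (40, 40, 64): every combination of a step along the first
# and second principal components, embedded back into the original
# 64-dimensional feature space; flat_grid flattens it to (1600, 64) so the
# fitted classifiers can predict on it.
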
# titles for the plots; the order matches the classifier tuple below, and
# n_components reflects the last value fitted in the loop above
titles = ['SVC with rbf kernel',
          'SVC (linear kernel)\n with Fastfood rbf feature map\n'
          'n_components=%d' % sample_sizes[-1],
          'SVC (linear kernel)\n with Nystroem rbf feature map\n'
          'n_components=%d' % sample_sizes[-1],
          'SVC (linear kernel)\n with Fourier rbf feature map\n'
          'n_components=%d' % sample_sizes[-1]]

plt.tight_layout()
plt.figure(figsize=(12, 5))

# predict and plot
for i, clf in enumerate((kernel_svm, fastfood_approx_svm, nystroem_approx_svm,
                         fourier_approx_svm)):
    # Plot the decision boundary. For that, we will assign a color to each
    # point in the mesh [x_min, x_max]x[y_min, y_max].
    plt.subplot(1, 4, i + 1)
    Z = clf.predict(flat_grid)

    # Put the result into a color plot
    Z = Z.reshape(grid.shape[:-1])
    plt.contourf(multiples, multiples, Z, cmap=plt.cm.Paired)
    plt.axis('off')

    # Plot also the training points
    plt.scatter(X[:, 0], X[:, 1], c=targets_train, cmap=plt.cm.Paired)

    plt.title(titles[i])
plt.tight_layout()
plt.show()

examples/rks_vs_fastfood.py

Lines changed: 56 additions & 0 deletions
@@ -0,0 +1,56 @@
"""Compares the performance of Fastfood and RKS (random kitchen sinks)."""
import datetime

import numpy as np

from sklearn.kernel_approximation import Fastfood
from sklearn.kernel_approximation import RBFSampler
from sklearn.utils.testing import assert_greater

# generate data
rng = np.random.RandomState(0)
X = rng.random_sample(size=(1000, 4096))
Y = rng.random_sample(size=(10000, 4096))
X /= X.sum(axis=1)[:, np.newaxis]
Y /= Y.sum(axis=1)[:, np.newaxis]

# calculate feature maps
gamma = 10.
sigma = np.sqrt(1 / (2 * gamma))
number_of_features_to_generate = 4096 * 4

exact_start = datetime.datetime.utcnow()
# original rbf kernel method (commented out, so this timing is a placeholder):
# rbf_kernel(X, X, gamma=gamma)
# rbf_kernel(X, Y, gamma=gamma)
exact_end = datetime.datetime.utcnow()
exact_spent_time = exact_end - exact_start
print("Timing exact rbf: \t\t%s" % exact_spent_time)

rbf_transform = Fastfood(sigma=sigma,
                         n_components=number_of_features_to_generate,
                         tradeoff_mem_accuracy='mem',
                         random_state=42)
_ = rbf_transform.fit(X)
fastfood_fast_vec_start = datetime.datetime.utcnow()
# Fastfood: approximate kernel mapping
_ = rbf_transform.transform(X)
_ = rbf_transform.transform(Y)
fastfood_fast_vec_end = datetime.datetime.utcnow()
fastfood_fast_vec_spent_time = fastfood_fast_vec_end - \
    fastfood_fast_vec_start
print("Timing fastfood fast vectorized: \t\t%s" % fastfood_fast_vec_spent_time)

rks_rbf_transform = RBFSampler(gamma=gamma,
                               n_components=number_of_features_to_generate,
                               random_state=42)
_ = rks_rbf_transform.fit(X)
rks_start = datetime.datetime.utcnow()
# Random Kitchen Sinks: approximate kernel mapping
_ = rks_rbf_transform.transform(X)
_ = rks_rbf_transform.transform(Y)
rks_end = datetime.datetime.utcnow()
rks_spent_time = rks_end - rks_start
print("Timing rks: \t\t\t%s" % rks_spent_time)

assert_greater(rks_spent_time, fastfood_fast_vec_spent_time)
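
# Why Fastfood is expected to win here: RKS (RBFSampler) multiplies each
# sample by a dense d x D Gaussian matrix, costing O(d * D) per sample,
# while Fastfood combines Hadamard transforms with diagonal random matrices
# for roughly O(D * log d) per sample (Le, Sarlos and Smola, "Fastfood",
# 2013); the gap is large at d = 4096 and D = 4 * 4096.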

fastfood_ml/_version.py

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
__version__ = '0.1.0'
