Skip to content

Commit eaa814f

Browse files
authored
Merge pull request #1 from rth/fastfood
Add Fastfood algorithm
2 parents 22f78c6 + c5e9bbc commit eaa814f

File tree

14 files changed

+859
-7
lines changed

14 files changed

+859
-7
lines changed

.circleci/config.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ jobs:
1414
conda update --yes --quiet conda
1515
conda create -n testenv --yes --quiet python=3
1616
source activate testenv
17-
conda install --yes pip numpy scipy scikit-learn matplotlib sphinx sphinx_rtd_theme numpydoc pillow
17+
conda install --yes pip numpy scipy scikit-learn matplotlib sphinx sphinx_rtd_theme numpydoc pillow cython nomkl
1818
pip install sphinx-gallery
1919
pip install .
2020
cd doc

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@ __pycache__/
33
*.py[cod]
44
*$py.class
55

6+
*.c
7+
68
# C extensions
79
*.so
810

.travis.yml

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ matrix:
1515
SKLEARN_VERSION="0.20.2"
1616
- env: PYTHON_VERSION="3.7" NUMPY_VERSION="*" SCIPY_VERSION="*"
1717
SKLEARN_VERSION="*"
18+
- env: PYTHON_VERSION="3.7" NUMPY_VERSION="*" SCIPY_VERSION="*"
19+
SKLEARN_VERSION="nightly"
1820

1921
install:
2022
# install miniconda
@@ -27,7 +29,14 @@ install:
2729
# create the testing environment
2830
- conda create -n testenv --yes python=$PYTHON_VERSION pip
2931
- source activate testenv
30-
- conda install --yes numpy==$NUMPY_VERSION scipy==$SCIPY_VERSION scikit-learn==$SKLEARN_VERSION nose pytest pytest-cov
32+
- |
  # The build matrix sets some versions to "*". Every expansion is quoted so
  # the shell never glob-expands that value against the files in the working
  # directory (unquoted, `[ * = "nightly" ]` fails with "too many arguments").
  if [ "$SKLEARN_VERSION" = "nightly" ]; then
    conda install --yes "numpy==$NUMPY_VERSION" "scipy==$SCIPY_VERSION" cython nose pytest pytest-cov
    # install nightly wheels
    pip install --pre -f https://sklearn-nightly.scdn8.secure.raxcdn.com scikit-learn
  else
    conda install --yes "numpy==$NUMPY_VERSION" "scipy==$SCIPY_VERSION" "scikit-learn==$SKLEARN_VERSION" cython nose pytest pytest-cov
  fi
3140
- pip install codecov
3241
- pip install .
3342

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
"""Compare the transform time of Fastfood and Random Kitchen Sinks (RKS).

Both approximate RBF feature maps are fitted on the same random data. Only
the ``transform`` calls are timed; fitting is excluded. The script raises
``AssertionError`` at the end if RKS was not slower than Fastfood.
"""
import datetime
import time

import numpy as np

from sklearn.kernel_approximation import RBFSampler

from sklearn_extra.kernel_approximation import Fastfood

# generate data: two random matrices whose rows are normalised to sum to 1
rng = np.random.RandomState(0)
X = rng.random_sample(size=(1000, 4096))
Y = rng.random_sample(size=(10000, 4096))
X /= X.sum(axis=1)[:, np.newaxis]
Y /= Y.sum(axis=1)[:, np.newaxis]

# calculate feature maps
gamma = 10.
# Fastfood is configured through sigma, RBFSampler through gamma
sigma = np.sqrt(1 / (2 * gamma))
number_of_features_to_generate = 4096 * 4

# time.perf_counter() is monotonic, so the measurements cannot be distorted
# by wall-clock adjustments; results are wrapped in timedelta for printing.
exact_start = time.perf_counter()
# original rbf kernel method (disabled, so this interval measures nothing
# but the timer overhead):
# rbf_kernel(X, X, gamma=gamma)
# rbf_kernel(X, Y, gamma=gamma)
exact_spent_time = datetime.timedelta(
    seconds=time.perf_counter() - exact_start)
print("Timing exact rbf: \t\t", exact_spent_time)

rbf_transform = Fastfood(sigma=sigma,
                         n_components=number_of_features_to_generate,
                         tradeoff_mem_accuracy='mem',
                         random_state=42)
_ = rbf_transform.fit(X)
fastfood_fast_vec_start = time.perf_counter()
# Fastfood: approximate kernel mapping
_ = rbf_transform.transform(X)
_ = rbf_transform.transform(Y)
fastfood_fast_vec_spent_time = datetime.timedelta(
    seconds=time.perf_counter() - fastfood_fast_vec_start)
print("Timing fastfood fast vectorized: \t\t", fastfood_fast_vec_spent_time)

rks_rbf_transform = RBFSampler(gamma=gamma,
                               n_components=number_of_features_to_generate,
                               random_state=42)
_ = rks_rbf_transform.fit(X)
rks_start = time.perf_counter()
# Random Kitchen Sinks: approximate kernel mapping
_ = rks_rbf_transform.transform(X)
_ = rks_rbf_transform.transform(Y)
rks_spent_time = datetime.timedelta(seconds=time.perf_counter() - rks_start)
print("Timing rks: \t\t\t", rks_spent_time)

# Explicit check instead of sklearn's private test helper, which is not
# available in every scikit-learn release; still raises AssertionError.
if not rks_spent_time > fastfood_fast_vec_spent_time:
    raise AssertionError(
        "%s not greater than %s"
        % (rks_spent_time, fastfood_fast_vec_spent_time))
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
"""
===================================================================
Recognizing hand-written digits using Fastfood kernel approximation
===================================================================

This shows how the Fastfood kernel approximation compares to a dual and primal
support vector classifier. It is based on the plot_digits_classification
example of scikit-learn. The idea behind Fastfood is to map the data into a
feature space (approximation) and then run a linear classifier on the mapped
data.


"""

print(__doc__)

# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org>
# Modified By: Felix Maximilian Möller
# License: Simplified BSD

# Standard scientific Python imports
import numpy as np
import matplotlib.pyplot as plt

# Import datasets, classifiers and performance metrics
from sklearn import datasets, svm, metrics

from sklearn_extra.kernel_approximation import Fastfood

# The digits dataset
digits = datasets.load_digits()

# Number of images shown per row of the 2x4 figure: the top row holds
# training samples, the bottom row holds predictions.
n_images_per_row = 4

# The data that we are interested in is made of 8x8 images of digits,
# let's have a look at the first 4 images, stored in the `images`
# attribute of the dataset. If we were working from image files, we
# could load them using matplotlib.pyplot.imread. For these images we know
# which digit they represent: it is given in the 'target' of the dataset.
for index, (image, label) in enumerate(
        zip(digits.images[:n_images_per_row],
            digits.target[:n_images_per_row])):
    # top row occupies subplot slots 1..4
    plt.subplot(2, n_images_per_row, index + 1)
    plt.axis('off')
    plt.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest')
    plt.title('Training: %i' % label)

# To apply a classifier on this data, we need to flatten the image, to
# turn the data in a (samples, feature) matrix:
n_samples = len(digits.images)
data = digits.images.reshape((n_samples, -1))
gamma = .001
# Fastfood is configured through sigma, SVC through gamma
sigma = np.sqrt(1 / (2 * gamma))
number_of_features_to_generate = 1000
# first half of the samples is used for training, second half for testing
train_idx = slice(None, n_samples // 2)
test_idx = slice(n_samples // 2, n_samples)

# map data into featurespace
rbf_transform = Fastfood(
    sigma=sigma, n_components=number_of_features_to_generate)
data_transformed_train = rbf_transform.fit_transform(data[train_idx])
data_transformed_test = rbf_transform.transform(data[test_idx])

# Create a classifier: a support vector classifier
classifier = svm.SVC(gamma=gamma)
linear_classifier = svm.LinearSVC()
linear_classifier_transformation = svm.LinearSVC()

# We learn the digits on the first half of the digits
classifier.fit(data[train_idx], digits.target[train_idx])
linear_classifier.fit(data[train_idx], digits.target[train_idx])

# Run the linear classifier on the mapped data.
linear_classifier_transformation.fit(
    data_transformed_train, digits.target[train_idx])

# Now predict the value of the digit on the second half:
expected = digits.target[test_idx]
predicted = classifier.predict(data[test_idx])
predicted_linear = linear_classifier.predict(data[test_idx])
predicted_linear_transformed = linear_classifier_transformation.predict(
    data_transformed_test)

print("Classification report for dual classifier %s:\n%s\n"
      % (classifier, metrics.classification_report(expected, predicted)))
print("Classification report for primal linear classifier %s:\n%s\n"
      % (linear_classifier,
         metrics.classification_report(expected, predicted_linear)))
print(
    "Classification report for primal transformation classifier %s:\n%s\n"
    % (linear_classifier_transformation,
       metrics.classification_report(expected, predicted_linear_transformed)))

print("Confusion matrix for dual classifier:\n%s"
      % metrics.confusion_matrix(expected, predicted))
print("Confusion matrix for primal linear classifier:\n%s"
      % metrics.confusion_matrix(expected, predicted_linear))
print("Confusion matrix for primal transformation classifier:\n%s"
      % metrics.confusion_matrix(expected, predicted_linear_transformed))

for index, (image, prediction) in enumerate(
        zip(digits.images[test_idx][:n_images_per_row],
            predicted[:n_images_per_row])):
    # bottom row occupies subplot slots 5..8, so it never overwrites the
    # training images drawn in slots 1..4
    plt.subplot(2, n_images_per_row, index + n_images_per_row + 1)
    plt.axis('off')
    plt.imshow(image, cmap=plt.cm.gray_r, interpolation='nearest')
    plt.title('Prediction: %i' % prediction)

plt.show()

0 commit comments

Comments
 (0)