scikit-learn-contrib
diff --git a/‎examples/plot_digits_classification_fastfood.py
Lines changed: 97 additions & 0 deletions b/‎examples/plot_digits_classification_fastfood.py
Lines changed: 97 additions & 0 deletions
diff --git a/‎examples/plot_kernel_approximation.py
Lines changed: 234 additions & 0 deletions b/‎examples/plot_kernel_approximation.py
Lines changed: 234 additions & 0 deletions
diff --git a/‎examples/rks_vs_fastfood.py
Lines changed: 56 additions & 0 deletions b/‎examples/rks_vs_fastfood.py
Lines changed: 56 additions & 0 deletions
diff --git a/‎fastfood_ml/_version.py
Lines changed: 1 addition & 0 deletions b/‎fastfood_ml/_version.py
Lines changed: 1 addition & 0 deletions
@@ -0,0 +1,97 @@
+"""
+===================================================================
+Recognizing hand-written digits using Fastfood kernel approximation
+===================================================================
+
+This shows how the Fastfood kernel approximation compares to a dual and primal
+support vector classifier. It is based on the plot_digits_classification
+example of scikit-learn. The idea behind Fastfood is to map the data into a
+feature space (approximation) and then run a linear classifier on the mapped
+data.
+
+
+"""
+
+print(__doc__)  # call form: valid on Python 2 and Python 3
+
+# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org>
+# Modified By: Felix Maximilian Möller
+# License: Simplified BSD
+
+# Standard scientific Python imports
+import numpy as np
+import pylab as pl
+
+# Import datasets, classifiers and performance metrics
+from sklearn import datasets, svm, metrics
+from sklearn.kernel_approximation import Fastfood
+
+# The digits dataset
+digits = datasets.load_digits()
+
+# The data that we are interested in is made of 8x8 images of digits.
+# Let's have a look at the first 4 images, stored in the `images`
+# attribute of the dataset. If we were working from image files, we
+# could load them using pylab.imread. For these images we know which
+# digit they represent: it is given in the 'target' of the dataset.
+for index, (image, label) in enumerate(zip(digits.images[:4], digits.target[:4])):
+    pl.subplot(2, 4, index + 1)
+    pl.axis('off')
+    pl.imshow(image, cmap=pl.cm.gray_r, interpolation='nearest')
+    pl.title('Training: %i' % label)
+
+# To apply a classifier on this data, we need to flatten the images, to
+# turn the data into a (samples, features) matrix:
+n_samples = len(digits.images)
+data = digits.images.reshape((n_samples, -1))
+gamma = .001
+sigma = np.sqrt(1 / (2 * gamma))  # chosen so that gamma == 1 / (2 * sigma ** 2)
+number_of_features_to_generate = 1000
+train__idx = range(n_samples // 2)  # floor division: range() needs an int on Python 3
+test__idx = range(n_samples // 2, n_samples)
+
+# Slice the two halves once so every estimator sees the same arrays.
+raw_train, raw_test = data[train__idx], data[test__idx]
+train_labels, expected = digits.target[train__idx], digits.target[test__idx]
+# Fit the Fastfood feature map on the training half and map both halves.
+rbf_transform = Fastfood(sigma=sigma, n_components=number_of_features_to_generate)
+data_transformed_train = rbf_transform.fit_transform(raw_train)
+data_transformed_test = rbf_transform.transform(raw_test)
+
+# Create the classifiers: one kernel SVC and two linear SVCs.
+classifier = svm.SVC(gamma=gamma)
+linear_classifier = svm.LinearSVC()
+linear_classifier_transformation = svm.LinearSVC()
+
+# Learn the digits on the first half; the last classifier uses the mapped data.
+classifier.fit(raw_train, train_labels)
+linear_classifier.fit(raw_train, train_labels)
+linear_classifier_transformation.fit(data_transformed_train, train_labels)
+
+# Now predict the value of the digit on the second half:
+predicted = classifier.predict(raw_test)
+predicted_linear = linear_classifier.predict(raw_test)
+predicted_linear_transformed = linear_classifier_transformation.predict(data_transformed_test)
+
+print("Classification report for dual classifier %s:\n%s\n" % (
+    classifier, metrics.classification_report(expected, predicted)))
+print("Classification report for primal linear classifier %s:\n%s\n" % (
+    linear_classifier, metrics.classification_report(expected, predicted_linear)))
+print("Classification report for primal transformation classifier %s:\n%s\n" % (
+    linear_classifier_transformation, metrics.classification_report(expected, predicted_linear_transformed)))
+# Each print() gets one pre-formatted string, so output matches on Python 2 and 3.
+print("Confusion matrix for dual classifier:\n%s" % metrics.confusion_matrix(expected, predicted))
+print("Confusion matrix for primal linear classifier:\n%s" % metrics.confusion_matrix(expected, predicted_linear))
+print("Confusion matrix for primal transformation classifier:\n%s" % metrics.confusion_matrix(expected, predicted_linear_transformed))
+
+# assert_almost_equal(metrics.classification_report(expected, predicted),
+#                     metrics.classification_report(expected, predicted_linear_transformed),
+#                     decimal=1)
+
+for index, (image, prediction) in enumerate(zip(digits.images[test__idx][:4], predicted[:4])):
+    pl.subplot(2, 4, index + 5)  # second row of the 2x4 grid
+    pl.axis('off')
+    pl.imshow(image, cmap=pl.cm.gray_r, interpolation='nearest')
+    pl.title('Prediction: %i' % prediction)
+
+pl.show()
@@ -0,0 +1,234 @@
+"""
+==================================================
+Explicit feature map approximation for RBF kernels
+==================================================
+
+An example illustrating the approximation of the feature map
+of an RBF kernel.
+
+.. currentmodule:: sklearn.kernel_approximation
+
+It shows how to use :class:`Fastfood`, :class:`RBFSampler` and :class:`Nystroem` to
+approximate the feature map of an RBF kernel for classification with an SVM on
+the digits dataset. Results using a linear SVM in the original space, a linear
+SVM using the approximate mappings and using a kernelized SVM are compared.
+Timings and accuracy for varying amounts of Monte Carlo samplings (in the case
+of :class:`RBFSampler`, which uses random Fourier features) and different sized
+subsets of the training set (for :class:`Nystroem`) for the approximate mapping
+are shown.
+
+Please note that the dataset here is not large enough to show the benefits
+of kernel approximation, as the exact SVM is still reasonably fast.
+
+Sampling more dimensions clearly leads to better classification results, but
+comes at a greater cost. This means there is a tradeoff between runtime and
+accuracy, given by the parameter n_components. Note that solving the Linear
+SVM and also the approximate kernel SVM could be greatly accelerated by using
+stochastic gradient descent via :class:`sklearn.linear_model.SGDClassifier`.
+This is not easily possible for the case of the kernelized SVM.
+
+The second plot visualizes the decision surfaces of the RBF kernel SVM and
+the linear SVM with approximate kernel maps.
+The plot shows decision surfaces of the classifiers projected onto
+the first two principal components of the data. This visualization should
+be taken with a grain of salt since it is just an interesting slice through
+the decision surface in 64 dimensions. In particular note that
+a datapoint (represented as a dot) is not necessarily classified
+into the region it is lying in, since it will not lie on the plane
+that the first two principal components span.
+
+The usage of :class:`Fastfood`, :class:`RBFSampler` and :class:`Nystroem` is described in detail
+in :ref:`kernel_approximation`.
+
+"""
+print(__doc__)
+
+# Author: Gael Varoquaux <gael dot varoquaux at normalesup dot org>
+#         Andreas Mueller <[email protected]>
+# License: BSD 3 clause
+
+# Standard scientific Python imports
+import matplotlib.pyplot as plt
+import numpy as np
+from time import time
+
+# Import datasets, classifiers and performance metrics
+from sklearn import datasets, svm, pipeline
+from sklearn.kernel_approximation import (RBFSampler,
+                                          Nystroem, Fastfood)
+from sklearn.decomposition import PCA
+
+# The digits dataset, loaded with n_class=9.
+digits = datasets.load_digits(n_class=9)
+
+# Build the (samples, features) matrix: scale the values by 1/16 and
+# centre every feature on its mean over all samples.
+n_samples = len(digits.data)
+data = digits.data / 16.
+data -= data.mean(axis=0)
+
+# Split point between the two halves. Floor division keeps it an integer,
+# which slicing requires on Python 3 (true division would give a float).
+half = n_samples // 2
+
+# We learn the digits on the first half and predict on the second half.
+data_train, targets_train = data[:half], digits.target[:half]
+data_test, targets_test = data[half:], digits.target[half:]
+
+# Kernel width: GAMMA for SVC/RBFSampler/Nystroem, SIGMA for Fastfood.
+GAMMA = .2
+SIGMA = np.sqrt(1 / (2 * GAMMA))
+
+# Baselines: an RBF-kernel SVC and a linear SVC on the input features.
+kernel_svm = svm.SVC(gamma=GAMMA)
+linear_svm = svm.LinearSVC()
+
+# Approximate feature maps, each created with random_state=1.
+feature_map_fastfood = Fastfood(
+    sigma=SIGMA, tradeoff_mem_accuracy='mem', random_state=1)
+feature_map_fourier = RBFSampler(gamma=GAMMA, random_state=1)
+feature_map_nystroem = Nystroem(gamma=GAMMA, random_state=1)
+
+# One pipeline per feature map: the map followed by its own linear SVC.
+fastfood_approx_svm = pipeline.Pipeline(
+    [("feature_map", feature_map_fastfood), ("svm", svm.LinearSVC())])
+fourier_approx_svm = pipeline.Pipeline(
+    [("feature_map", feature_map_fourier), ("svm", svm.LinearSVC())])
+nystroem_approx_svm = pipeline.Pipeline(
+    [("feature_map", feature_map_nystroem), ("svm", svm.LinearSVC())])
+
+# Time fit + score of the RBF-kernel SVM on the train/test halves.
+started = time()
+kernel_svm.fit(data_train, targets_train)
+kernel_svm_score = kernel_svm.score(data_test, targets_test)
+kernel_svm_time = time() - started
+
+# Same measurement for the linear SVM.
+started = time()
+linear_svm.fit(data_train, targets_train)
+linear_svm_score = linear_svm.score(data_test, targets_test)
+linear_svm_time = time() - started
+
+# Numbers of approximate features to sweep: 30, 60, ..., 390.
+sample_sizes = 30 * np.arange(1, 14)
+
+# Per-method test accuracies and fit times, one entry per sample size.
+fastfood_scores, fourier_scores, nystroem_scores = [], [], []
+fastfood_times, fourier_times, nystroem_times = [], [], []
+
+# (pipeline, fit-time list) pairs, in the order the pipelines are fitted.
+timed_fits = ((fastfood_approx_svm, fastfood_times),
+              (nystroem_approx_svm, nystroem_times),
+              (fourier_approx_svm, fourier_times))
+# (pipeline, score list) pairs, in the order the pipelines are scored.
+scored = ((fastfood_approx_svm, fastfood_scores),
+          (fourier_approx_svm, fourier_scores),
+          (nystroem_approx_svm, nystroem_scores))
+
+for D in sample_sizes:
+    # Every pipeline gets the same n_components for this round.
+    for model, _ in timed_fits:
+        model.set_params(feature_map__n_components=D)
+
+    # Only the fit is timed; scoring happens afterwards.
+    for model, fit_times in timed_fits:
+        start = time()
+        model.fit(data_train, targets_train)
+        fit_times.append(time() - start)
+
+    # Accuracy of each fitted pipeline on the held-out half.
+    for model, scores in scored:
+        scores.append(model.score(data_test, targets_test))
+
+# plot the results: accuracy on the upper subplot, fit times on the lower
+plt.figure(figsize=(8, 8))
+accuracy = plt.subplot(211)
+timescale = plt.subplot(212)
+
+# One solid accuracy curve and one dashed timing curve per approximation.
+for label, scores, times in (
+        ("Nystroem approx. kernel", nystroem_scores, nystroem_times),
+        ("Fourier approx. kernel", fourier_scores, fourier_times),
+        ("Fastfood approx. kernel", fastfood_scores, fastfood_times)):
+    accuracy.plot(sample_sizes, scores, label=label)
+    timescale.plot(sample_sizes, times, '--', label=label)
+
+# horizontal lines for exact rbf and linear kernels:
+x_span = [sample_sizes[0], sample_sizes[-1]]
+for label, score, duration in (
+        ("linear svm", linear_svm_score, linear_svm_time),
+        ("rbf svm", kernel_svm_score, kernel_svm_time)):
+    accuracy.plot(x_span, [score, score], label=label)
+    timescale.plot(x_span, [duration, duration], '--', label=label)
+
+# vertical line for dataset dimensionality = 64
+accuracy.plot([64, 64], [0.7, 1], label="n_features")
+
+# titles
+accuracy.set_title("Classification accuracy")
+timing_title = ("Training times for dataset size of " + str(n_samples)
+                + " with dimensionality of  " + str(np.size(data, 1)))
+timescale.set_title(timing_title)
+
+# The x range spans the sampled sizes; the upper plot shows no x ticks.
+accuracy.set_xlim(sample_sizes[0], sample_sizes[-1])
+accuracy.set_xticks(())
+# The y range starts at the lowest Fourier score.
+accuracy.set_ylim(np.min(fourier_scores), 1)
+
+# axis labels
+accuracy.set_ylabel("Classification accuracy")
+timescale.set_xlabel("Sampling steps = transformed feature dimension")
+timescale.set_ylabel("Training time in seconds")
+
+# legends
+for axes in (accuracy, timescale):
+    axes.legend(loc='best')
+
+# Visualize the decision surface on the plane spanned by the first two
+# principal components of the training data.
+pca = PCA(n_components=8)
+pca.fit(data_train)
+X = pca.transform(data_train)
+
+# Generate a grid of points along the first two principal components.
+multiples = np.arange(-2, 2, 0.1)
+axis_0, axis_1 = pca.components_[:2]
+# steps along the first / second component, one row per multiple
+first = np.outer(multiples, axis_0)
+second = np.outer(multiples, axis_1)
+# grid[i, j] = second[i] + first[j]; flat_grid holds one grid point per row
+grid = second[:, np.newaxis, :] + first[np.newaxis, :, :]
+flat_grid = grid.reshape(-1, data.shape[1])
+
+# Pair each classifier with its title so they cannot get out of step; the
+# pipelines still hold the last n_components of the sweep (sample_sizes[-1]).
+n_last = sample_sizes[-1]
+approx_title = 'SVC (linear kernel)\n with %s rbf feature map\nn_components=%d'
+plots = [(kernel_svm, 'SVC with rbf kernel'),
+         (fastfood_approx_svm, approx_title % ('Fastfood', n_last)),
+         (nystroem_approx_svm, approx_title % ('Nystroem', n_last)),
+         (fourier_approx_svm, approx_title % ('Fourier', n_last))]
+
+# Tidy the layout of the current figure, then open a new one.
+plt.tight_layout()
+plt.figure(figsize=(12, 5))
+
+# predict and plot
+for i, (clf, title) in enumerate(plots):
+    # Plot the decision boundary. For that, we will assign a color to each
+    # point of the grid spanned by `multiples` along both components.
+    plt.subplot(1, 4, i + 1)
+    Z = clf.predict(flat_grid)
+
+    # Put the result into a color plot
+    Z = Z.reshape(grid.shape[:-1])
+    plt.contourf(multiples, multiples, Z, cmap=plt.cm.Paired)
+    plt.axis('off')
+
+    # Plot also the training points
+    plt.scatter(X[:, 0], X[:, 1], c=targets_train, cmap=plt.cm.Paired)
+
+    plt.title(title)
+plt.tight_layout()
+plt.show()
@@ -0,0 +1,56 @@
+import datetime
+
+import numpy as np
+
+from sklearn.kernel_approximation import Fastfood
+from sklearn.kernel_approximation import RBFSampler
+from sklearn.utils.testing import assert_greater
+
+"""compares the performance of Fastfood and RKS"""
+# Generate the data: random rows, each scaled so that it sums to one.
+rng = np.random.RandomState(0)
+X = rng.random_sample(size=(1000, 4096))
+Y = rng.random_sample(size=(10000, 4096))
+X /= X.sum(axis=1, keepdims=True)
+Y /= Y.sum(axis=1, keepdims=True)
+
+# Parameters used by both feature maps below.
+gamma = 10.
+sigma = np.sqrt(1 / (2 * gamma))
+number_of_features_to_generate = 4 * 4096
+
+exact_start = datetime.datetime.utcnow()
+# NOTE: the exact RBF kernel computation is disabled, so the time printed
+# below only covers the two clock reads. Original calls, for reference:
+# rbf_kernel(X, X, gamma=gamma); rbf_kernel(X, Y, gamma=gamma)
+exact_end = datetime.datetime.utcnow()
+exact_spent_time = exact_end - exact_start
+print("Timing exact rbf: \t\t %s" % exact_spent_time)
+
+# Fastfood: approximate kernel mapping. Fitting is done before the clock
+# starts, so only the two transform calls are timed.
+rbf_transform = Fastfood(sigma=sigma,
+                         n_components=number_of_features_to_generate,
+                         tradeoff_mem_accuracy='mem',
+                         random_state=42)
+rbf_transform.fit(X)
+fastfood_fast_vec_start = datetime.datetime.utcnow()
+rbf_transform.transform(X)  # result discarded: only the elapsed time matters
+rbf_transform.transform(Y)
+fastfood_fast_vec_end = datetime.datetime.utcnow()
+fastfood_fast_vec_spent_time = fastfood_fast_vec_end - fastfood_fast_vec_start
+print("Timing fastfood fast vectorized: \t\t %s" % fastfood_fast_vec_spent_time)
+
+# Random Kitchen Sinks: approximate kernel mapping; only transform is timed.
+rks_rbf_transform = RBFSampler(gamma=gamma,
+                               n_components=number_of_features_to_generate,
+                               random_state=42)
+rks_rbf_transform.fit(X)
+rks_start = datetime.datetime.utcnow()
+rks_rbf_transform.transform(X)  # result discarded: only the elapsed time matters
+rks_rbf_transform.transform(Y)
+rks_end = datetime.datetime.utcnow()
+rks_spent_time = rks_end - rks_start
+print("Timing rks: \t\t\t %s" % rks_spent_time)
+
+assert_greater(rks_spent_time, fastfood_fast_vec_spent_time)
@@ -0,0 +1 @@
+__version__ = '0.1.0'  # version string of the fastfood_ml package