# To apply a classifier on this data, we need to flatten the images, to
# turn the data into a (samples, features) matrix:
n_samples = len(digits.data)
-data = digits.data / 16.
+data = digits.data / 16.0
data -= data.mean(axis=0)

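The division by 16.0 and the mean subtraction normalize the data: the load_digits pixel values span 0..16, so each feature lands in [0, 1] before centering. A minimal self-contained check (a sketch, not part of the diff):

import numpy as np
from sklearn.datasets import load_digits

digits = load_digits()
data = digits.data / 16.0  # pixel values are 0..16, so this maps to [0, 1]
data -= data.mean(axis=0)  # center each feature at zero
assert np.allclose(data.mean(axis=0), 0)
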
# We learn the digits on the first half of the data
-data_train, targets_train = data[:n_samples // 2], digits.target[:n_samples // 2]
+data_train, targets_train = (
+    data[: n_samples // 2],
+    digits.target[: n_samples // 2],
+)


# Now predict the value of the digit on the second half:
-data_test, targets_test = data[n_samples // 2:], digits.target[n_samples // 2:]
-#data_test = scaler.transform(data_test)
+data_test, targets_test = (
+    data[n_samples // 2 :],
+    digits.target[n_samples // 2 :],
+)
+# data_test = scaler.transform(data_test)

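The manual slicing trains on the first half of the samples and tests on the second half. An equivalent split using scikit-learn's helper, shown only as a sketch (the example itself slices by hand):

from sklearn.model_selection import train_test_split

data_train, data_test, targets_train, targets_test = train_test_split(
    data, digits.target, test_size=0.5, shuffle=False  # keep the original order
)
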
# fix model parameters:
-GAMMA = .2
+GAMMA = 0.2
SIGMA = np.sqrt(1 / (2 * GAMMA))

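GAMMA and SIGMA are two parametrizations of the same RBF kernel, k(x, y) = exp(-gamma * ||x - y||^2) = exp(-||x - y||^2 / (2 * sigma^2)), hence sigma = sqrt(1 / (2 * gamma)). A quick numeric check (sketch):

import numpy as np

GAMMA = 0.2
SIGMA = np.sqrt(1 / (2 * GAMMA))  # about 1.5811
assert np.isclose(1 / (2 * SIGMA**2), GAMMA)
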
# Create a classifier: a support vector classifier
⋯

# create pipeline from kernel approximation
# and linear svm
-feature_map_fastfood = Fastfood(sigma=SIGMA, tradeoff_mem_accuracy='mem', random_state=1)
+feature_map_fastfood = Fastfood(
+    sigma=SIGMA, tradeoff_mem_accuracy="mem", random_state=1
+)
feature_map_fourier = RBFSampler(gamma=GAMMA, random_state=1)
feature_map_nystroem = Nystroem(gamma=GAMMA, random_state=1)
-fastfood_approx_svm = pipeline.Pipeline([("feature_map", feature_map_fastfood),
-                                         ("svm", svm.LinearSVC())])
+fastfood_approx_svm = pipeline.Pipeline(
+    [("feature_map", feature_map_fastfood), ("svm", svm.LinearSVC())]
+)

-fourier_approx_svm = pipeline.Pipeline([("feature_map", feature_map_fourier),
-                                        ("svm", svm.LinearSVC())])
+fourier_approx_svm = pipeline.Pipeline(
+    [("feature_map", feature_map_fourier), ("svm", svm.LinearSVC())]
+)

-nystroem_approx_svm = pipeline.Pipeline([("feature_map", feature_map_nystroem),
-                                         ("svm", svm.LinearSVC())])
+nystroem_approx_svm = pipeline.Pipeline(
+    [("feature_map", feature_map_nystroem), ("svm", svm.LinearSVC())]
+)

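Each pipeline chains an approximate RBF feature map with a linear SVM trained in the transformed space. The fit/score loop lives in the elided part of the file; a minimal usage sketch (n_components=100 is an assumption here, matching the plot titles further down):

fourier_approx_svm.set_params(feature_map__n_components=100)
fourier_approx_svm.fit(data_train, targets_train)
print(fourier_approx_svm.score(data_test, targets_test))
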
# fit and predict using linear and kernel svm:

⋯
timescale = plt.subplot(212)

accuracy.plot(sample_sizes, nystroem_scores, label="Nystroem approx. kernel")
-timescale.plot(sample_sizes, nystroem_times, '--',
-               label='Nystroem approx. kernel')
+timescale.plot(
+    sample_sizes, nystroem_times, "--", label="Nystroem approx. kernel"
+)

accuracy.plot(sample_sizes, fourier_scores, label="Fourier approx. kernel")
-timescale.plot(sample_sizes, fourier_times, '--',
-               label='Fourier approx. kernel')
+timescale.plot(
+    sample_sizes, fourier_times, "--", label="Fourier approx. kernel"
+)

accuracy.plot(sample_sizes, fastfood_scores, label="Fastfood approx. kernel")
-timescale.plot(sample_sizes, fastfood_times, '--',
-               label='Fastfood approx. kernel')
+timescale.plot(
+    sample_sizes, fastfood_times, "--", label="Fastfood approx. kernel"
+)

# horizontal lines for exact rbf and linear kernels:
-accuracy.plot([sample_sizes[0], sample_sizes[-1]],
-              [linear_svm_score, linear_svm_score], label="linear svm")
-timescale.plot([sample_sizes[0], sample_sizes[-1]],
-               [linear_svm_time, linear_svm_time], '--', label='linear svm')
-
-accuracy.plot([sample_sizes[0], sample_sizes[-1]],
-              [kernel_svm_score, kernel_svm_score], label="rbf svm")
-timescale.plot([sample_sizes[0], sample_sizes[-1]],
-               [kernel_svm_time, kernel_svm_time], '--', label='rbf svm')
+accuracy.plot(
+    [sample_sizes[0], sample_sizes[-1]],
+    [linear_svm_score, linear_svm_score],
+    label="linear svm",
+)
+timescale.plot(
+    [sample_sizes[0], sample_sizes[-1]],
+    [linear_svm_time, linear_svm_time],
+    "--",
+    label="linear svm",
+)
+
+accuracy.plot(
+    [sample_sizes[0], sample_sizes[-1]],
+    [kernel_svm_score, kernel_svm_score],
+    label="rbf svm",
+)
+timescale.plot(
+    [sample_sizes[0], sample_sizes[-1]],
+    [kernel_svm_time, kernel_svm_time],
+    "--",
+    label="rbf svm",
+)

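Plotting a two-point series between sample_sizes[0] and sample_sizes[-1] draws a constant baseline for each exact kernel. Axes.axhline is an equivalent, slightly more direct alternative (a sketch, not what the file does):

accuracy.axhline(kernel_svm_score, label="rbf svm")
timescale.axhline(kernel_svm_time, linestyle="--", label="rbf svm")
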
# vertical line for dataset dimensionality = 64
accuracy.plot([64, 64], [0.7, 1], label="n_features")

# legends and labels
accuracy.set_title("Classification accuracy")
-timescale.set_title("Training times for dataset size of " + str(n_samples) + " with dimensionality of "
-                    + str(np.size(data, 1)))
+timescale.set_title(
+    "Training times for dataset size of "
+    + str(n_samples)
+    + " with dimensionality of "
+    + str(np.size(data, 1))
+)
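The same title reads more directly as an f-string (a sketch producing identical output):

timescale.set_title(
    f"Training times for dataset size of {n_samples} "
    f"with dimensionality of {np.size(data, 1)}"
)
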
accuracy.set_xlim(sample_sizes[0], sample_sizes[-1])
accuracy.set_xticks(())
accuracy.set_ylim(np.min(fourier_scores), 1)
timescale.set_xlabel("Sampling steps = transformed feature dimension")
accuracy.set_ylabel("Classification accuracy")
timescale.set_ylabel("Training time in seconds")
-accuracy.legend(loc='best')
-timescale.legend(loc='best')
+accuracy.legend(loc="best")
+timescale.legend(loc="best")

# visualize the decision surface, projected down to the first
# two principal components of the dataset
⋯
flat_grid = grid.reshape(-1, data.shape[1])

# title for the plots
-titles = ['SVC with rbf kernel',
-          'SVC (linear kernel)\n with Fastfood rbf feature map\n'
-          'n_components=100',
-          'SVC (linear kernel)\n with Fourier rbf feature map\n'
-          'n_components=100',
-          'SVC (linear kernel)\n with Nystroem rbf feature map\n'
-          'n_components=100']
+titles = [
+    "SVC with rbf kernel",
+    "SVC (linear kernel)\n with Fastfood rbf feature map\n" "n_components=100",
+    "SVC (linear kernel)\n with Fourier rbf feature map\n" "n_components=100",
+    "SVC (linear kernel)\n with Nystroem rbf feature map\n" "n_components=100",
+]

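Note the adjacent string literals kept by the reformatting: Python concatenates them at compile time, so each entry is still one three-line title. For example:

title = "SVC (linear kernel)\n with Fourier rbf feature map\n" "n_components=100"
assert title == "SVC (linear kernel)\n with Fourier rbf feature map\nn_components=100"
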
plt.tight_layout()
plt.figure(figsize=(12, 5))

# predict and plot
-for i, clf in enumerate((kernel_svm, fastfood_approx_svm, nystroem_approx_svm,
-                         fourier_approx_svm)):
+for i, clf in enumerate(
+    (kernel_svm, fastfood_approx_svm, nystroem_approx_svm, fourier_approx_svm)
+):
    # Plot the decision boundary. For that, we will assign a color to each
    # point in the mesh [x_min, x_max]x[y_min, y_max].
    plt.subplot(1, 4, i + 1)
⋯
    # Put the result into a color plot
    Z = Z.reshape(grid.shape[:-1])
    plt.contourf(multiples, multiples, Z, cmap=plt.cm.Paired)
-    plt.axis('off')
+    plt.axis("off")

    # Plot also the training points
    plt.scatter(X[:, 0], X[:, 1], c=targets_train, cmap=plt.cm.Paired)

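The loop follows the standard decision-surface recipe: predict over a flattened 2-D grid, reshape to the grid, and contour-plot. A self-contained toy version of the same pattern (all data and names below are illustrative, not taken from this file):

import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm

rng = np.random.RandomState(0)
X2 = rng.randn(200, 2)
y2 = (X2[:, 0] * X2[:, 1] > 0).astype(int)  # XOR-like toy labels
clf = svm.SVC(kernel="rbf", gamma=0.2).fit(X2, y2)

multiples = np.linspace(-3, 3, 200)  # grid steps along both axes
xx, yy = np.meshgrid(multiples, multiples)
Z = clf.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)

plt.contourf(multiples, multiples, Z, cmap=plt.cm.Paired)
plt.scatter(X2[:, 0], X2[:, 1], c=y2, cmap=plt.cm.Paired)
plt.axis("off")
plt.show()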