
Commit 8e943b2

Add lightning documentation
1 parent 30c0e0b commit 8e943b2

178 files changed: +37771 −0 lines changed


lightning/.buildinfo

Lines changed: 4 additions & 0 deletions
@@ -0,0 +1,4 @@
# Sphinx build info version 1
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
config: 03f01e6df4c66de912deebb84e264bee
tags: 645f666f9bcd5a90fca523b33c5a78b7
Lines changed: 49 additions & 0 deletions
@@ -0,0 +1,49 @@
"""
================================
Classification of text documents
================================

"""
import numpy as np

from sklearn.datasets import fetch_20newsgroups_vectorized
from sklearn.cross_validation import train_test_split

from lightning.classification import CDClassifier
from lightning.classification import LinearSVC
from lightning.classification import SGDClassifier

# Load News20 dataset from scikit-learn.
bunch = fetch_20newsgroups_vectorized(subset="all")
X = bunch.data
y = bunch.target

# Select a subset of the classes for faster training.
ind = np.arange(X.shape[0])
subset = y < 5
X = X[ind[subset]]
y = y[subset]

# Train / test split.
X_tr, X_te, y_tr, y_te = train_test_split(X, y,
                                          train_size=0.75,
                                          test_size=0.25,
                                          random_state=0)

clfs = (CDClassifier(loss="squared_hinge",
                     penalty="l2",
                     max_iter=20,
                     random_state=0),

        LinearSVC(max_iter=20,
                  random_state=0),

        SGDClassifier(learning_rate="constant",
                      alpha=1e-3,
                      max_iter=20,
                      random_state=0))

for clf in clfs:
    print(clf.__class__.__name__)
    clf.fit(X_tr, y_tr)
    print(clf.score(X_te, y_te))
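
A note for readers on current scikit-learn releases: the cross_validation module imported above was deprecated in scikit-learn 0.18 and later removed in favour of model_selection. A minimal adaptation of the split for a recent install (not part of the committed example) would be:

# On scikit-learn >= 0.18, train_test_split lives in model_selection.
from sklearn.model_selection import train_test_split
X_tr, X_te, y_tr, y_te = train_test_split(X, y, train_size=0.75,
                                          test_size=0.25, random_state=0)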
Lines changed: 124 additions & 0 deletions
@@ -0,0 +1,124 @@
"""
=====================
L2 solver comparison
=====================

This example compares different solvers with L2 regularization.
"""
print(__doc__)

import sys
import time

import numpy as np
import matplotlib.pyplot as plt

from sklearn.datasets import make_classification
from sklearn.datasets import fetch_20newsgroups_vectorized

from lightning.classification import SVRGClassifier
from lightning.classification import SDCAClassifier
from lightning.classification import CDClassifier
from lightning.classification import AdaGradClassifier
from lightning.classification import SAGClassifier

from lightning.impl.adagrad_fast import _proj_elastic_all

class Callback(object):

    def __init__(self, X, y):
        self.X = X
        self.y = y
        self.obj = []
        self.times = []
        self.start_time = time.clock()
        self.test_time = 0

    def __call__(self, clf, t=None):
        test_time = time.clock()

        if hasattr(clf, "_finalize_coef"):
            clf._finalize_coef()

        if t is not None:
            _proj_elastic_all(clf.eta, t, clf.g_sum_[0], clf.g_norms_[0],
                              alpha1=0, alpha2=clf.alpha, delta=0,
                              w=clf.coef_[0])

        y_pred = clf.decision_function(self.X).ravel()
        loss = (np.maximum(1 - self.y * y_pred, 0) ** 2).mean()
        coef = clf.coef_.ravel()
        regul = 0.5 * clf.alpha * np.dot(coef, coef)
        self.obj.append(loss + regul)
        self.test_time += time.clock() - test_time
        self.times.append(time.clock() - self.start_time - self.test_time)

try:
    dataset = sys.argv[1]
except:
    dataset = "synthetic"

if dataset == "news20":
    bunch = fetch_20newsgroups_vectorized(subset="all")
    X = bunch.data
    y = bunch.target
    y[y >= 1] = 1
    alpha = 1e-4
    eta_svrg = 1e-1
    eta_sag = 1
    eta_adagrad = 1
    xlim = (0, 4)
    ylim = (0.04, 0.1)

else:
    X, y = make_classification(n_samples=10000,
                               n_features=100,
                               n_classes=2,
                               random_state=0)
    alpha = 1e-2
    eta_svrg = 1e-3
    eta_sag = 1e-3
    eta_adagrad = 1e-2
    xlim = None
    ylim = (0.5, 0.6)

y = y * 2 - 1


clf1 = SVRGClassifier(loss="squared_hinge", alpha=alpha, eta=eta_svrg,
                      n_inner=1.0, max_iter=50, random_state=0)
clf2 = SDCAClassifier(loss="squared_hinge", alpha=alpha,
                      max_iter=50, n_calls=X.shape[0]/2, random_state=0)
clf3 = CDClassifier(loss="squared_hinge", alpha=alpha, C=1.0/X.shape[0],
                    max_iter=50, n_calls=X.shape[1]/3, random_state=0)
clf4 = AdaGradClassifier(loss="squared_hinge", alpha=alpha, eta=eta_adagrad,
                         n_iter=50, n_calls=X.shape[0]/2, random_state=0)
clf5 = SAGClassifier(loss="squared_hinge", alpha=alpha, eta=eta_sag,
                     max_iter=50, random_state=0)

plt.figure()

for clf, name in ((clf1, "SVRG"),
                  (clf2, "SDCA"),
                  (clf3, "PCD"),
                  (clf4, "AdaGrad"),
                  (clf5, "SAG")):
    print(name)
    cb = Callback(X, y)
    clf.callback = cb

    if name == "PCD" and hasattr(X, "tocsc"):
        clf.fit(X.tocsc(), y)
    else:
        clf.fit(X, y)

    plt.plot(cb.times, cb.obj, label=name)

plt.xlim(xlim)
plt.ylim(ylim)
plt.xlabel("CPU time")
plt.ylabel("Objective value")
plt.legend()

plt.show()
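
For reference, the quantity each Callback records is the L2-regularized squared hinge objective that all five solvers are minimizing, read directly off the code above:

    F(w) = \frac{1}{n} \sum_{i=1}^{n} \max(0,\, 1 - y_i\, w^\top x_i)^2 + \frac{\alpha}{2} \|w\|_2^2

The plotted curves are therefore directly comparable across solvers (lower is better), and the CPU-time axis excludes the time spent evaluating F(w) itself, which the callback tracks separately in test_time.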
Lines changed: 45 additions & 0 deletions
@@ -0,0 +1,45 @@
"""
==================
Robust regression
==================

"""
print(__doc__)

import numpy as np
import pylab as pl

from sklearn.datasets import make_regression
from sklearn.utils import check_random_state
from sklearn.linear_model import Ridge

from lightning.regression import LinearSVR

# Generate regression data.
X_train, y_train = make_regression(n_samples=15, n_features=1,
                                   n_informative=1, random_state=0)

# Add noise.
rs = check_random_state(0)
y_train += rs.normal(np.std(y_train), size=X_train.shape[0])
# Add an outlier.
y_train[5] *= 5

X_test = np.linspace(-5, 5, 100).reshape(-1, 1)

pl.figure()
pl.scatter(X_train.ravel(), y_train)

reg = Ridge(alpha=1e-1)
reg.fit(X_train, y_train)
pl.plot(X_test.ravel(), reg.predict(X_test), label="Ridge")

# LinearSVR is equivalent to absolute-loss regression (robust regression)
# when epsilon=0.
reg = LinearSVR(C=10, epsilon=0, fit_intercept=True, random_state=0)
reg.fit(X_train, y_train)
pl.plot(X_test.ravel(), reg.predict(X_test), label="Robust")

pl.legend(loc="upper left")

pl.show()
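
The epsilon=0 remark follows from the epsilon-insensitive loss minimized by LinearSVR (standard definition, stated here for clarity):

    L_\varepsilon(y, f(x)) = \max(0,\, |y - f(x)| - \varepsilon), \qquad L_0(y, f(x)) = |y - f(x)|

With epsilon=0 the model fits the absolute (L1) loss, which grows only linearly in the residual, so the single inflated target at index 5 pulls the "Robust" fit far less than it pulls Ridge's squared-loss fit.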
Lines changed: 41 additions & 0 deletions
@@ -0,0 +1,41 @@
"""
==========================
SGD: Convex Loss Functions
==========================

"""
print(__doc__)

import numpy as np
import pylab as pl
from lightning.impl.sgd import Hinge
from lightning.impl.sgd import SquaredHinge
from lightning.impl.sgd import Log
from lightning.impl.sgd import SquaredLoss

###############################################################################
# Define loss functions
xmin, xmax = -3, 3
hinge = Hinge(1)
squared_hinge = SquaredHinge()
log = Log()
squared_loss = SquaredLoss()

###############################################################################
# Plot loss functions
xx = np.linspace(xmin, xmax, 100)
pl.plot([xmin, 0, 0, xmax], [1, 1, 0, 0], 'k-',
        label="Zero-one loss")
pl.plot(xx, [hinge.loss(x, 1) for x in xx], 'g-',
        label="Hinge loss")
pl.plot(xx, [squared_hinge.loss(x, 1) for x in xx], 'b--',
        label="Squared hinge loss", zorder=3)
pl.plot(xx, [log.loss(x, 1) for x in xx], 'r-',
        label="Log loss")
pl.plot(xx, [2.0*squared_loss.loss(x, 1) for x in xx], 'c-',
        label="Squared loss")
pl.ylim((0, 5))
pl.legend(loc="upper right")
pl.xlabel(r"$y \cdot f(x)$")
pl.ylabel("$L(y, f(x))$")
pl.show()
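
For reference, the plotted losses, written as functions of the margin $m = y \cdot f(x)$ for labels $y \in \{-1, +1\}$, are the usual textbook definitions (the exact constants in lightning's implementation are an assumption here; the 2.0 rescaling of SquaredLoss in the plot suggests it carries a 1/2 factor):

    L_{hinge}(m) = \max(0,\, 1 - m)
    L_{sq.hinge}(m) = \max(0,\, 1 - m)^2
    L_{log}(m) = \log(1 + e^{-m})
    L_{squared}(m) = \tfrac{1}{2}(1 - m)^2

All four are convex surrogates for the zero-one loss shown in black.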
Lines changed: 110 additions & 0 deletions
@@ -0,0 +1,110 @@
"""
================================
Sparse non-linear classification
================================

This example demonstrates how to use `CDClassifier` with L1 penalty to do
sparse non-linear classification. The trick simply consists in fitting the
classifier with a kernel matrix (e.g., using an RBF kernel).

There are a few interesting differences with standard kernel SVMs:

1. the kernel matrix does not need to be positive semi-definite (hence the
expression "kernel matrix" above is an abuse of terminology)

2. the number of "support vectors" will typically be smaller thanks to L1
regularization and can be adjusted by the regularization parameter C (the
smaller C, the fewer the support vectors)

3. the "support vectors" need not be located at the margin
"""

import numpy as np
import pylab as pl

from sklearn.metrics.pairwise import rbf_kernel

from lightning.classification import CDClassifier
from lightning.classification import KernelSVC

np.random.seed(0)

class SparseNonlinearClassifier(CDClassifier):

    def __init__(self, gamma=1e-2, C=1, alpha=1):
        self.gamma = gamma
        super(SparseNonlinearClassifier, self).__init__(C=C,
                                                        alpha=alpha,
                                                        loss="squared_hinge",
                                                        penalty="l1")

    def fit(self, X, y):
        K = rbf_kernel(X, gamma=self.gamma)
        self.X_train_ = X
        super(SparseNonlinearClassifier, self).fit(K, y)
        return self

    def decision_function(self, X):
        K = rbf_kernel(X, self.X_train_, gamma=self.gamma)
        return super(SparseNonlinearClassifier, self).decision_function(K)


def gen_non_lin_separable_data():
    mean1 = [-1, 2]
    mean2 = [1, -1]
    mean3 = [4, -4]
    mean4 = [-4, 4]
    cov = [[1.0, 0.8], [0.8, 1.0]]
    X1 = np.random.multivariate_normal(mean1, cov, 50)
    X1 = np.vstack((X1, np.random.multivariate_normal(mean3, cov, 50)))
    y1 = np.ones(len(X1))
    X2 = np.random.multivariate_normal(mean2, cov, 50)
    X2 = np.vstack((X2, np.random.multivariate_normal(mean4, cov, 50)))
    y2 = np.ones(len(X2)) * -1
    return X1, y1, X2, y2

def plot_contour(X, X1, X2, clf, title):
    pl.figure()
    pl.title(title)

    # Plot instances of class 1.
    pl.plot(X1[:, 0], X1[:, 1], "ro")
    # Plot instances of class 2.
    pl.plot(X2[:, 0], X2[:, 1], "bo")

    # Select "support vectors".
    if hasattr(clf, "support_vectors_"):
        sv = clf.support_vectors_
    else:
        sv = X[clf.coef_.ravel() != 0]

    # Plot support vectors.
    pl.scatter(sv[:, 0], sv[:, 1], s=100, c="g")

    # Plot decision surface.
    A, B = np.meshgrid(np.linspace(-6, 6, 50), np.linspace(-6, 6, 50))
    C = np.array([[x1, x2] for x1, x2 in zip(np.ravel(A), np.ravel(B))])
    Z = clf.decision_function(C).reshape(A.shape)
    pl.contour(A, B, Z, [0.0], colors='k', linewidths=1, origin='lower')

    pl.axis("tight")

# Generate synthetic data from 2 classes.
X1, y1, X2, y2 = gen_non_lin_separable_data()

# Combine them to form a training set.
X = np.vstack((X1, X2))
y = np.hstack((y1, y2))

# Train the classifiers.
clf = SparseNonlinearClassifier(gamma=0.1, alpha=1./0.05)
clf.fit(X, y)

clf2 = KernelSVC(gamma=0.1, kernel="rbf", alpha=1e-2)
clf2.fit(X, y)

# Plot contours.
plot_contour(X, X1, X2, clf, "Sparse")
plot_contour(X, X1, X2, clf2, "Kernel SVM")

pl.show()
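
A small follow-up one might append to quantify point 2 of the docstring, using only attributes the example already relies on (a sketch; the exact count depends on the random seed and on alpha):

# Count the "support vectors" kept by the L1 penalty: training points whose
# coefficient is nonzero, exactly as plot_contour selects them.
n_sv = int(np.sum(clf.coef_.ravel() != 0))
print("Sparse model keeps %d of %d training points as support vectors"
      % (n_sv, X.shape[0]))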
