scikit-learn-contrib
diff --git a/‎lightning/_downloads/plot_1d_total_variation.py‎
Lines changed: 47 additions & 0 deletions b/‎lightning/_downloads/plot_1d_total_variation.py‎
Lines changed: 47 additions & 0 deletions
diff --git a/‎lightning/_downloads/plot_l2_solvers.py‎
Lines changed: 26 additions & 17 deletions b/‎lightning/_downloads/plot_l2_solvers.py‎
Lines changed: 26 additions & 17 deletions
diff --git a/‎lightning/_downloads/plot_sample_weight.py‎
Lines changed: 50 additions & 0 deletions b/‎lightning/_downloads/plot_sample_weight.py‎
Lines changed: 50 additions & 0 deletions
diff --git a/‎lightning/_images/plot_1d_total_variation.png‎
43.2 KB b/‎lightning/_images/plot_1d_total_variation.png‎
43.2 KB
diff --git a/‎lightning/_images/plot_1d_total_variation_001.png‎
54.4 KB b/‎lightning/_images/plot_1d_total_variation_001.png‎
54.4 KB
diff --git a/‎lightning/_images/plot_l2_solvers.png‎
27.1 KB b/‎lightning/_images/plot_l2_solvers.png‎
27.1 KB
diff --git a/‎lightning/_images/plot_l2_solvers_001.png‎
38.9 KB b/‎lightning/_images/plot_l2_solvers_001.png‎
38.9 KB
diff --git a/‎lightning/_images/plot_sample_weight.png‎
19.6 KB b/‎lightning/_images/plot_sample_weight.png‎
19.6 KB
diff --git a/‎lightning/_images/plot_sample_weight_001.png‎
41 KB b/‎lightning/_images/plot_sample_weight_001.png‎
41 KB
diff --git a/‎lightning/_modules/index.html‎
Lines changed: 5 additions & 1 deletion b/‎lightning/_modules/index.html‎
Lines changed: 5 additions & 1 deletion
@@ -0,0 +1,47 @@
+"""
+=====================================
+Signal recovery by 1D total variation
+=====================================
+
+In this example, we generate a signal that is piecewise constant. We then
+observe some random and corrupted measurements from that signal and
+then try to recover that signal using L1 and TV1D penalties.
+
+Given a ground truth vector, the signal that we observe is given by
+
+     y = sign(X ground_truth + noise)
+
+where X is a random matrix. We recover an estimate of the vector ground_truth
+by solving an optimization problem using lightning's FistaClassifier.
+
+The 1D total variation is also known as fused lasso.
+"""
+
+import numpy as np
+import matplotlib.pyplot as plt
+from lightning.classification import FistaClassifier
+from sklearn.grid_search import GridSearchCV
+
+# generate some synthetic data
+n_samples = 200
+ground_truth = np.concatenate((
+    np.ones(20), - np.ones(20), np.zeros(40)))
+n_features = ground_truth.size
+np.random.seed(0)  # for reproducibility
+X = np.random.rand(n_samples, n_features)
+# generate y as a linear model, y = sign(X w + noise)
+y = np.sign(X.dot(ground_truth) + 0.5 * np.random.randn(n_samples)).astype(np.int)
+
+
+for penalty in ('l1', 'tv1d'):
+    clf = FistaClassifier(penalty=penalty)
+    gs = GridSearchCV(clf, {'alpha': np.logspace(-3, 3, 10)})
+    gs.fit(X, y)
+    coefs = gs.best_estimator_.coef_
+    plt.plot(coefs.ravel(), label='%s penalty' % penalty, lw=3)
+
+plt.plot(ground_truth, lw=3, marker='^', markevery=5, markersize=10, label="ground truth")
+plt.grid()
+plt.legend()
+plt.ylim((-1.5, 1.5))
+plt.show()
@@ -20,7 +20,7 @@
 from lightning.classification import SDCAClassifier
 from lightning.classification import CDClassifier
 from lightning.classification import AdaGradClassifier
-from lightning.classification import SAGClassifier
+from lightning.classification import SAGAClassifier, SAGClassifier
 
 from lightning.impl.adagrad_fast import _proj_elastic_all
 
@@ -66,10 +66,8 @@ def __call__(self, clf, t=None):
     y[y >= 1] = 1
     alpha = 1e-4
     eta_svrg = 1e-1
-    eta_sag = 1
     eta_adagrad = 1
-    xlim = (0, 4)
-    ylim = (0.04, 0.1)
+    xlim = (0, 20)
 
 else:
     X, y = make_classification(n_samples=10000,
@@ -78,32 +76,38 @@ def __call__(self, clf, t=None):
                                random_state=0)
     alpha = 1e-2
     eta_svrg = 1e-3
-    eta_sag = 1e-3
     eta_adagrad = 1e-2
-    xlim = None
-    ylim = (0.5, 0.6)
+    xlim = [0, 2]
 
 y = y * 2 - 1
 
+# make sure the solvers do not stop prematurely; we want to see
+# the full convergence path
+tol = 1e-24
 
 clf1 = SVRGClassifier(loss="squared_hinge", alpha=alpha, eta=eta_svrg,
-                      n_inner=1.0, max_iter=50, random_state=0)
+                      n_inner=1.0, max_iter=100, random_state=0, tol=1e-24)
 clf2 = SDCAClassifier(loss="squared_hinge", alpha=alpha,
-                      max_iter=50, n_calls=X.shape[0]/2, random_state=0)
+                      max_iter=100, n_calls=X.shape[0]/2, random_state=0, tol=tol)
 clf3 = CDClassifier(loss="squared_hinge", alpha=alpha, C=1.0/X.shape[0],
-                    max_iter=50, n_calls=X.shape[1]/3, random_state=0)
+                    max_iter=50, n_calls=X.shape[1]/3, random_state=0, tol=tol)
 clf4 = AdaGradClassifier(loss="squared_hinge", alpha=alpha, eta=eta_adagrad,
-                    n_iter=50, n_calls=X.shape[0]/2, random_state=0)
-clf5 = SAGClassifier(loss="squared_hinge", alpha=alpha, eta=eta_sag,
-                    max_iter=50, random_state=0)
+                    n_iter=100, n_calls=X.shape[0]/2, random_state=0)
+clf5 = SAGAClassifier(loss="squared_hinge", alpha=alpha,
+                    max_iter=100, random_state=0, tol=tol)
+clf6 = SAGClassifier(loss="squared_hinge", alpha=alpha,
+                    max_iter=100, random_state=0, tol=tol)
 
 plt.figure()
 
+data = {}
 for clf, name in ((clf1, "SVRG"),
                   (clf2, "SDCA"),
                   (clf3, "PCD"),
                   (clf4, "AdaGrad"),
-                  (clf5, "SAG")):
+                  (clf5, "SAGA"),
+                  (clf6, "SAG")
+                  ):
     print(name)
     cb = Callback(X, y)
     clf.callback = cb
@@ -112,13 +116,18 @@ def __call__(self, clf, t=None):
         clf.fit(X.tocsc(), y)
     else:
         clf.fit(X, y)
+    data[name] = (cb.times, np.array(cb.obj))
 
-    plt.plot(cb.times, cb.obj, label=name)
+# get best value
+fmin = min([np.min(a[1]) for a in data.values()])
+for name in data:
+    plt.plot(data[name][0], data[name][1] - fmin, label=name, lw=3)
 
 plt.xlim(xlim)
-plt.ylim(ylim)
+plt.yscale('log')
 plt.xlabel("CPU time")
-plt.ylabel("Objective value")
+plt.ylabel("Objective value minus optimum")
 plt.legend()
+plt.grid()
 
 plt.show()
@@ -0,0 +1,50 @@
+"""
+======================
+SAGA: Weighted samples
+======================
+
+Plot decision function of a weighted dataset, where the size of each point
+is proportional to its weight.
+
+Adapted from scikit-learn's plot_sgd_weighted_samples.py
+"""
+print(__doc__)
+
+import numpy as np
+import matplotlib.pyplot as plt
+from lightning.impl.sag import SAGAClassifier
+
+# we create 20 points
+np.random.seed(0)
+X = np.r_[np.random.randn(10, 2) + [1, 1], np.random.randn(10, 2)]
+y = np.array([1] * 10 + [-1] * 10)
+sample_weight = 100 * np.abs(np.random.randn(20))
+# and assign a bigger weight to the first 10 samples
+sample_weight[:10] *= 10
+
+# plot the weighted data points
+xx, yy = np.meshgrid(np.linspace(-4, 5, 500), np.linspace(-4, 5, 500))
+plt.figure()
+plt.scatter(X[:, 0], X[:, 1], c=y, s=sample_weight, alpha=0.9,
+            cmap=plt.cm.bone)
+
+# fit the unweighted model
+clf = SAGAClassifier(alpha=0.01, loss='log')
+clf.fit(X, y)
+Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
+Z = Z.reshape(xx.shape)
+no_weights = plt.contour(xx, yy, Z, levels=[0], linestyles=['solid'])
+
+# fit the weighted model
+clf = SAGAClassifier(alpha=0.01, loss='log')
+clf.fit(X, y, sample_weight=sample_weight)
+Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])
+Z = Z.reshape(xx.shape)
+samples_weights = plt.contour(xx, yy, Z, levels=[0], linestyles=['dashed'])
+
+plt.legend([no_weights.collections[0], samples_weights.collections[0]],
+           ["no weights", "with weights"], loc="lower left")
+
+plt.xticks(())
+plt.yticks(())
+plt.show()
@@ -73,9 +73,11 @@
       role="menu"
       aria-labelledby="dLabelGlobalToc"><ul>
 <li class="toctree-l1"><a class="reference internal" href="../auto_examples/index.html">Examples</a><ul>
+<li class="toctree-l2"><a class="reference internal" href="../auto_examples/plot_1d_total_variation.html">Signal recovery by 1D total variation</a></li>
 <li class="toctree-l2"><a class="reference internal" href="../auto_examples/plot_sgd_loss_functions.html">SGD: Convex Loss Functions</a></li>
 <li class="toctree-l2"><a class="reference internal" href="../auto_examples/plot_robust_regression.html">Robust regression</a></li>
 <li class="toctree-l2"><a class="reference internal" href="../auto_examples/trace.html">Trace norm</a></li>
+<li class="toctree-l2"><a class="reference internal" href="../auto_examples/plot_sample_weight.html">SAGA: Weighted samples</a></li>
 <li class="toctree-l2"><a class="reference internal" href="../auto_examples/document_classification_news20.html">Classification of text documents</a></li>
 <li class="toctree-l2"><a class="reference internal" href="../auto_examples/plot_svrg.html">Sensitivity to hyper-parameters in SVRG</a></li>
 <li class="toctree-l2"><a class="reference internal" href="../auto_examples/plot_sparse_non_linear.html">Sparse non-linear classification</a></li>
@@ -118,13 +120,15 @@
 <li class="toctree-l2"><a class="reference internal" href="../intro.html#fista">FISTA</a></li>
 <li class="toctree-l2"><a class="reference internal" href="../intro.html#stochastic-gradient-method-sgd">Stochastic gradient method (SGD)</a></li>
 <li class="toctree-l2"><a class="reference internal" href="../intro.html#adagrad">AdaGrad</a></li>
-<li class="toctree-l2"><a class="reference internal" href="../intro.html#stochastic-averaged-gradient-sag">Stochastic averaged gradient (SAG)</a></li>
+<li class="toctree-l2"><a class="reference internal" href="../intro.html#stochastic-averaged-gradient-sag-and-saga">Stochastic averaged gradient (SAG and SAGA)</a></li>
 <li class="toctree-l2"><a class="reference internal" href="../intro.html#stochastic-variance-reduced-gradient-svrg">Stochastic variance-reduced gradient (SVRG)</a></li>
 <li class="toctree-l2"><a class="reference internal" href="../intro.html#prank">PRank</a><ul>
 <li class="toctree-l3"><a class="reference internal" href="../auto_examples/index.html">Examples</a><ul>
+<li class="toctree-l4"><a class="reference internal" href="../auto_examples/plot_1d_total_variation.html">Signal recovery by 1D total variation</a></li>
 <li class="toctree-l4"><a class="reference internal" href="../auto_examples/plot_sgd_loss_functions.html">SGD: Convex Loss Functions</a></li>
 <li class="toctree-l4"><a class="reference internal" href="../auto_examples/plot_robust_regression.html">Robust regression</a></li>
 <li class="toctree-l4"><a class="reference internal" href="../auto_examples/trace.html">Trace norm</a></li>
+<li class="toctree-l4"><a class="reference internal" href="../auto_examples/plot_sample_weight.html">SAGA: Weighted samples</a></li>
 <li class="toctree-l4"><a class="reference internal" href="../auto_examples/document_classification_news20.html">Classification of text documents</a></li>
 <li class="toctree-l4"><a class="reference internal" href="../auto_examples/plot_svrg.html">Sensitivity to hyper-parameters in SVRG</a></li>
 <li class="toctree-l4"><a class="reference internal" href="../auto_examples/plot_sparse_non_linear.html">Sparse non-linear classification</a></li>