Hi @mblondel @fabianp
I think this will be quick to answer: why is the solution sometimes equal to that of sklearn, and sometimes not?
This should be quick to reproduce; compare the 1st and 3rd results over the 5 seeds:
import numpy as np
from numpy.linalg import norm
from lightning.regression import CDRegressor
from sklearn.linear_model import Lasso
# Synthetic sparse regression problem: only the first 20 coefficients are
# non-zero, and the targets carry Gaussian noise scaled by 0.3.
n_samples, n_features = 200, 500
np.random.seed(0)  # fixed seed so the data are the same on every run
X = np.random.randn(n_samples, n_features)
beta = np.zeros(n_features)
beta[:20] = 1
y = X @ beta + 0.3 * np.random.randn(n_samples)
# Regularization strength: one tenth of the largest absolute entry of X^T y.
alpha = norm(X.T @ y, ord=np.inf) / 10
def p_obj(X, y, alpha, w):
    """Return the Lasso primal objective at ``w``.

    Computes ``||y - X w||_2^2 / 2 + alpha * ||w||_1``.

    Parameters
    ----------
    X : design matrix, used as ``X @ w``.
    y : target vector.
    alpha : weight of the L1 penalty.
    w : coefficient vector at which the objective is evaluated.
    """
    residual = y - X @ w
    data_fit = norm(residual) ** 2 / 2
    penalty = alpha * norm(w, ord=1)
    return data_fit + penalty
# For several random seeds, fit lightning's CDRegressor and scikit-learn's
# Lasso on the same data, then print the distance between the two coefficient
# vectors and the gap between their primal objective values.
for seed in range(5):
    print('-' * 80)
    # The tolerance must be a tiny positive float: the original `1-30` is the
    # integer subtraction 1 - 30 == -29, not 1e-30.
    # NOTE(review): with a tolerance this small the solver presumably stops on
    # its iteration cap rather than on tol -- confirm CDRegressor's max_iter.
    clf = CDRegressor(C=0.5, alpha=alpha, penalty='l1',
                      tol=1e-30, random_state=seed)
    clf.fit(X, y)
    # scikit-learn's Lasso scales the data-fit term by 1 / n_samples, so alpha
    # is divided by len(y) to target the same objective as p_obj.
    las = Lasso(fit_intercept=False, alpha=alpha / len(y), tol=1e-10).fit(X, y)
    print(norm(clf.coef_[0] - las.coef_))
    light_o = p_obj(X, y, alpha, clf.coef_[0])
    sklea_o = p_obj(X, y, alpha, las.coef_)
    # Positive when lightning's solution has a higher objective than sklearn's.
    print(light_o - sklea_o)
ping @QB3 @agramfort