
Commit b6c664c

Merge branch 'main' of https://github.com/scikit-learn-contrib/skglm into fista
2 parents: cbc5418 + 1d4de0f

File tree

12 files changed (+249, -71 lines)


doc/api.rst

Lines changed: 1 addition & 0 deletions
@@ -55,6 +55,7 @@ Datafits
 
    Huber
    Logistic
+   LogisticGroup
    Quadratic
    QuadraticGroup
    QuadraticSVC

skglm/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-__version__ = '0.2dev'
+__version__ = '0.3.dev'
 
 from skglm.estimators import (  # noqa F401
     Lasso, WeightedLasso, ElasticNet, MCPRegression, MultiTaskLasso, LinearSVC,

skglm/datafits/__init__.py

Lines changed: 2 additions & 2 deletions
@@ -1,12 +1,12 @@
 from .base import BaseDatafit, BaseMultitaskDatafit
 from .single_task import Quadratic, QuadraticSVC, Logistic, Huber, Poisson
 from .multi_task import QuadraticMultiTask
-from .group import QuadraticGroup
+from .group import QuadraticGroup, LogisticGroup
 
 
 __all__ = [
     BaseDatafit, BaseMultitaskDatafit,
     Quadratic, QuadraticSVC, Logistic, Huber, Poisson,
     QuadraticMultiTask,
-    QuadraticGroup
+    QuadraticGroup, LogisticGroup
 ]
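
With LogisticGroup re-exported here, both group datafits are part of the public skglm.datafits namespace. A quick smoke test (assuming an install that includes this commit):

# Both group datafits are now importable from the package namespace.
from skglm.datafits import QuadraticGroup, LogisticGroup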

skglm/datafits/group.py

Lines changed: 61 additions & 0 deletions
@@ -3,6 +3,7 @@
 from numba import int32, float64
 
 from skglm.datafits.base import BaseDatafit
+from skglm.datafits.single_task import Logistic
 
 
 class QuadraticGroup(BaseDatafit):
@@ -71,3 +72,63 @@ def gradient_scalar(self, X, y, w, Xw, j):
 
     def intercept_update_step(self, y, Xw):
         return np.mean(Xw - y)
+
+
+class LogisticGroup(Logistic):
+    r"""Logistic datafit used with group penalties.
+
+    The datafit reads::
+
+        (1 / n_samples) * \sum_i log(1 + exp(-y_i * Xw_i))
+
+    Attributes
+    ----------
+    grp_indices : array, shape (n_features,)
+        The group indices stacked contiguously
+        ([grp1_indices, grp2_indices, ...]).
+
+    grp_ptr : array, shape (n_groups + 1,)
+        The group pointers such that two consecutive elements delimit
+        the indices of a group in ``grp_indices``.
+
+    lipschitz : array, shape (n_groups,)
+        The lipschitz constants for each group.
+    """
+
+    def __init__(self, grp_ptr, grp_indices):
+        self.grp_ptr, self.grp_indices = grp_ptr, grp_indices
+
+    def get_spec(self):
+        spec = (
+            ('grp_ptr', int32[:]),
+            ('grp_indices', int32[:]),
+            ('lipschitz', float64[:])
+        )
+        return spec
+
+    def params_to_dict(self):
+        return dict(grp_ptr=self.grp_ptr,
+                    grp_indices=self.grp_indices)
+
+    def initialize(self, X, y):
+        grp_ptr, grp_indices = self.grp_ptr, self.grp_indices
+        n_groups = len(grp_ptr) - 1
+
+        lipschitz = np.zeros(n_groups)
+        for g in range(n_groups):
+            grp_g_indices = grp_indices[grp_ptr[g]: grp_ptr[g+1]]
+            X_g = X[:, grp_g_indices]
+            lipschitz[g] = norm(X_g, ord=2) ** 2 / (4 * len(y))
+
+        self.lipschitz = lipschitz
+
+    def gradient_g(self, X, y, w, Xw, g):
+        grp_ptr, grp_indices = self.grp_ptr, self.grp_indices
+        grp_g_indices = grp_indices[grp_ptr[g]: grp_ptr[g+1]]
+        raw_grad_val = self.raw_grad(y, Xw)
+
+        grad_g = np.zeros(len(grp_g_indices))
+        for idx, j in enumerate(grp_g_indices):
+            grad_g[idx] = X[:, j] @ raw_grad_val
+
+        return grad_g
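
The grp_ptr / grp_indices pair used here is a CSR-style encoding of feature groups: group g owns the features grp_indices[grp_ptr[g]:grp_ptr[g + 1]]. A minimal sketch of that layout and of the per-group Lipschitz constants computed in initialize (the data and group sizes are illustrative, and everything runs in plain NumPy rather than through skglm's numba compilation):

import numpy as np

# Three contiguous groups over 6 features: {0, 1}, {2, 3}, {4, 5}.
grp_indices = np.arange(6, dtype=np.int32)
grp_ptr = np.array([0, 2, 4, 6], dtype=np.int32)

rng = np.random.default_rng(0)
X = rng.standard_normal((50, 6))
y = np.sign(rng.standard_normal(50))

for g in range(len(grp_ptr) - 1):
    # Features of group g, exactly as LogisticGroup slices them.
    X_g = X[:, grp_indices[grp_ptr[g]:grp_ptr[g + 1]]]
    # Spectral norm squared over 4 * n_samples, mirroring initialize().
    print(g, np.linalg.norm(X_g, ord=2) ** 2 / (4 * len(y)))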

skglm/datafits/multi_task.py

Lines changed: 1 addition & 1 deletion
@@ -68,7 +68,7 @@ def value(self, Y, W, XW):
     def gradient_j(self, X, Y, W, XW, j):
         """Gradient with respect to j-th coordinate of W."""
         n_samples = X.shape[0]
-        return (X[:, j:j+1].T @ XW - self.XtY[j, :]) / n_samples
+        return (X[:, j] @ XW - self.XtY[j, :]) / n_samples
 
     def gradient_j_sparse(self, X_data, X_indptr, X_indices, Y, XW, j):
         """Gradient with respect to j-th coordinate of W when X is sparse."""

skglm/experimental/sqrt_lasso.py

Lines changed: 2 additions & 1 deletion
@@ -146,7 +146,8 @@ def path(self, X, y, alphas=None, eps=1e-3, n_alphas=10):
         """
         if not hasattr(self, "solver_"):
             self.solver_ = ProxNewton(
-                tol=self.tol, max_iter=self.max_iter, verbose=self.verbose)
+                tol=self.tol, max_iter=self.max_iter, verbose=self.verbose,
+                fit_intercept=False)
         # build path
         if alphas is None:
             alpha_max = norm(X.T @ y, ord=np.inf) / (np.sqrt(len(y)) * norm(y))
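
The alpha_max in the context line is the smallest regularization level at which w = 0 is already optimal for the square-root Lasso. A sketch of how a path grid could be built from it; the geometric spacing is an assumption here, since the actual grid construction falls outside this hunk:

import numpy as np
from numpy.linalg import norm

rng = np.random.default_rng(0)
X, y = rng.standard_normal((20, 10)), rng.standard_normal(20)

eps, n_alphas = 1e-3, 10  # defaults from the path() signature above
alpha_max = norm(X.T @ y, ord=np.inf) / (np.sqrt(len(y)) * norm(y))
alphas = alpha_max * np.geomspace(1, eps, n_alphas)  # assumed geometric grid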

skglm/penalties/block_separable.py

Lines changed: 3 additions & 3 deletions
@@ -286,8 +286,8 @@ def prox_1group(self, value, stepsize, g):
     def subdiff_distance(self, w, grad_ws, ws):
         """Compute distance to the subdifferential at ``w`` of negative gradient.
 
-        Note: ``grad_ws`` is a stacked array of ``-``gradients.
-        ([-grad_ws_1, -grad_ws_2, ...])
+        Note: ``grad_ws`` is a stacked array of gradients.
+        ([grad_ws_1, grad_ws_2, ...])
         """
         alpha, weights = self.alpha, self.weights
         grp_ptr, grp_indices = self.grp_ptr, self.grp_indices
@@ -307,7 +307,7 @@ def subdiff_distance(self, w, grad_ws, ws):
                 scores[idx] = max(0, norm(grad_g) - alpha * weights[g])
             else:
                 subdiff = alpha * weights[g] * w_g / norm_w_g
-                scores[idx] = norm(grad_g - subdiff)
+                scores[idx] = norm(grad_g + subdiff)
 
         return scores
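
Both edits switch the docstring and the distance to the convention that grad_ws stacks raw (un-negated) gradients. For an active group, stationarity of datafit plus group penalty reads grad_g = -alpha * weights[g] * w_g / ||w_g||, so the violation is ||grad_g + subdiff||, exactly as the fixed line computes. A toy check of that convention:

import numpy as np
from numpy.linalg import norm

alpha, weight_g = 0.5, 1.0
w_g = np.array([3.0, -4.0])                   # active group, ||w_g|| = 5
grad_g = -alpha * weight_g * w_g / norm(w_g)  # gradient at a stationary point

subdiff = alpha * weight_g * w_g / norm(w_g)
print(norm(grad_g + subdiff))  # 0.0: zero subdifferential distance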

skglm/solvers/group_bcd.py

Lines changed: 1 addition & 1 deletion
@@ -170,6 +170,6 @@ def _construct_grad(X, y, w, Xw, datafit, ws):
     grad_ptr = 0
     for g in ws:
         grad_g = datafit.gradient_g(X, y, w, Xw, g)
-        grads[grad_ptr: grad_ptr+len(grad_g)] = -grad_g
+        grads[grad_ptr: grad_ptr+len(grad_g)] = grad_g
         grad_ptr += len(grad_g)
     return grads
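
This is the solver-side half of the sign-convention change: _construct_grad now stacks the raw per-group gradients, which is what the updated subdiff_distance in block_separable.py expects, so the two changes together keep the optimality scores consistent.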

skglm/solvers/multitask_bcd.py

Lines changed: 2 additions & 2 deletions
@@ -369,8 +369,8 @@ def _bcd_epoch(X, Y, W, XW, datafit, penalty, ws):
             continue
         Xj = X[:, j]
         old_W_j = W[j, :].copy()  # copy is very important here
-        W[j:j+1, :] = penalty.prox_1feat(
-            W[j:j+1, :] - datafit.gradient_j(X, Y, W, XW, j) / lc[j],
+        W[j, :] = penalty.prox_1feat(
+            W[j, :] - datafit.gradient_j(X, Y, W, XW, j) / lc[j],
             1 / lc[j], j)
         if not np.all(W[j, :] == old_W_j):
             for k in range(n_tasks):
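
This mirrors the gradient_j shape fix in multi_task.py: with the gradient now 1-D, the update can read and write the row W[j, :] directly instead of going through the (1, n_tasks) view W[j:j+1, :]. A minimal sketch of such a 1-D row update, with block soft-thresholding standing in for prox_1feat (an assumption; the real prox depends on the penalty):

import numpy as np

W = np.zeros((5, 3))            # (n_features, n_tasks)
z = np.array([0.9, -0.2, 1.5])  # W[j, :] minus a 1-D gradient step
thresh = 0.5

# Block soft-thresholding of the row, as an L2,1 penalty's prox would do.
W[0, :] = max(0.0, 1 - thresh / np.linalg.norm(z)) * z
print(W[0, :])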
