Skip to content

Commit 10dd69f

Browse files
first design changes, change template
1 parent 1a5ddc8 commit 10dd69f

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

55 files changed

+10984
-131
lines changed

.DS_Store

8 KB
Binary file not shown.

build/lib/skglm/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# Version string of the skglm package.
# NOTE(review): the 'dev' suffix presumably marks an unreleased development
# version -- confirm against the release process.
__version__ = '0.4dev'
2+
3+
from skglm.estimators import ( # noqa F401
4+
Lasso, WeightedLasso, ElasticNet, MCPRegression, MultiTaskLasso, LinearSVC,
5+
SparseLogisticRegression, GeneralizedLinearEstimator, CoxEstimator, GroupLasso,
6+
)
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
from .base import BaseDatafit, BaseMultitaskDatafit
2+
from .single_task import (Quadratic, QuadraticSVC, Logistic, Huber, Poisson, Gamma,
3+
Cox, WeightedQuadratic, QuadraticHessian,)
4+
from .multi_task import QuadraticMultiTask
5+
from .group import QuadraticGroup, LogisticGroup
6+
7+
8+
# Public API of the datafits sub-package.
# The entries must be *names* (strings), not the objects themselves: with
# non-string entries ``from skglm.datafits import *`` raises a TypeError.
__all__ = [
    "BaseDatafit", "BaseMultitaskDatafit",
    "Quadratic", "QuadraticSVC", "Logistic", "Huber", "Poisson", "Gamma", "Cox",
    "QuadraticMultiTask",
    "QuadraticGroup", "LogisticGroup", "WeightedQuadratic",
    "QuadraticHessian",
]

build/lib/skglm/datafits/base.py

Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
2+
class BaseDatafit:
    """Interface shared by all single-task datafits.

    Every method here is a documented placeholder with no body; concrete
    datafits override the ones they need.
    """

    def get_spec(self):
        """Describe the numba types of the attributes of the class.

        Returns
        -------
        spec : tuple of (attribute_name, dtype)
            Specification handed to Numba jitclass to compile the class.
        """

    def params_to_dict(self):
        """Return the parameters needed to build an instance of the class.

        Returns
        -------
        dict_of_params : dict
            Parameters with which an object of the class can be instantiated.
        """

    def initialize(self, X, y):
        """Run the pre-computations required before fitting on X and y.

        Parameters
        ----------
        X : array, shape (n_samples, n_features)
            Design matrix.

        y : array, shape (n_samples,)
            Target vector.
        """

    def initialize_sparse(self, X_data, X_indptr, X_indices, y):
        """Run the pre-computations before fitting when X is a sparse matrix.

        Parameters
        ----------
        X_data : array, shape (n_elements,)
            ``data`` attribute of the sparse CSC matrix X.

        X_indptr : array, shape (n_features + 1,)
            ``indptr`` attribute of the sparse CSC matrix X.

        X_indices : array, shape (n_elements,)
            ``indices`` attribute of the sparse CSC matrix X.

        y : array, shape (n_samples,)
            Target vector.
        """

    def value(self, y, w, Xw):
        """Evaluate the datafit at the coefficient vector w.

        Parameters
        ----------
        y : array_like, shape (n_samples,)
            Target vector.

        w : array_like, shape (n_features,)
            Coefficient vector.

        Xw : array_like, shape (n_samples,)
            Model fit.

        Returns
        -------
        value : float
            Value of the datafit at vector w.
        """
72+
73+
74+
class BaseMultitaskDatafit:
    """Interface shared by all multitask datafits.

    Every method here is a documented placeholder with no body; concrete
    datafits override the ones they need.
    """

    def get_spec(self):
        """Describe the numba types of the attributes of the class.

        Returns
        -------
        spec : tuple of (attribute_name, dtype)
            Specification handed to Numba jitclass to compile the class.
        """

    def params_to_dict(self):
        """Return the parameters needed to build an instance of the class.

        Returns
        -------
        dict_of_params : dict
            Parameters with which an object of the class can be instantiated.
        """

    def initialize(self, X, Y):
        """Store the values that are useful before fitting on X and Y.

        Parameters
        ----------
        X : array, shape (n_samples, n_features)
            Design matrix.

        Y : array, shape (n_samples, n_tasks)
            Multitask target.
        """

    def initialize_sparse(self, X_data, X_indptr, X_indices, Y):
        """Store the values that are useful before fitting when X is sparse.

        Parameters
        ----------
        X_data : array-like
            ``data`` attribute of the sparse CSC matrix X.

        X_indptr : array-like
            ``indptr`` attribute of the sparse CSC matrix X.

        X_indices : array-like
            ``indices`` attribute of the sparse CSC matrix X.

        Y : array, shape (n_samples, n_tasks)
            Target matrix.
        """

    def value(self, Y, W, XW):
        """Evaluate the datafit at the coefficient matrix W.

        Parameters
        ----------
        Y : array_like, shape (n_samples, n_tasks)
            Target matrix.

        W : array_like, shape (n_features, n_tasks)
            Coefficient matrix.

        XW : array_like, shape (n_samples, n_tasks)
            Model fit.

        Returns
        -------
        value : float
            Value of the datafit evaluated at matrix W.
        """

build/lib/skglm/datafits/group.py

Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
import numpy as np
2+
from numpy.linalg import norm
3+
from numba import int32, float64
4+
5+
from skglm.datafits.base import BaseDatafit
6+
from skglm.datafits.single_task import Logistic
7+
from skglm.utils.sparse_ops import spectral_norm, sparse_columns_slice
8+
9+
10+
class QuadraticGroup(BaseDatafit):
    r"""Quadratic datafit meant to be paired with group penalties.

    The datafit reads:

    .. math:: 1 / (2 xx n_"samples") ||y - Xw||_2 ^ 2

    Attributes
    ----------
    grp_indices : array, shape (n_features,)
        The group indices stacked contiguously
        ([grp1_indices, grp2_indices, ...]).

    grp_ptr : array, shape (n_groups + 1,)
        The group pointers such that two consecutive elements delimit
        the indices of a group in ``grp_indices``.
    """

    def __init__(self, grp_ptr, grp_indices):
        self.grp_ptr = grp_ptr
        self.grp_indices = grp_indices

    def get_spec(self):
        """Return the (attribute_name, numba type) pairs of the class."""
        return (
            ('grp_ptr', int32[:]),
            ('grp_indices', int32[:]),
        )

    def params_to_dict(self):
        """Return the constructor arguments as a dict."""
        return {'grp_ptr': self.grp_ptr, 'grp_indices': self.grp_indices}

    def get_lipschitz(self, X, y):
        """Compute one Lipschitz constant per group for a dense X.

        For each group, this is the squared spectral norm of the columns of
        X belonging to the group, divided by ``len(y)``.
        """
        n_groups = len(self.grp_ptr) - 1
        lipschitz = np.zeros(n_groups)

        for g in range(n_groups):
            start, stop = self.grp_ptr[g], self.grp_ptr[g + 1]
            X_g = X[:, self.grp_indices[start:stop]]
            lipschitz[g] = norm(X_g, ord=2) ** 2 / len(y)

        return lipschitz

    def get_lipschitz_sparse(self, X_data, X_indptr, X_indices, y):
        """Compute one Lipschitz constant per group for a sparse CSC X.

        The columns of each group are extracted with ``sparse_columns_slice``
        and their norm is obtained from ``spectral_norm``.
        """
        n_groups = len(self.grp_ptr) - 1
        lipschitz = np.zeros(n_groups, dtype=X_data.dtype)

        for g in range(n_groups):
            start, stop = self.grp_ptr[g], self.grp_ptr[g + 1]
            features = self.grp_indices[start:stop]
            data_g, indptr_g, indices_g = sparse_columns_slice(
                features, X_data, X_indptr, X_indices)
            norm_g = spectral_norm(data_g, indptr_g, indices_g, len(y))
            lipschitz[g] = norm_g ** 2 / len(y)

        return lipschitz

    def value(self, y, w, Xw):
        """Return the value of the datafit for the model fit Xw."""
        residual = y - Xw
        return norm(residual) ** 2 / (2 * len(y))

    def gradient_g(self, X, y, w, Xw, g):
        """Return the gradient restricted to the features of group g."""
        start, stop = self.grp_ptr[g], self.grp_ptr[g + 1]
        features = self.grp_indices[start:stop]

        grad_g = np.zeros(len(features))
        for pos in range(len(features)):
            grad_g[pos] = self.gradient_scalar(X, y, w, Xw, features[pos])

        return grad_g

    def gradient_g_sparse(self, X_data, X_indptr, X_indices, y, w, Xw, g):
        """Return the gradient restricted to group g for a sparse CSC X."""
        start, stop = self.grp_ptr[g], self.grp_ptr[g + 1]
        features = self.grp_indices[start:stop]

        grad_g = np.zeros(len(features))
        for pos in range(len(features)):
            grad_g[pos] = self.gradient_scalar_sparse(
                X_data, X_indptr, X_indices, y, w, Xw, features[pos])

        return grad_g

    def gradient_scalar_sparse(self, X_data, X_indptr, X_indices, y, w, Xw, j):
        """Return the j-th partial derivative for a sparse CSC X."""
        acc = 0.
        # only the stored entries of column j contribute to the dot product
        for ptr in range(X_indptr[j], X_indptr[j + 1]):
            row = X_indices[ptr]
            acc += X_data[ptr] * (Xw[row] - y[row])

        return acc / len(y)

    def gradient_scalar(self, X, y, w, Xw, j):
        """Return the j-th partial derivative for a dense X."""
        residual = Xw - y
        return (X[:, j] @ residual) / len(y)

    def intercept_update_step(self, y, Xw):
        """Return the mean of ``Xw - y``, used as the intercept update step."""
        return np.mean(Xw - y)
104+
105+
106+
class LogisticGroup(Logistic):
    r"""Logistic datafit meant to be paired with group penalties.

    The datafit reads:

    .. math:: 1 / n_"samples" sum_(i=1)^(n_"samples") log(1 + exp(-y_i (Xw)_i))

    Attributes
    ----------
    grp_indices : array, shape (n_features,)
        The group indices stacked contiguously
        ``[grp1_indices, grp2_indices, ...]``.

    grp_ptr : array, shape (n_groups + 1,)
        The group pointers such that two consecutive elements delimit
        the indices of a group in ``grp_indices``.

    lipschitz : array, shape (n_groups,)
        The lipschitz constants for each group.
    """

    def __init__(self, grp_ptr, grp_indices):
        self.grp_ptr = grp_ptr
        self.grp_indices = grp_indices

    def get_spec(self):
        """Return the (attribute_name, numba type) pairs of the class."""
        return (
            ('grp_ptr', int32[:]),
            ('grp_indices', int32[:]),
            ('lipschitz', float64[:])
        )

    def params_to_dict(self):
        """Return the constructor arguments as a dict."""
        return {'grp_ptr': self.grp_ptr, 'grp_indices': self.grp_indices}

    def initialize(self, X, y):
        """Compute and store one Lipschitz constant per group.

        For each group, this is the squared spectral norm of the columns of
        X belonging to the group, divided by ``4 * len(y)``.
        """
        n_groups = len(self.grp_ptr) - 1
        constants = np.zeros(n_groups)

        for g in range(n_groups):
            start, stop = self.grp_ptr[g], self.grp_ptr[g + 1]
            X_g = X[:, self.grp_indices[start:stop]]
            constants[g] = norm(X_g, ord=2) ** 2 / (4 * len(y))

        self.lipschitz = constants

    def gradient_g(self, X, y, w, Xw, g):
        """Return the gradient restricted to the features of group g."""
        start, stop = self.grp_ptr[g], self.grp_ptr[g + 1]
        features = self.grp_indices[start:stop]
        # raw_grad is not defined in this class; it comes from a base class
        raw_grad_val = self.raw_grad(y, Xw)

        grad_g = np.zeros(len(features))
        for pos in range(len(features)):
            grad_g[pos] = X[:, features[pos]] @ raw_grad_val

        return grad_g

0 commit comments

Comments
 (0)