Skip to content

Commit caecdeb

Browse files
first try at PoissonGroup
1 parent 1d03cdb commit caecdeb

File tree

2 files changed

+80
-1
lines changed

2 files changed

+80
-1
lines changed

debug_poisson_group.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
import numpy as np
2+
from skglm import GeneralizedLinearEstimator
3+
from skglm.datafits.group import PoissonGroup
4+
from skglm.penalties import WeightedGroupL2
5+
from skglm.solvers import GroupProxNewton
6+
from sklearn.metrics import mean_squared_error
7+
8+
# Sample data and group structure
9+
n_samples, n_features = 20, 10
10+
X = np.random.randn(n_samples, n_features)
11+
y = np.random.poisson(np.abs(X[:, 0] + X[:, 5]))
12+
13+
grp_ptr = np.array([0, 3, 5, 8, 10], dtype=np.int32)
14+
grp_indices = np.arange(n_features, dtype=np.int32)
15+
16+
# Estimator setup
17+
estimator = GeneralizedLinearEstimator(
18+
datafit=PoissonGroup(grp_ptr, grp_indices),
19+
penalty=WeightedGroupL2(alpha=0.1, grp_ptr=grp_ptr, grp_indices=grp_indices,
20+
weights=np.ones(len(grp_ptr) - 1)),
21+
solver=GroupProxNewton()
22+
)
23+
24+
estimator.fit(X, y)
25+
print("Coefficients:", estimator.coef_)
26+
print("Intercept:", estimator.intercept_)
27+
y_pred = estimator.predict(X)
28+
print("First 5 predictions:", y_pred[:5])
29+
print("First 5 true values:", y[:5])
30+
print("MSE:", mean_squared_error(y, np.exp(y_pred)))

skglm/datafits/group.py

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from numba import int32, float64
44

55
from skglm.datafits.base import BaseDatafit
6-
from skglm.datafits.single_task import Logistic
6+
from skglm.datafits.single_task import Logistic, Poisson
77
from skglm.utils.sparse_ops import spectral_norm, sparse_columns_slice
88

99

@@ -161,3 +161,52 @@ def gradient_g(self, X, y, w, Xw, g):
161161
grad_g[idx] = X[:, j] @ raw_grad_val
162162

163163
return grad_g
164+
165+
166+
class PoissonGroup(Poisson):
167+
r"""Poisson datafit used with group penalties.
168+
169+
The datafit reads:
170+
171+
.. math:: 1 / n_"samples" \sum_{i=1}^{n_"samples"} (\exp((Xw)_i) - y_i (Xw)_i)
172+
173+
Attributes
174+
----------
175+
grp_indices : array, shape (n_features,)
176+
The group indices stacked contiguously
177+
``[grp1_indices, grp2_indices, ...]``.
178+
179+
grp_ptr : array, shape (n_groups + 1,)
180+
The group pointers such that two consecutive elements delimit
181+
the indices of a group in ``grp_indices``.
182+
"""
183+
184+
def __init__(self, grp_ptr, grp_indices):
185+
self.grp_ptr, self.grp_indices = grp_ptr, grp_indices
186+
187+
def get_spec(self):
188+
return (
189+
('grp_ptr', int32[:]),
190+
('grp_indices', int32[:]),
191+
)
192+
193+
def params_to_dict(self):
194+
return dict(grp_ptr=self.grp_ptr, grp_indices=self.grp_indices)
195+
196+
def gradient_g(self, X, y, w, Xw, g):
197+
grp_ptr, grp_indices = self.grp_ptr, self.grp_indices
198+
grp_g_indices = grp_indices[grp_ptr[g]: grp_ptr[g+1]]
199+
raw_grad_val = self.raw_grad(y, Xw)
200+
grad_g = np.zeros(len(grp_g_indices))
201+
for idx, j in enumerate(grp_g_indices):
202+
grad_g[idx] = X[:, j] @ raw_grad_val
203+
return grad_g
204+
205+
def gradient_g_sparse(self, X_data, X_indptr, X_indices, y, w, Xw, g):
206+
grp_ptr, grp_indices = self.grp_ptr, self.grp_indices
207+
grp_g_indices = grp_indices[grp_ptr[g]: grp_ptr[g+1]]
208+
grad_g = np.zeros(len(grp_g_indices))
209+
for idx, j in enumerate(grp_g_indices):
210+
grad_g[idx] = self.gradient_scalar_sparse(
211+
X_data, X_indptr, X_indices, y, Xw, j)
212+
return grad_g

0 commit comments

Comments
 (0)