4 changes: 3 additions & 1 deletion skglm/datafits/__init__.py
@@ -1,5 +1,7 @@
 from .base import BaseDatafit, BaseMultitaskDatafit
-from .single_task import Quadratic, QuadraticSVC, Logistic, Huber, Poisson, Gamma, Cox
+from .single_task import (
+    Quadratic, QuadraticSVC, CensoredQuadratic, Logistic, Huber, Poisson, Gamma, Cox,
+)
 from .multi_task import QuadraticMultiTask
 from .group import QuadraticGroup, LogisticGroup

98 changes: 97 additions & 1 deletion skglm/datafits/single_task.py
@@ -75,7 +75,8 @@ def initialize_sparse(self, X_data, X_indptr, X_indices, y):
             self.Xty[j] = xty

     def get_global_lipschitz(self, X, y):
-        return norm(X, ord=2) ** 2 / len(y)
+        n_samples = X.shape[0]
+        return norm(X, ord=2) ** 2 / n_samples

     def get_global_lipschitz_sparse(self, X_data, X_indptr, X_indices, y):
         return spectral_norm(X_data, X_indptr, X_indices, len(y)) ** 2 / len(y)
@@ -107,6 +108,101 @@ def full_grad_sparse(
    def intercept_update_step(self, y, Xw):
        return np.mean(Xw - y)

class CensoredQuadratic(Quadratic):
    """Quadratic datafit with access to Xty but not to y.

    The datafit reads:

    .. math:: 1 / (2 xx n_"samples") ||Xw||_2 ^ 2 - 1 / (n_"samples") w^T (X^T y)
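
    Up to the constant ``||y||_2 ^ 2 / (2 xx n_"samples")``, which does not
    depend on ``w``, this equals the standard ``Quadratic`` datafit, so both
    share the same minimizer.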

    Attributes
    ----------
    Xty : array, shape (n_features,)
        Pre-computed quantity used during the gradient evaluation.
        Equal to ``X.T @ y``.

    y_mean : float
        Mean of the unobserved target ``y``, used for the intercept update.

    Note
    ----
    The class is JIT-compiled at fit time using the Numba compiler.
    This allows for faster computations.
    """

    def __init__(self, Xty, y_mean):
        self.Xty = Xty
        self.y_mean = y_mean
Comment on lines +130 to +132
Collaborator

We'd better leave the constructor to define the datafit hyper-parameters.
We can move Xty and y_mean to the initialize method.

Collaborator Author

This would break the existing code: all solvers call datafit.initialize(X, y) internally.

Collaborator Author

@mathurinm mathurinm May 28, 2024

Maybe a better API would be to instantiate all datafits with X, y, and whatever else they need (each has its own API), then call datafit.initialize(), which would use the stored attributes.

e.g.

datafit = Quadratic(X, y)
penalty = L1(alpha=0.1)
solver = AndersonCD()
solver.solve(datafit, penalty)

It would give each datafit more freedom to require various quantities.

To me this makes more sense because we have datafits like Cox depending on more than just X and y.

Wdyt @BadrMOUFAD?

Edit: this may break GeneralizedEstimator, which would need X and y at instantiation rather than at fit time.
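
A minimal sketch of the proposed pattern (hypothetical signatures; the current skglm API differs):

class Quadratic(BaseDatafit):
    def __init__(self, X, y):
        # data handed over at instantiation instead of at solve time
        self.X, self.y = X, y

    def initialize(self):
        # pre-computations use only the stored attributes
        self.Xty = self.X.T @ self.y

datafit = Quadratic(X, y)
penalty = L1(alpha=0.1)
solver = AndersonCD()
w = solver.solve(datafit, penalty)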


    def get_spec(self):
        spec = (
            ('Xty', float64[:]),
            ('y_mean', float64),
        )
        return spec

    def params_to_dict(self):
        return dict(Xty=self.Xty, y_mean=self.y_mean)

    # def get_lipschitz(self, X, y):
    #     n_features = X.shape[1]
    #     lipschitz = np.zeros(n_features, dtype=X.dtype)
    #     for j in range(n_features):
    #         lipschitz[j] = (X[:, j] ** 2).sum() / len(y)
    #     return lipschitz

    # XXX TODO check without y? or pass y = np.zeros(n_samples)
    # def get_lipschitz_sparse(self, X_data, X_indptr, X_indices, y):
    #     n_features = len(X_indptr) - 1
    #     lipschitz = np.zeros(n_features, dtype=X_data.dtype)
    #     for j in range(n_features):
    #         nrm2 = 0.
    #         for idx in range(X_indptr[j], X_indptr[j + 1]):
    #             nrm2 += X_data[idx] ** 2
    #         lipschitz[j] = nrm2 / len(y)
    #     return lipschitz

    def initialize(self, X, y):
        pass  # nothing to pre-compute: Xty and y_mean are given at instantiation

    def initialize_sparse(self, X_data, X_indptr, X_indices, y):
        pass  # nothing to pre-compute: Xty and y_mean are given at instantiation

    # def get_global_lipschitz(self, X, y):
    #     return norm(X, ord=2) ** 2 / len(y)

    # def get_global_lipschitz_sparse(self, X_data, X_indptr, X_indices, y):
    #     return spectral_norm(X_data, X_indptr, X_indices, len(y)) ** 2 / len(y)
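
    # Note: the Lipschitz computations inherited from Quadratic should remain
    # valid here, since those methods use y only through len(y); a zero-filled
    # placeholder of length n_samples is enough.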

    def value(self, y, w, Xw):
        return np.sum(Xw ** 2) / (2 * len(Xw)) - w @ self.Xty / len(Xw)

    # def gradient_scalar(self, X, y, w, Xw, j):
    #     return (X[:, j] @ Xw - self.Xty[j]) / len(Xw)

    # def gradient_scalar_sparse(self, X_data, X_indptr, X_indices, y, Xw, j):
    #     XjTXw = 0.
    #     for i in range(X_indptr[j], X_indptr[j+1]):
    #         XjTXw += X_data[i] * Xw[X_indices[i]]
    #     return (XjTXw - self.Xty[j]) / len(Xw)

    # def full_grad_sparse(self, X_data, X_indptr, X_indices, y, Xw):
    #     n_features = X_indptr.shape[0] - 1
    #     n_samples = y.shape[0]
    #     grad = np.zeros(n_features, dtype=Xw.dtype)
    #     for j in range(n_features):
    #         XjTXw = 0.
    #         for i in range(X_indptr[j], X_indptr[j + 1]):
    #             XjTXw += X_data[i] * Xw[X_indices[i]]
    #         grad[j] = (XjTXw - self.Xty[j]) / n_samples
    #     return grad

    def intercept_update_step(self, y, Xw):
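        # equivalent to Quadratic's np.mean(Xw - y), since
        # np.mean(Xw - y) == np.mean(Xw) - np.mean(y)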
        return np.mean(Xw) - self.y_mean  # MM TODO check


@njit
def sigmoid(x):
24 changes: 24 additions & 0 deletions skglm/demo_censored.py
@@ -0,0 +1,24 @@
import numpy as np


from skglm.datafits import Quadratic, CensoredQuadratic
from skglm.penalties import L1
from skglm.solvers import AndersonCD
from skglm.utils.jit_compilation import compiled_clone
from skglm.utils.data import make_correlated_data

# 100 samples, 150 features: an under-determined problem.
X, y, _ = make_correlated_data(100, 150)

# alpha=0: no penalty, plain least squares.
pen = compiled_clone(L1(alpha=0))

solver = AndersonCD(verbose=3, fit_intercept=True)

# Reference fit with the standard Quadratic datafit, which accesses y.
df = Quadratic()
df = compiled_clone(df)

w = solver.solve(X, y, df, pen)[0]

# Censored fit: only X.T @ y and mean(y) are provided, never y itself.
df2 = CensoredQuadratic(X.T @ y, y.mean())
df2 = compiled_clone(df2)

# The solver still expects a y argument, so pass a zero-filled placeholder.
w2 = solver.solve(X, np.zeros(X.shape[0]), df2, pen)[0]

# The two datafits differ by a constant only, so the solutions should coincide.
np.testing.assert_allclose(w2, w)