1
+ import pytest
1
2
import numpy as np
3
+ import pandas as pd
2
4
3
- from skglm .solvers import LBFGS
4
5
from skglm .penalties import L2
5
- from skglm .datafits import Logistic
6
+ from skglm .solvers import LBFGS
7
+ from skglm .datafits import Logistic , Cox
6
8
7
9
from sklearn .linear_model import LogisticRegression
8
10
9
- from skglm .utils .data import make_correlated_data
10
11
from skglm .utils .jit_compilation import compiled_clone
12
+ from skglm .utils .data import make_correlated_data , make_dummy_survival_data
11
13
12
14
13
15
def test_lbfgs_L2_logreg ():
14
16
reg = 1.
15
- n_samples , n_features = 50 , 10
17
+ n_samples , n_features = 100 , 50
16
18
17
19
X , y , _ = make_correlated_data (
18
20
n_samples , n_features , random_state = 0 )
@@ -21,19 +23,59 @@ def test_lbfgs_L2_logreg():
21
23
# fit L-BFGS
22
24
datafit = compiled_clone (Logistic ())
23
25
penalty = compiled_clone (L2 (reg ))
24
- w , * _ = LBFGS ().solve (X , y , datafit , penalty )
26
+ w , * _ = LBFGS (tol = 1e-12 ).solve (X , y , datafit , penalty )
25
27
26
28
# fit scikit learn
27
29
estimator = LogisticRegression (
28
30
penalty = 'l2' ,
29
31
C = 1 / (n_samples * reg ),
30
- fit_intercept = False
31
- )
32
- estimator .fit (X , y )
32
+ fit_intercept = False ,
33
+ tol = 1e-12 ,
34
+ ).fit (X , y )
35
+
36
+ np .testing .assert_allclose (w , estimator .coef_ .flatten ())
37
+
38
+
39
@pytest.mark.parametrize("use_efron", [True, False])
def test_L2_Cox(use_efron):
    """Compare the L-BFGS solution of an L2-penalized Cox model with lifelines.

    The same survival dataset is fitted twice: once with ``LBFGS`` on the
    ``Cox`` datafit plus ``L2`` penalty, and once with lifelines'
    ``CoxPHFitter`` using ``penalizer=alpha`` and ``l1_ratio=0.``. The test
    then checks that both the coefficients and the objective values agree.

    Parameters
    ----------
    use_efron : bool
        Passed to ``Cox(use_efron)`` and also used as ``with_ties`` when
        generating the data, so the Efron variant is exercised on data
        generated with ties.
    """
    # A missing optional dependency is an environment limitation, not an
    # expected failure of the code under test: report it as a skip.
    lifelines = pytest.importorskip(
        "lifelines",
        reason=(
            "Testing L2 Cox Estimator requires `lifelines` packages\n"
            "Run `pip install lifelines`"
        ),
    )
    CoxPHFitter = lifelines.CoxPHFitter

    alpha = 10.
    n_samples, n_features = 100, 50

    tm, s, X = make_dummy_survival_data(
        n_samples, n_features, normalize=True,
        with_ties=use_efron, random_state=0)

    datafit = compiled_clone(Cox(use_efron))
    penalty = compiled_clone(L2(alpha))

    # the datafit is initialized on the data before being handed to the solver
    datafit.initialize(X, (tm, s))
    w, *_ = LBFGS().solve(X, (tm, s), datafit, penalty)

    # fit lifelines estimator: column 0 holds `tm` (duration), column 1 holds
    # `s` (event), and the remaining columns are the features of X
    stacked_tm_s_X = np.hstack((tm[:, None], s[:, None], X))
    df = pd.DataFrame(stacked_tm_s_X)

    estimator = CoxPHFitter(penalizer=alpha, l1_ratio=0.).fit(
        df, duration_col=0, event_col=1
    )
    w_ll = estimator.params_.values

    # evaluate both solutions with the same (skglm) objective
    p_obj_skglm = datafit.value((tm, s), w, X @ w) + penalty.value(w)
    p_obj_ll = datafit.value((tm, s), w_ll, X @ w_ll) + penalty.value(w_ll)

    # The coefficients are only compared with a loose tolerance: the two
    # solutions remain quite far apart. The discrepancy is suspected to come
    # from lifelines (https://github.com/CamDavidsonPilon/lifelines/pull/1534),
    # as the skglm solution gives the lowest objective value.
    np.testing.assert_allclose(w, w_ll, rtol=1e-1)
    np.testing.assert_allclose(p_obj_skglm, p_obj_ll, rtol=1e-6)
37
79
38
80
39
81
if __name__ == "__main__" :
0 commit comments