2
2
import pytest
3
3
from sklearn .datasets import make_classification
4
4
from sklearn .linear_model import LogisticRegression
5
+ from sklearn .metrics import make_scorer , precision_score
5
6
from sklearn .model_selection import cross_validate
6
7
from sklearn .utils ._testing import assert_allclose
7
8
11
12
@pytest .fixture
12
13
def data ():
13
14
return make_classification (
14
- weights = [0.9 , 0.1 ],
15
- class_sep = 2 ,
15
+ weights = [0.5 , 0.5 ],
16
+ class_sep = 0.5 ,
16
17
n_informative = 3 ,
17
18
n_redundant = 1 ,
18
19
flip_y = 0.05 ,
19
- n_samples = 1000 ,
20
+ n_samples = 50 ,
20
21
random_state = 10 ,
21
22
)
22
23
23
24
24
25
def test_groups_parameter_warning (data ):
25
26
"""Test that a warning is raised when groups parameter is provided."""
26
27
X , y = data
27
- ih_cv = InstanceHardnessCV (estimator = LogisticRegression ())
28
+ ih_cv = InstanceHardnessCV (estimator = LogisticRegression (), n_splits = 3 )
28
29
29
30
warning_msg = "The groups parameter is ignored by InstanceHardnessCV"
30
31
with pytest .warns (UserWarning , match = warning_msg ):
@@ -42,9 +43,11 @@ def test_error_on_multiclass():
42
43
def test_default_params (data ):
43
44
"""Test that the default parameters are used."""
44
45
X , y = data
45
- ih_cv = InstanceHardnessCV (estimator = LogisticRegression ())
46
- cv_result = cross_validate (LogisticRegression (), X , y , cv = ih_cv )
47
- assert_allclose (cv_result ["test_score" ], [0.975 , 0.965 , 0.96 , 0.955 , 0.965 ])
46
+ ih_cv = InstanceHardnessCV (estimator = LogisticRegression (), n_splits = 3 )
47
+ cv_result = cross_validate (
48
+ LogisticRegression (), X , y , cv = ih_cv , scoring = "precision"
49
+ )
50
+ assert_allclose (cv_result ["test_score" ], [0.625 , 0.6 , 0.625 ], atol = 1e-6 , rtol = 1e-6 )
48
51
49
52
50
53
@pytest .mark .parametrize ("dtype_target" , [None , object ])
@@ -53,9 +56,15 @@ def test_target_string_labels(data, dtype_target):
53
56
X , y = data
54
57
labels = np .array (["a" , "b" ], dtype = dtype_target )
55
58
y = labels [y ]
56
- ih_cv = InstanceHardnessCV (estimator = LogisticRegression ())
57
- cv_result = cross_validate (LogisticRegression (), X , y , cv = ih_cv )
58
- assert_allclose (cv_result ["test_score" ], [0.975 , 0.965 , 0.96 , 0.955 , 0.965 ])
59
+ ih_cv = InstanceHardnessCV (estimator = LogisticRegression (), n_splits = 3 )
60
+ cv_result = cross_validate (
61
+ LogisticRegression (),
62
+ X ,
63
+ y ,
64
+ cv = ih_cv ,
65
+ scoring = make_scorer (precision_score , pos_label = "b" ),
66
+ )
67
+ assert_allclose (cv_result ["test_score" ], [0.625 , 0.6 , 0.625 ], atol = 1e-6 , rtol = 1e-6 )
59
68
60
69
61
70
@pytest .mark .parametrize ("dtype_target" , [None , object ])
@@ -68,9 +77,19 @@ def test_target_string_pos_label(data, dtype_target):
68
77
X , y = data
69
78
labels = np .array (["a" , "b" ], dtype = dtype_target )
70
79
y = labels [y ]
71
- ih_cv = InstanceHardnessCV (estimator = LogisticRegression (), pos_label = "a" )
72
- cv_result = cross_validate (LogisticRegression (), X , y , cv = ih_cv )
73
- assert_allclose (cv_result ["test_score" ], [0.965 , 0.975 , 0.965 , 0.955 , 0.96 ])
80
+ ih_cv = InstanceHardnessCV (
81
+ estimator = LogisticRegression (), pos_label = "a" , n_splits = 3
82
+ )
83
+ cv_result = cross_validate (
84
+ LogisticRegression (),
85
+ X ,
86
+ y ,
87
+ cv = ih_cv ,
88
+ scoring = make_scorer (precision_score , pos_label = "a" ),
89
+ )
90
+ assert_allclose (
91
+ cv_result ["test_score" ], [0.666667 , 0.666667 , 0.4 ], atol = 1e-6 , rtol = 1e-6
92
+ )
74
93
75
94
76
95
@pytest .mark .parametrize ("n_splits" , [2 , 3 , 4 ])
0 commit comments