 import scipy.optimize as optimize
 import numbers

+from .._utils import (
+    getFPType, sklearn_check_version, PatchingConditionsChain)
 from .logistic_loss import (_daal4py_loss_and_grad,
                             _daal4py_logistic_loss_extra_args,
                             _daal4py_cross_entropy_loss_extra_args,
 from sklearn.utils.validation import _check_sample_weight, check_is_fitted
 from sklearn.linear_model._sag import sag_solver
 from sklearn.utils.optimize import _newton_cg, _check_optimize_result
-from sklearn.linear_model._logistic import (
-    _check_solver,
-    _check_multi_class,
-    _fit_liblinear,
-    _logistic_loss_and_grad,
-    _logistic_loss,
-    _logistic_grad_hess,
-    _multinomial_loss,
-    _multinomial_loss_grad,
-    _multinomial_grad_hess,
-    _LOGISTIC_SOLVER_CONVERGENCE_MSG,
-    LogisticRegression as LogisticRegression_original)
+if sklearn_check_version('1.1'):
+    from sklearn.linear_model._linear_loss import LinearModelLoss
+    from sklearn._loss.loss import HalfBinomialLoss, HalfMultinomialLoss
+    from sklearn.linear_model._logistic import (
+        _check_solver,
+        _check_multi_class,
+        _fit_liblinear,
+        _LOGISTIC_SOLVER_CONVERGENCE_MSG,
+        LogisticRegression as LogisticRegression_original)
+else:
+    from sklearn.linear_model._logistic import (
+        _check_solver,
+        _check_multi_class,
+        _fit_liblinear,
+        _logistic_loss_and_grad,
+        _logistic_loss,
+        _logistic_grad_hess,
+        _multinomial_loss,
+        _multinomial_loss_grad,
+        _multinomial_grad_hess,
+        _LOGISTIC_SOLVER_CONVERGENCE_MSG,
+        LogisticRegression as LogisticRegression_original)
 from sklearn.preprocessing import LabelEncoder, LabelBinarizer
-from .._utils import (
-    getFPType, sklearn_check_version, PatchingConditionsChain)
 from .._device_offload import support_usm_ndarray
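All of the new code paths below hinge on the runtime version check imported in this hunk. `sklearn_check_version` is daal4py's own helper from `_utils`; the stand-in below is only a sketch of the gating idea, and its `packaging`-based implementation is an assumption, not daal4py's actual code:

```python
# Illustrative stand-in for daal4py's sklearn_check_version; the real helper
# lives in daal4py/sklearn/_utils.py and may be implemented differently.
from packaging.version import Version
from sklearn import __version__ as sklearn_version


def sklearn_check_version(ver):
    # True when the installed scikit-learn is at least `ver`.
    return Version(sklearn_version) >= Version(ver)


if sklearn_check_version('1.1'):
    # sklearn >= 1.1 exposes loss objects instead of the old loss helpers.
    from sklearn.linear_model._linear_loss import LinearModelLoss
    from sklearn._loss.loss import HalfBinomialLoss
else:
    # Older sklearn still ships the function-style losses used further down.
    from sklearn.linear_model._logistic import _logistic_loss_and_grad
```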
@@ -74,6 +84,7 @@ def __logistic_regression_path(
     max_squared_sum=None,
     sample_weight=None,
     l1_ratio=None,
+    n_threads=1,
 ):
     """Compute a Logistic Regression model for a list of regularization
     parameters.
@@ -231,8 +242,20 @@ def __logistic_regression_path(

     # Preprocessing.
     if check_input:
-        X = check_array(X, accept_sparse='csr', dtype=np.float64,
-                        accept_large_sparse=solver != 'liblinear')
+        if sklearn_check_version('1.1'):
+            X = check_array(
+                X,
+                accept_sparse='csr',
+                dtype=np.float64,
+                accept_large_sparse=solver not in ["liblinear", "sag", "saga"],
+            )
+        else:
+            X = check_array(
+                X,
+                accept_sparse='csr',
+                dtype=np.float64,
+                accept_large_sparse=solver != 'liblinear',
+            )
         y = check_array(y, ensure_2d=False, dtype=None)
         check_consistent_length(X, y)
     _, n_features = X.shape
@@ -278,12 +301,26 @@ def __logistic_regression_path(
     # For doing a ovr, we need to mask the labels first. for the
     # multinomial case this is not necessary.
     if multi_class == 'ovr':
-        w0 = np.zeros(n_features + int(fit_intercept), dtype=X.dtype)
-        mask_classes = np.array([-1, 1])
-        mask = (y == pos_class)
         y_bin = np.ones(y.shape, dtype=X.dtype)
-        y_bin[~mask] = -1.
-        # for compute_class_weight
+
+        if sklearn_check_version('1.1'):
+            mask = (y == pos_class)
+            y_bin = np.ones(y.shape, dtype=X.dtype)
+            # for compute_class_weight
+
+            if solver in ["lbfgs", "newton-cg"]:
+                # HalfBinomialLoss, used for those solvers, represents y in [0, 1] instead
+                # of in [-1, 1].
+                mask_classes = np.array([0, 1])
+                y_bin[~mask] = 0.0
+            else:
+                mask_classes = np.array([-1, 1])
+                y_bin[~mask] = -1.0
+        else:
+            mask_classes = np.array([-1, 1])
+            mask = (y == pos_class)
+            y_bin[~mask] = -1.
+            # for compute_class_weight

         if class_weight == "balanced" and not _dal_ready:
             class_weight_ = compute_class_weight(class_weight, classes=mask_classes,
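The comment in the added branch is the behavioral crux of this hunk: on scikit-learn 1.1, lbfgs and newton-cg route through `HalfBinomialLoss`, which expects the binary target coded as 0/1, while the other solvers (and older scikit-learn) keep the -1/+1 coding. A small illustration of the two encodings, with made-up labels:

```python
import numpy as np

y = np.array(["cat", "dog", "dog", "cat"])   # toy labels, not from the patch
pos_class = "dog"
mask = (y == pos_class)

# sklearn >= 1.1 with lbfgs / newton-cg: HalfBinomialLoss wants targets in {0, 1}.
y_bin_01 = np.ones(y.shape, dtype=np.float64)
y_bin_01[~mask] = 0.0     # -> array([0., 1., 1., 0.])

# Other solvers and older sklearn: targets stay in {-1, +1}.
y_bin_pm1 = np.ones(y.shape, dtype=np.float64)
y_bin_pm1[~mask] = -1.0   # -> array([-1., 1., 1., -1.])
```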
@@ -298,18 +335,36 @@ def __logistic_regression_path(
             w0 = np.zeros(n_features + int(fit_intercept), dtype=X.dtype)

     else:
-        if solver not in ['sag', 'saga']:
-            if _dal_ready:
-                Y_multi = le.fit_transform(y).astype(X.dtype, copy=False)
+        if sklearn_check_version('1.1'):
+            if solver in ["sag", "saga", "lbfgs", "newton-cg"]:
+                # SAG, lbfgs and newton-cg multinomial solvers need LabelEncoder,
+                # not LabelBinarizer, i.e. y as a 1d-array of integers.
+                # LabelEncoder also saves memory compared to LabelBinarizer, especially
+                # when n_classes is large.
+                if _dal_ready:
+                    Y_multi = le.fit_transform(y).astype(X.dtype, copy=False)
+                else:
+                    le = LabelEncoder()
+                    Y_multi = le.fit_transform(y).astype(X.dtype, copy=False)
             else:
+                # For liblinear solver, apply LabelBinarizer, i.e. y is one-hot encoded.
                 lbin = LabelBinarizer()
                 Y_multi = lbin.fit_transform(y)
                 if Y_multi.shape[1] == 1:
                     Y_multi = np.hstack([1 - Y_multi, Y_multi])
         else:
-            # SAG multinomial solver needs LabelEncoder, not LabelBinarizer
-            le = LabelEncoder()
-            Y_multi = le.fit_transform(y).astype(X.dtype, copy=False)
+            if solver not in ['sag', 'saga']:
+                if _dal_ready:
+                    Y_multi = le.fit_transform(y).astype(X.dtype, copy=False)
+                else:
+                    lbin = LabelBinarizer()
+                    Y_multi = lbin.fit_transform(y)
+                    if Y_multi.shape[1] == 1:
+                        Y_multi = np.hstack([1 - Y_multi, Y_multi])
+            else:
+                # SAG multinomial solver needs LabelEncoder, not LabelBinarizer
+                le = LabelEncoder()
+                Y_multi = le.fit_transform(y).astype(X.dtype, copy=False)

     if _dal_ready:
         w0 = np.zeros((classes.size, n_features + 1),
@@ -368,8 +423,17 @@ def __logistic_regression_path(
         if solver in ['lbfgs', 'newton-cg']:
             if _dal_ready and classes.size == 2:
                 w0 = w0[-1:, :]
-            w0 = w0.ravel()
+            if sklearn_check_version('1.1'):
+                w0 = w0.ravel(order="F")
+            else:
+                w0 = w0.ravel()
         target = Y_multi
+        loss = None
+        if sklearn_check_version('1.1'):
+            loss = LinearModelLoss(
+                base_loss=HalfMultinomialLoss(n_classes=classes.size),
+                fit_intercept=fit_intercept,
+            )
         if solver == 'lbfgs':
             if _dal_ready:
                 if classes.size == 2:
@@ -381,8 +445,11 @@ def __logistic_regression_path(
                     daal_extra_args_func = _daal4py_cross_entropy_loss_extra_args
                     func = _daal4py_loss_and_grad
             else:
-                def func(x, *args):
-                    return _multinomial_loss_grad(x, *args)[0:2]
+                if sklearn_check_version('1.1') and loss is not None:
+                    func = loss.loss_gradient
+                else:
+                    def func(x, *args):
+                        return _multinomial_loss_grad(x, *args)[0:2]
         elif solver == 'newton-cg':
             if _dal_ready:
                 if classes.size == 2:
@@ -396,12 +463,17 @@ def func(x, *args):
                 grad = _daal4py_grad_
                 hess = _daal4py_grad_hess_
             else:
-                def func(x, *args):
-                    return _multinomial_loss(x, *args)[0]
+                if sklearn_check_version('1.1') and loss is not None:
+                    func = loss.loss
+                    grad = loss.gradient
+                    hess = loss.gradient_hessian_product  # hess = [gradient, hessp]
+                else:
+                    def func(x, *args):
+                        return _multinomial_loss(x, *args)[0]

-                def grad(x, *args):
-                    return _multinomial_loss_grad(x, *args)[1]
-                hess = _multinomial_grad_hess
+                    def grad(x, *args):
+                        return _multinomial_loss_grad(x, *args)[1]
+                    hess = _multinomial_grad_hess
         warm_start_sag = {'coef': w0.T}
     else:
         target = y_bin
@@ -410,19 +482,33 @@ def grad(x, *args):
                 func = _daal4py_loss_and_grad
                 daal_extra_args_func = _daal4py_logistic_loss_extra_args
             else:
-                func = _logistic_loss_and_grad
+                if sklearn_check_version('1.1'):
+                    loss = LinearModelLoss(
+                        base_loss=HalfBinomialLoss(), fit_intercept=fit_intercept
+                    )
+                    func = loss.loss_gradient
+                else:
+                    func = _logistic_loss_and_grad
         elif solver == 'newton-cg':
             if _dal_ready:
                 daal_extra_args_func = _daal4py_logistic_loss_extra_args
                 func = _daal4py_loss_
                 grad = _daal4py_grad_
                 hess = _daal4py_grad_hess_
             else:
-                func = _logistic_loss
+                if sklearn_check_version('1.1'):
+                    loss = LinearModelLoss(
+                        base_loss=HalfBinomialLoss(), fit_intercept=fit_intercept
+                    )
+                    func = loss.loss
+                    grad = loss.gradient
+                    hess = loss.gradient_hessian_product  # hess = [gradient, hessp]
+                else:
+                    func = _logistic_loss

-                def grad(x, *args):
-                    return _logistic_loss_and_grad(x, *args)[1]
-                hess = _logistic_grad_hess
+                    def grad(x, *args):
+                        return _logistic_loss_and_grad(x, *args)[1]
+                    hess = _logistic_grad_hess
         warm_start_sag = {'coef': np.expand_dims(w0, axis=1)}

     coefs = list()
@@ -443,7 +529,11 @@ def grad(x, *args):
                     hessian=False
                 )
             else:
-                extra_args = (X, target, 1. / C, sample_weight)
+                if sklearn_check_version('1.1'):
+                    l2_reg_strength = 1.0 / C
+                    extra_args = (X, target, sample_weight, l2_reg_strength, n_threads)
+                else:
+                    extra_args = (X, target, 1. / C, sample_weight)

             iprint = [-1, 50, 1, 100, 101][
                 np.searchsorted(np.array([0, 1, 2, 3]), verbose)]
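The new `extra_args` tuple mirrors the positional signature of the `LinearModelLoss` methods that replace `_logistic_loss_and_grad` and friends on scikit-learn 1.1: after the coefficients they take `(X, y, sample_weight, l2_reg_strength, n_threads)`, with `l2_reg_strength = 1 / C`. A hedged sketch of that call on toy data (array values invented for illustration):

```python
# Sketch of the sklearn >= 1.1 objective evaluation that the lbfgs branch
# drives through func = loss.loss_gradient; the data below is made up.
import numpy as np
from sklearn._loss.loss import HalfBinomialLoss
from sklearn.linear_model._linear_loss import LinearModelLoss

X = np.array([[0.0, 1.0], [1.0, 0.0], [1.0, 1.0]])
y = np.array([0.0, 1.0, 1.0])            # HalfBinomialLoss target in {0, 1}
w0 = np.zeros(X.shape[1] + 1)            # coefficients plus intercept term

loss = LinearModelLoss(base_loss=HalfBinomialLoss(), fit_intercept=True)
l2_reg_strength = 1.0 / 1.0              # i.e. 1 / C for C = 1

# Same positional extras as extra_args = (X, target, sample_weight,
# l2_reg_strength, n_threads) in the hunk above.
value, grad = loss.loss_gradient(
    w0, X, y, sample_weight=None, l2_reg_strength=l2_reg_strength, n_threads=1
)
```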
@@ -491,7 +581,12 @@ def _func_(x, *args):
                     w0, args=extra_args,
                     maxiter=max_iter, tol=tol)
             else:
-                args = (X, target, 1. / C, sample_weight)
+                if sklearn_check_version('1.1'):
+                    l2_reg_strength = 1.0 / C
+                    args = (X, target, sample_weight, l2_reg_strength, n_threads)
+                else:
+                    args = (X, target, 1. / C, sample_weight)
+
                 w0, n_iter_i = _newton_cg(
                     hess, func, grad, w0, args=args, maxiter=max_iter, tol=tol
                 )
@@ -563,8 +658,14 @@ def _func_(x, *args):
                 else:
                     multi_w0 = np.reshape(w0, (classes.size, -1))
             else:
-                n_classes = max(2, classes.size)
-                multi_w0 = np.reshape(w0, (n_classes, -1))
+                if sklearn_check_version('1.1'):
+                    if solver in ["lbfgs", "newton-cg"]:
+                        multi_w0 = np.reshape(w0, (n_classes, -1), order="F")
+                    else:
+                        multi_w0 = w0
+                else:
+                    n_classes = max(2, classes.size)
+                    multi_w0 = np.reshape(w0, (n_classes, -1))
             if n_classes == 2:
                 multi_w0 = multi_w0[1][np.newaxis, :]
             coefs.append(multi_w0.copy())
@@ -686,7 +787,25 @@ def logistic_regression_path(
     max_squared_sum=None,
     sample_weight=None,
     l1_ratio=None,
+    n_threads=1,
 ):
+    if sklearn_check_version('1.1'):
+        return __logistic_regression_path(
+            X, y, pos_class=pos_class,
+            Cs=Cs, fit_intercept=fit_intercept,
+            max_iter=max_iter, tol=tol, verbose=verbose,
+            solver=solver, coef=coef,
+            class_weight=class_weight,
+            dual=dual, penalty=penalty,
+            intercept_scaling=intercept_scaling,
+            multi_class=multi_class,
+            random_state=random_state,
+            check_input=check_input,
+            max_squared_sum=max_squared_sum,
+            sample_weight=sample_weight,
+            l1_ratio=l1_ratio,
+            n_threads=n_threads
+        )
     return __logistic_regression_path(
         X, y, pos_class=pos_class,
         Cs=Cs, fit_intercept=fit_intercept,