@@ -500,12 +500,13 @@ def get_link(link: Union[str, Link], family: ExponentialDispersionModel) -> Link
500500
501501
502502def setup_p1 (
503- P1 : Union [str , np .ndarray ],
503+ P1 : Optional [ Union [str , np .ndarray ] ],
504504 X : Union [tm .MatrixBase , tm .StandardizedMatrix ],
505- _dtype ,
505+ dtype ,
506506 alpha : float ,
507507 l1_ratio : float ,
508508) -> np .ndarray :
509+
509510 if not isinstance (X , (tm .MatrixBase , tm .StandardizedMatrix )):
510511 raise TypeError
511512
@@ -514,11 +515,13 @@ def setup_p1(
514515 if isinstance (P1 , str ):
515516 if P1 != "identity" :
516517 raise ValueError (f"P1 must be either 'identity' or an array; got { P1 } ." )
517- P1 = np .ones (n_features , dtype = _dtype )
518+ P1 = np .ones (n_features , dtype = dtype )
519+ elif P1 is None :
520+ P1 = np .ones (n_features , dtype = dtype )
518521 else :
519522 P1 = np .atleast_1d (P1 )
520523 try :
521- P1 = P1 .astype (_dtype , casting = "safe" , copy = False )
524+ P1 = P1 .astype (dtype , casting = "safe" , copy = False )
522525 except TypeError as e :
523526 raise TypeError (
524527 "The given P1 cannot be converted to a numeric array; "
@@ -533,37 +536,41 @@ def setup_p1(
533536
534537 # P1 and P2 are now for sure copies
535538 P1 = alpha * l1_ratio * P1
536- return cast (np .ndarray , P1 ).astype (_dtype )
539+ return cast (np .ndarray , P1 ).astype (dtype )
537540
538541
539542def setup_p2 (
540- P2 : Union [str , np .ndarray , sparse .spmatrix ],
543+ P2 : Optional [ Union [str , np .ndarray , sparse .spmatrix ] ],
541544 X : Union [tm .MatrixBase , tm .StandardizedMatrix ],
542- _stype ,
543- _dtype ,
545+ stype ,
546+ dtype ,
544547 alpha : float ,
545548 l1_ratio : float ,
546549) -> Union [np .ndarray , sparse .spmatrix ]:
550+
547551 if not isinstance (X , (tm .MatrixBase , tm .StandardizedMatrix )):
548552 raise TypeError
549553
550554 n_features = X .shape [1 ]
551555
556+ def _setup_sparse_p2 (P2 ):
557+ return (sparse .dia_matrix ((P2 , 0 ), shape = (n_features , n_features ))).tocsc ()
558+
552559 if isinstance (P2 , str ):
553560 if P2 != "identity" :
554561 raise ValueError (f"P2 must be either 'identity' or an array. Got { P2 } ." )
555562 if sparse .issparse (X ): # if X is sparse, make P2 sparse, too
556- P2 = (
557- sparse . dia_matrix (
558- ( np .ones (n_features , dtype = _dtype ), 0 ),
559- shape = ( n_features , n_features ),
560- )
561- ). tocsc ( )
563+ P2 = _setup_sparse_p2 ( np . ones ( n_features , dtype = dtype ))
564+ else :
565+ P2 = np .ones (n_features , dtype = dtype )
566+ elif P2 is None :
567+ if sparse . issparse ( X ): # if X is sparse, make P2 sparse, too
568+ P2 = _setup_sparse_p2 ( np . ones ( n_features , dtype = dtype ) )
562569 else :
563- P2 = np .ones (n_features , dtype = _dtype )
570+ P2 = np .ones (n_features , dtype = dtype )
564571 else :
565572 P2 = check_array (
566- P2 , copy = True , accept_sparse = _stype , dtype = _dtype , ensure_2d = False
573+ P2 , copy = True , accept_sparse = stype , dtype = dtype , ensure_2d = False
567574 )
568575 P2 = cast (np .ndarray , P2 )
569576 if P2 .ndim == 1 :
@@ -575,9 +582,7 @@ def setup_p2(
575582 f"got (P2.shape={ P2 .shape } )."
576583 )
577584 if sparse .issparse (X ):
578- P2 = (
579- sparse .dia_matrix ((P2 , 0 ), shape = (n_features , n_features ))
580- ).tocsc ()
585+ P2 = _setup_sparse_p2 (P2 )
581586 elif P2 .ndim == 2 and P2 .shape [0 ] == P2 .shape [1 ] and P2 .shape [0 ] == n_features :
582587 if sparse .issparse (X ):
583588 P2 = sparse .csc_matrix (P2 )
@@ -604,7 +609,7 @@ def setup_p2(
604609
605610
606611def initialize_start_params (
607- start_params : Optional [np .ndarray ], n_cols : int , fit_intercept : bool , _dtype
612+ start_params : Optional [np .ndarray ], n_cols : int , fit_intercept : bool , dtype
608613) -> Optional [np .ndarray ]:
609614 if start_params is None :
610615 return None
@@ -614,7 +619,7 @@ def initialize_start_params(
614619 accept_sparse = False ,
615620 force_all_finite = True ,
616621 ensure_2d = False ,
617- dtype = _dtype ,
622+ dtype = dtype ,
618623 copy = True ,
619624 )
620625
@@ -696,12 +701,12 @@ class GeneralizedLinearRegressorBase(BaseEstimator, RegressorMixin):
696701 def __init__ (
697702 self ,
698703 l1_ratio : float = 0 ,
699- P1 = "identity" ,
700- P2 : Union [str , np .ndarray , sparse .spmatrix ] = "identity" ,
704+ P1 : Optional [ Union [ str , np . ndarray ]] = "identity" ,
705+ P2 : Optional [ Union [str , np .ndarray , sparse .spmatrix ] ] = "identity" ,
701706 fit_intercept = True ,
702707 family : Union [str , ExponentialDispersionModel ] = "normal" ,
703708 link : Union [str , Link ] = "auto" ,
704- solver = "auto" ,
709+ solver : str = "auto" ,
705710 max_iter = 100 ,
706711 gradient_tol : Optional [float ] = None ,
707712 step_size_tol : Optional [float ] = None ,
@@ -2186,14 +2191,14 @@ def _validate_hyperparameters(self) -> None:
21862191 "scale_predictors=True is not supported when fit_intercept=False."
21872192 )
21882193 if ((self .lower_bounds is not None ) or (self .upper_bounds is not None )) and (
2189- self .solver not in ["irls-cd " , "auto " ]
2194+ self .solver not in ["auto " , "irls-cd " ]
21902195 ):
21912196 raise ValueError (
21922197 "Only the 'cd' solver is supported when bounds are set; "
21932198 f"got { self .solver } ."
21942199 )
21952200 if ((self .A_ineq is not None ) or (self .b_ineq is not None )) and (
2196- self .solver not in ["trust-constr " , "auto " ]
2201+ self .solver not in [None , "auto " , "trust-constr " ]
21972202 ):
21982203 raise ValueError (
21992204 "Only the 'trust-constr' solver supports inequality constraints; "
@@ -2249,11 +2254,8 @@ def _set_up_and_check_fit_args(
22492254 Union [str , np .ndarray ],
22502255 Union [str , np .ndarray ],
22512256 ]:
2252- _dtype = [np .float64 , np .float32 ]
2253- if solver == "irls-cd" :
2254- _stype = ["csc" ]
2255- else :
2256- _stype = ["csc" , "csr" ]
2257+ dtype = [np .float64 , np .float32 ]
2258+ stype = ["csc" ] if solver == "irls-cd" else ["csc" , "csr" ]
22572259
22582260 P1 = self .P1
22592261 P2 = self .P2
@@ -2357,8 +2359,8 @@ def _expand_categorical_penalties(penalty, X, drop_first):
23572359 X , y = check_X_y_tabmat_compliant (
23582360 X ,
23592361 y ,
2360- accept_sparse = _stype ,
2361- dtype = _dtype ,
2362+ accept_sparse = stype ,
2363+ dtype = dtype ,
23622364 copy = copy_X ,
23632365 force_all_finite = force_all_finite ,
23642366 drop_first = getattr (self , "drop_first" , False ),
@@ -2369,8 +2371,8 @@ def _expand_categorical_penalties(penalty, X, drop_first):
23692371 X ,
23702372 y ,
23712373 ensure_2d = True ,
2372- accept_sparse = _stype ,
2373- dtype = _dtype ,
2374+ accept_sparse = stype ,
2375+ dtype = dtype ,
23742376 copy = copy_X ,
23752377 force_all_finite = force_all_finite ,
23762378 )
@@ -2459,7 +2461,7 @@ class GeneralizedLinearRegressor(GeneralizedLinearRegressorBase):
24592461 is an L1 penalty. For ``0 < l1_ratio < 1``, the penalty is a
24602462 combination of L1 and L2.
24612463
2462- P1 : {'identity', array-like}, shape (n_features,), optional (default='identity')
2464+ P1 : {'identity', array-like, None }, shape (n_features,), optional (default='identity')
24632465 This array controls the strength of the regularization for each coefficient
24642466 independently. A high value will lead to higher regularization while a value of
24652467 zero will remove the regularization on this parameter.
@@ -2468,20 +2470,20 @@ class GeneralizedLinearRegressor(GeneralizedLinearRegressorBase):
24682470 the penalty of the categorical column will be applied to all the levels of
24692471 the categorical.
24702472
2471- P2 : {'identity', array-like, sparse matrix}, shape (n_features,) \
2473+ P2 : {'identity', array-like, sparse matrix, None }, shape (n_features,) \
24722474 or (n_features, n_features), optional (default='identity')
24732475 With this option, you can set the P2 matrix in the L2 penalty
24742476 ``w*P2*w``. This gives a fine control over this penalty (Tikhonov
24752477 regularization). A 2d array is directly used as the square matrix P2. A
24762478 1d array is interpreted as diagonal (square) matrix. The default
2477- ``'identity'`` sets the identity matrix, which gives the usual squared
2478- L2-norm. If you just want to exclude certain coefficients, pass a 1d
2479- array filled with 1 and 0 for the coefficients to be excluded. Note that
2480- P2 must be positive semi-definite. If ``X`` is a pandas DataFrame
2481- with a categorical dtype and P2 has the same size as the number of columns,
2482- the penalty of the categorical column will be applied to all the levels of
2483- the categorical. Note that if P2 is two-dimensional, its size needs to be
2484- of the same length as the expanded ``X`` matrix.
2479+ ``'identity'`` and ``None`` set the identity matrix, which gives the usual
2480+ squared L2-norm. If you just want to exclude certain coefficients, pass a 1d
2481+ array filled with 1 and 0 for the coefficients to be excluded. Note that P2 must
2482+ be positive semi-definite. If ``X`` is a pandas DataFrame with a categorical
2483+ dtype and P2 has the same size as the number of columns, the penalty of the
2484+ categorical column will be applied to all the levels of the categorical. Note
2485+ that if P2 is two-dimensional, its size needs to be of the same length as the
2486+ expanded ``X`` matrix.
24852487
24862488 fit_intercept : bool, optional (default=True)
24872489 Specifies if a constant (a.k.a. bias or intercept) should be
@@ -2496,7 +2498,8 @@ class GeneralizedLinearRegressor(GeneralizedLinearRegressorBase):
24962498 specify it in parentheses (e.g., ``'tweedie (1.5)'``). The same applies
24972499 for ``'negative.binomial'`` and theta parameter.
24982500
2499- link : {'auto', 'identity', 'log', 'logit', 'cloglog'} or Link, optional (default='auto')
2501+ link : {'auto', 'identity', 'log', 'logit', 'cloglog'} or Link, \
2502+ optional (default='auto')
25002503 The link function of the GLM, i.e. mapping from linear
25012504 predictor (``X * coef``) to expectation (``mu``). Option ``'auto'`` sets
25022505 the link depending on the chosen family as follows:
@@ -2510,8 +2513,7 @@ class GeneralizedLinearRegressor(GeneralizedLinearRegressorBase):
25102513 optional (default='auto')
25112514 Algorithm to use in the optimization problem:
25122515
2513- - ``'auto'``: ``'irls-ls'`` if ``l1_ratio`` is zero and ``'irls-cd'``
2514- otherwise.
2516+ - ``'auto'``: ``'irls-ls'`` if ``l1_ratio`` is zero and ``'irls-cd'`` otherwise.
25152517 - ``'irls-cd'``: Iteratively reweighted least squares with a coordinate
25162518 descent inner solver. This can deal with L1 as well as L2 penalties.
25172519 Note that in order to avoid unnecessary memory duplication of X in the
@@ -2740,12 +2742,12 @@ def __init__(
27402742 self ,
27412743 alpha = None ,
27422744 l1_ratio = 0 ,
2743- P1 = "identity" ,
2744- P2 = "identity" ,
2745+ P1 : Optional [ Union [ str , np . ndarray ]] = "identity" ,
2746+ P2 : Optional [ Union [ str , np . ndarray , sparse . spmatrix ]] = "identity" ,
27452747 fit_intercept = True ,
27462748 family : Union [str , ExponentialDispersionModel ] = "normal" ,
27472749 link : Union [str , Link ] = "auto" ,
2748- solver = "auto" ,
2750+ solver : str = "auto" ,
27492751 max_iter = 100 ,
27502752 gradient_tol : Optional [float ] = None ,
27512753 step_size_tol : Optional [float ] = None ,
@@ -2935,16 +2937,11 @@ def fit(
29352937
29362938 self ._set_up_for_fit (y )
29372939
2938- _dtype = [np .float64 , np .float32 ]
2939- if self ._solver == "irls-cd" :
2940- _stype = ["csc" ]
2941- else :
2942- _stype = ["csc" , "csr" ]
2943-
29442940 # 1.3 arguments to take special care ##################################
29452941 # P1, P2, start_params
2942+ stype = ["csc" ] if self ._solver == "irls-cd" else ["csc" , "csr" ]
29462943 P1_no_alpha = setup_p1 (P1 , X , X .dtype , 1 , self .l1_ratio )
2947- P2_no_alpha = setup_p2 (P2 , X , _stype , X .dtype , 1 , self .l1_ratio )
2944+ P2_no_alpha = setup_p2 (P2 , X , stype , X .dtype , 1 , self .l1_ratio )
29482945
29492946 lower_bounds = check_bounds (self .lower_bounds , X .shape [1 ], X .dtype )
29502947 upper_bounds = check_bounds (self .upper_bounds , X .shape [1 ], X .dtype )
@@ -2961,7 +2958,7 @@ def fit(
29612958 self .start_params ,
29622959 n_cols = X .shape [1 ],
29632960 fit_intercept = self .fit_intercept ,
2964- _dtype = _dtype ,
2961+ dtype = [ np . float64 , np . float32 ] ,
29652962 )
29662963
29672964 # 1.4 additional validations ##########################################
0 commit comments