Skip to content

Commit a80e043

Browse files
authored
Accept None more generally (#780)
1 parent 02e9e08 commit a80e043

File tree

2 files changed

+74
-77
lines changed

2 files changed

+74
-77
lines changed

src/glum/_glm.py

Lines changed: 56 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -500,12 +500,13 @@ def get_link(link: Union[str, Link], family: ExponentialDispersionModel) -> Link
500500

501501

502502
def setup_p1(
503-
P1: Union[str, np.ndarray],
503+
P1: Optional[Union[str, np.ndarray]],
504504
X: Union[tm.MatrixBase, tm.StandardizedMatrix],
505-
_dtype,
505+
dtype,
506506
alpha: float,
507507
l1_ratio: float,
508508
) -> np.ndarray:
509+
509510
if not isinstance(X, (tm.MatrixBase, tm.StandardizedMatrix)):
510511
raise TypeError
511512

@@ -514,11 +515,13 @@ def setup_p1(
514515
if isinstance(P1, str):
515516
if P1 != "identity":
516517
raise ValueError(f"P1 must be either 'identity' or an array; got {P1}.")
517-
P1 = np.ones(n_features, dtype=_dtype)
518+
P1 = np.ones(n_features, dtype=dtype)
519+
elif P1 is None:
520+
P1 = np.ones(n_features, dtype=dtype)
518521
else:
519522
P1 = np.atleast_1d(P1)
520523
try:
521-
P1 = P1.astype(_dtype, casting="safe", copy=False)
524+
P1 = P1.astype(dtype, casting="safe", copy=False)
522525
except TypeError as e:
523526
raise TypeError(
524527
"The given P1 cannot be converted to a numeric array; "
@@ -533,37 +536,41 @@ def setup_p1(
533536

534537
# P1 and P2 are now for sure copies
535538
P1 = alpha * l1_ratio * P1
536-
return cast(np.ndarray, P1).astype(_dtype)
539+
return cast(np.ndarray, P1).astype(dtype)
537540

538541

539542
def setup_p2(
540-
P2: Union[str, np.ndarray, sparse.spmatrix],
543+
P2: Optional[Union[str, np.ndarray, sparse.spmatrix]],
541544
X: Union[tm.MatrixBase, tm.StandardizedMatrix],
542-
_stype,
543-
_dtype,
545+
stype,
546+
dtype,
544547
alpha: float,
545548
l1_ratio: float,
546549
) -> Union[np.ndarray, sparse.spmatrix]:
550+
547551
if not isinstance(X, (tm.MatrixBase, tm.StandardizedMatrix)):
548552
raise TypeError
549553

550554
n_features = X.shape[1]
551555

556+
def _setup_sparse_p2(P2):
557+
return (sparse.dia_matrix((P2, 0), shape=(n_features, n_features))).tocsc()
558+
552559
if isinstance(P2, str):
553560
if P2 != "identity":
554561
raise ValueError(f"P2 must be either 'identity' or an array. Got {P2}.")
555562
if sparse.issparse(X): # if X is sparse, make P2 sparse, too
556-
P2 = (
557-
sparse.dia_matrix(
558-
(np.ones(n_features, dtype=_dtype), 0),
559-
shape=(n_features, n_features),
560-
)
561-
).tocsc()
563+
P2 = _setup_sparse_p2(np.ones(n_features, dtype=dtype))
564+
else:
565+
P2 = np.ones(n_features, dtype=dtype)
566+
elif P2 is None:
567+
if sparse.issparse(X): # if X is sparse, make P2 sparse, too
568+
P2 = _setup_sparse_p2(np.ones(n_features, dtype=dtype))
562569
else:
563-
P2 = np.ones(n_features, dtype=_dtype)
570+
P2 = np.ones(n_features, dtype=dtype)
564571
else:
565572
P2 = check_array(
566-
P2, copy=True, accept_sparse=_stype, dtype=_dtype, ensure_2d=False
573+
P2, copy=True, accept_sparse=stype, dtype=dtype, ensure_2d=False
567574
)
568575
P2 = cast(np.ndarray, P2)
569576
if P2.ndim == 1:
@@ -575,9 +582,7 @@ def setup_p2(
575582
f"got (P2.shape={P2.shape})."
576583
)
577584
if sparse.issparse(X):
578-
P2 = (
579-
sparse.dia_matrix((P2, 0), shape=(n_features, n_features))
580-
).tocsc()
585+
P2 = _setup_sparse_p2(P2)
581586
elif P2.ndim == 2 and P2.shape[0] == P2.shape[1] and P2.shape[0] == n_features:
582587
if sparse.issparse(X):
583588
P2 = sparse.csc_matrix(P2)
@@ -604,7 +609,7 @@ def setup_p2(
604609

605610

606611
def initialize_start_params(
607-
start_params: Optional[np.ndarray], n_cols: int, fit_intercept: bool, _dtype
612+
start_params: Optional[np.ndarray], n_cols: int, fit_intercept: bool, dtype
608613
) -> Optional[np.ndarray]:
609614
if start_params is None:
610615
return None
@@ -614,7 +619,7 @@ def initialize_start_params(
614619
accept_sparse=False,
615620
force_all_finite=True,
616621
ensure_2d=False,
617-
dtype=_dtype,
622+
dtype=dtype,
618623
copy=True,
619624
)
620625

@@ -696,12 +701,12 @@ class GeneralizedLinearRegressorBase(BaseEstimator, RegressorMixin):
696701
def __init__(
697702
self,
698703
l1_ratio: float = 0,
699-
P1="identity",
700-
P2: Union[str, np.ndarray, sparse.spmatrix] = "identity",
704+
P1: Optional[Union[str, np.ndarray]] = "identity",
705+
P2: Optional[Union[str, np.ndarray, sparse.spmatrix]] = "identity",
701706
fit_intercept=True,
702707
family: Union[str, ExponentialDispersionModel] = "normal",
703708
link: Union[str, Link] = "auto",
704-
solver="auto",
709+
solver: str = "auto",
705710
max_iter=100,
706711
gradient_tol: Optional[float] = None,
707712
step_size_tol: Optional[float] = None,
@@ -2186,14 +2191,14 @@ def _validate_hyperparameters(self) -> None:
21862191
"scale_predictors=True is not supported when fit_intercept=False."
21872192
)
21882193
if ((self.lower_bounds is not None) or (self.upper_bounds is not None)) and (
2189-
self.solver not in ["irls-cd", "auto"]
2194+
self.solver not in ["auto", "irls-cd"]
21902195
):
21912196
raise ValueError(
21922197
"Only the 'cd' solver is supported when bounds are set; "
21932198
f"got {self.solver}."
21942199
)
21952200
if ((self.A_ineq is not None) or (self.b_ineq is not None)) and (
2196-
self.solver not in ["trust-constr", "auto"]
2201+
self.solver not in [None, "auto", "trust-constr"]
21972202
):
21982203
raise ValueError(
21992204
"Only the 'trust-constr' solver supports inequality constraints; "
@@ -2249,11 +2254,8 @@ def _set_up_and_check_fit_args(
22492254
Union[str, np.ndarray],
22502255
Union[str, np.ndarray],
22512256
]:
2252-
_dtype = [np.float64, np.float32]
2253-
if solver == "irls-cd":
2254-
_stype = ["csc"]
2255-
else:
2256-
_stype = ["csc", "csr"]
2257+
dtype = [np.float64, np.float32]
2258+
stype = ["csc"] if solver == "irls-cd" else ["csc", "csr"]
22572259

22582260
P1 = self.P1
22592261
P2 = self.P2
@@ -2357,8 +2359,8 @@ def _expand_categorical_penalties(penalty, X, drop_first):
23572359
X, y = check_X_y_tabmat_compliant(
23582360
X,
23592361
y,
2360-
accept_sparse=_stype,
2361-
dtype=_dtype,
2362+
accept_sparse=stype,
2363+
dtype=dtype,
23622364
copy=copy_X,
23632365
force_all_finite=force_all_finite,
23642366
drop_first=getattr(self, "drop_first", False),
@@ -2369,8 +2371,8 @@ def _expand_categorical_penalties(penalty, X, drop_first):
23692371
X,
23702372
y,
23712373
ensure_2d=True,
2372-
accept_sparse=_stype,
2373-
dtype=_dtype,
2374+
accept_sparse=stype,
2375+
dtype=dtype,
23742376
copy=copy_X,
23752377
force_all_finite=force_all_finite,
23762378
)
@@ -2459,7 +2461,7 @@ class GeneralizedLinearRegressor(GeneralizedLinearRegressorBase):
24592461
is an L1 penalty. For ``0 < l1_ratio < 1``, the penalty is a
24602462
combination of L1 and L2.
24612463
2462-
P1 : {'identity', array-like}, shape (n_features,), optional (default='identity')
2464+
P1 : {'identity', array-like, None}, shape (n_features,), optional (default='identity')
24632465
This array controls the strength of the regularization for each coefficient
24642466
independently. A high value will lead to higher regularization while a value of
24652467
zero will remove the regularization on this parameter.
@@ -2468,20 +2470,20 @@ class GeneralizedLinearRegressor(GeneralizedLinearRegressorBase):
24682470
the penalty of the categorical column will be applied to all the levels of
24692471
the categorical.
24702472
2471-
P2 : {'identity', array-like, sparse matrix}, shape (n_features,) \
2473+
P2 : {'identity', array-like, sparse matrix, None}, shape (n_features,) \
24722474
or (n_features, n_features), optional (default='identity')
24732475
With this option, you can set the P2 matrix in the L2 penalty
24742476
``w*P2*w``. This gives a fine control over this penalty (Tikhonov
24752477
regularization). A 2d array is directly used as the square matrix P2. A
24762478
1d array is interpreted as diagonal (square) matrix. The default
2477-
``'identity'`` sets the identity matrix, which gives the usual squared
2478-
L2-norm. If you just want to exclude certain coefficients, pass a 1d
2479-
array filled with 1 and 0 for the coefficients to be excluded. Note that
2480-
P2 must be positive semi-definite. If ``X`` is a pandas DataFrame
2481-
with a categorical dtype and P2 has the same size as the number of columns,
2482-
the penalty of the categorical column will be applied to all the levels of
2483-
the categorical. Note that if P2 is two-dimensional, its size needs to be
2484-
of the same length as the expanded ``X`` matrix.
2479+
``'identity'`` and ``None`` set the identity matrix, which gives the usual
2480+
squared L2-norm. If you just want to exclude certain coefficients, pass a 1d
2481+
array filled with 1 and 0 for the coefficients to be excluded. Note that P2 must
2482+
be positive semi-definite. If ``X`` is a pandas DataFrame with a categorical
2483+
dtype and P2 has the same size as the number of columns, the penalty of the
2484+
categorical column will be applied to all the levels of the categorical. Note
2485+
that if P2 is two-dimensional, its size needs to be of the same length as the
2486+
expanded ``X`` matrix.
24852487
24862488
fit_intercept : bool, optional (default=True)
24872489
Specifies if a constant (a.k.a. bias or intercept) should be
@@ -2496,7 +2498,8 @@ class GeneralizedLinearRegressor(GeneralizedLinearRegressorBase):
24962498
specify it in parentheses (e.g., ``'tweedie (1.5)'``). The same applies
24972499
for ``'negative.binomial'`` and theta parameter.
24982500
2499-
link : {'auto', 'identity', 'log', 'logit', 'cloglog'} or Link, optional (default='auto')
2501+
link : {'auto', 'identity', 'log', 'logit', 'cloglog'} or Link, \
2502+
optional (default='auto')
25002503
The link function of the GLM, i.e. mapping from linear
25012504
predictor (``X * coef``) to expectation (``mu``). Option ``'auto'`` sets
25022505
the link depending on the chosen family as follows:
@@ -2510,8 +2513,7 @@ class GeneralizedLinearRegressor(GeneralizedLinearRegressorBase):
25102513
optional (default='auto')
25112514
Algorithm to use in the optimization problem:
25122515
2513-
- ``'auto'``: ``'irls-ls'`` if ``l1_ratio`` is zero and ``'irls-cd'``
2514-
otherwise.
2516+
- ``'auto'``: ``'irls-ls'`` if ``l1_ratio`` is zero and ``'irls-cd'`` otherwise.
25152517
- ``'irls-cd'``: Iteratively reweighted least squares with a coordinate
25162518
descent inner solver. This can deal with L1 as well as L2 penalties.
25172519
Note that in order to avoid unnecessary memory duplication of X in the
@@ -2740,12 +2742,12 @@ def __init__(
27402742
self,
27412743
alpha=None,
27422744
l1_ratio=0,
2743-
P1="identity",
2744-
P2="identity",
2745+
P1: Optional[Union[str, np.ndarray]] = "identity",
2746+
P2: Optional[Union[str, np.ndarray, sparse.spmatrix]] = "identity",
27452747
fit_intercept=True,
27462748
family: Union[str, ExponentialDispersionModel] = "normal",
27472749
link: Union[str, Link] = "auto",
2748-
solver="auto",
2750+
solver: str = "auto",
27492751
max_iter=100,
27502752
gradient_tol: Optional[float] = None,
27512753
step_size_tol: Optional[float] = None,
@@ -2935,16 +2937,11 @@ def fit(
29352937

29362938
self._set_up_for_fit(y)
29372939

2938-
_dtype = [np.float64, np.float32]
2939-
if self._solver == "irls-cd":
2940-
_stype = ["csc"]
2941-
else:
2942-
_stype = ["csc", "csr"]
2943-
29442940
# 1.3 arguments to take special care ##################################
29452941
# P1, P2, start_params
2942+
stype = ["csc"] if self._solver == "irls-cd" else ["csc", "csr"]
29462943
P1_no_alpha = setup_p1(P1, X, X.dtype, 1, self.l1_ratio)
2947-
P2_no_alpha = setup_p2(P2, X, _stype, X.dtype, 1, self.l1_ratio)
2944+
P2_no_alpha = setup_p2(P2, X, stype, X.dtype, 1, self.l1_ratio)
29482945

29492946
lower_bounds = check_bounds(self.lower_bounds, X.shape[1], X.dtype)
29502947
upper_bounds = check_bounds(self.upper_bounds, X.shape[1], X.dtype)
@@ -2961,7 +2958,7 @@ def fit(
29612958
self.start_params,
29622959
n_cols=X.shape[1],
29632960
fit_intercept=self.fit_intercept,
2964-
_dtype=_dtype,
2961+
dtype=[np.float64, np.float32],
29652962
)
29662963

29672964
# 1.4 additional validations ##########################################

src/glum/_glm_cv.py

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ class GeneralizedLinearRegressorCV(GeneralizedLinearRegressorBase):
3737
If you pass ``l1_ratio`` as an array, the ``fit`` method will choose the
3838
best value of ``l1_ratio`` and store it as ``self.l1_ratio``.
3939
40-
P1 : {'identity', array-like}, shape (n_features,), optional (default='identity')
40+
P1 : {'identity', array-like, None}, shape (n_features,), optional (default='identity')
4141
This array controls the strength of the regularization for each coefficient
4242
independently. A high value will lead to higher regularization while a value of
4343
zero will remove the regularization on this parameter.
@@ -46,20 +46,20 @@ class GeneralizedLinearRegressorCV(GeneralizedLinearRegressorBase):
4646
the penalty of the categorical column will be applied to all the levels of
4747
the categorical.
4848
49-
P2 : {'identity', array-like, sparse matrix}, shape (n_features,) \
49+
P2 : {'identity', array-like, sparse matrix, None}, shape (n_features,) \
5050
or (n_features, n_features), optional (default='identity')
5151
With this option, you can set the P2 matrix in the L2 penalty
5252
``w*P2*w``. This gives a fine control over this penalty (Tikhonov
5353
regularization). A 2d array is directly used as the square matrix P2. A
5454
1d array is interpreted as diagonal (square) matrix. The default
55-
``'identity'`` sets the identity matrix, which gives the usual squared
56-
L2-norm. If you just want to exclude certain coefficients, pass a 1d
57-
array filled with 1 and 0 for the coefficients to be excluded. Note that
58-
P2 must be positive semi-definite. If ``X`` is a pandas DataFrame
59-
with a categorical dtype and P2 has the same size as the number of columns,
60-
the penalty of the categorical column will be applied to all the levels of
61-
the categorical. Note that if P2 is two-dimensional, its size needs to be
62-
of the same length as the expanded ``X`` matrix.
55+
``'identity'`` and ``None`` set the identity matrix, which gives the usual
56+
squared L2-norm. If you just want to exclude certain coefficients, pass a 1d
57+
array filled with 1 and 0 for the coefficients to be excluded. Note that P2 must
58+
be positive semi-definite. If ``X`` is a pandas DataFrame with a categorical
59+
dtype and P2 has the same size as the number of columns, the penalty of the
60+
categorical column will be applied to all the levels of the categorical. Note
61+
that if P2 is two-dimensional, its size needs to be of the same length as the
62+
expanded ``X`` matrix.
6363
6464
fit_intercept : bool, optional (default=True)
6565
Specifies if a constant (a.k.a. bias or intercept) should be
@@ -74,7 +74,8 @@ class GeneralizedLinearRegressorCV(GeneralizedLinearRegressorBase):
7474
specify it in parentheses (e.g., ``'tweedie (1.5)'``). The same applies
7575
for ``'negative.binomial'`` and theta parameter.
7676
77-
link : {'auto', 'identity', 'log', 'logit', 'cloglog'} or Link, optional (default='auto')
77+
link : {'auto', 'identity', 'log', 'logit', 'cloglog'}, Link or None, \
78+
optional (default='auto')
7879
The link function of the GLM, i.e. mapping from linear
7980
predictor (``X * coef``) to expectation (``mu``). Option ``'auto'`` sets
8081
the link depending on the chosen family as follows:
@@ -84,11 +85,11 @@ class GeneralizedLinearRegressorCV(GeneralizedLinearRegressorBase):
8485
``'inverse.gaussian'`` and ``'negative.binomial'``.
8586
- ``'logit'`` for family ``'binomial'``
8687
87-
solver : {'auto', 'irls-cd', 'irls-ls', 'lbfgs'}, optional (default='auto')
88+
solver : {'auto', 'irls-cd', 'irls-ls', 'lbfgs', 'trust-constr'}, \
89+
optional (default='auto')
8890
Algorithm to use in the optimization problem:
8991
90-
- ``'auto'``: ``'irls-ls'`` if ``l1_ratio`` is zero and ``'irls-cd'``
91-
otherwise.
92+
- ``'auto'``: ``'irls-ls'`` if ``l1_ratio`` is zero and ``'irls-cd'`` otherwise.
9293
- ``'irls-cd'``: Iteratively reweighted least squares with a coordinate
9394
descent inner solver. This can deal with L1 as well as L2 penalties.
9495
Note that in order to avoid unnecessary memory duplication of X in the
@@ -291,7 +292,7 @@ def __init__(
291292
fit_intercept=True,
292293
family: Union[str, ExponentialDispersionModel] = "normal",
293294
link: Union[str, Link] = "auto",
294-
solver="auto",
295+
solver: str = "auto",
295296
max_iter=100,
296297
gradient_tol: Optional[float] = None,
297298
step_size_tol: Optional[float] = None,
@@ -525,12 +526,11 @@ def _get_deviance(coef):
525526
):
526527
assert isinstance(self._link_instance, LogLink)
527528

528-
_dtype = [np.float64, np.float32]
529529
start_params = initialize_start_params(
530530
self.start_params,
531531
n_cols=X.shape[1],
532532
fit_intercept=self.fit_intercept,
533-
_dtype=_dtype,
533+
dtype=[np.float64, np.float32],
534534
)
535535

536536
P1_no_alpha = setup_p1(P1, X, X.dtype, 1, l1)
@@ -690,7 +690,7 @@ def _get_deviance(coef):
690690
self.start_params,
691691
n_cols=X.shape[1],
692692
fit_intercept=self.fit_intercept,
693-
_dtype=X.dtype,
693+
dtype=X.dtype,
694694
)
695695

696696
coef = self._get_start_coef(

0 commit comments

Comments
 (0)