Accept None more generally (#780)

lbittarello · web-flow · commit a80e0434529d · 2024-03-18T16:57:33.000Z
diff --git a/src/glum/_glm.py b/src/glum/_glm.py
@@ -500,12 +500,13 @@ def get_link(link: Union[str, Link], family: ExponentialDispersionModel) -> Link
 
 
 def setup_p1(
-    P1: Union[str, np.ndarray],
+    P1: Optional[Union[str, np.ndarray]],
     X: Union[tm.MatrixBase, tm.StandardizedMatrix],
-    _dtype,
+    dtype,
     alpha: float,
     l1_ratio: float,
 ) -> np.ndarray:
+
     if not isinstance(X, (tm.MatrixBase, tm.StandardizedMatrix)):
         raise TypeError
 
@@ -514,11 +515,13 @@ def setup_p1(
     if isinstance(P1, str):
         if P1 != "identity":
             raise ValueError(f"P1 must be either 'identity' or an array; got {P1}.")
-        P1 = np.ones(n_features, dtype=_dtype)
+        P1 = np.ones(n_features, dtype=dtype)
+    elif P1 is None:
+        P1 = np.ones(n_features, dtype=dtype)
     else:
         P1 = np.atleast_1d(P1)
         try:
-            P1 = P1.astype(_dtype, casting="safe", copy=False)
+            P1 = P1.astype(dtype, casting="safe", copy=False)
         except TypeError as e:
             raise TypeError(
                 "The given P1 cannot be converted to a numeric array; "
@@ -533,37 +536,41 @@ def setup_p1(
 
     # P1 and P2 are now for sure copies
     P1 = alpha * l1_ratio * P1
-    return cast(np.ndarray, P1).astype(_dtype)
+    return cast(np.ndarray, P1).astype(dtype)
 
 
 def setup_p2(
-    P2: Union[str, np.ndarray, sparse.spmatrix],
+    P2: Optional[Union[str, np.ndarray, sparse.spmatrix]],
     X: Union[tm.MatrixBase, tm.StandardizedMatrix],
-    _stype,
-    _dtype,
+    stype,
+    dtype,
     alpha: float,
     l1_ratio: float,
 ) -> Union[np.ndarray, sparse.spmatrix]:
+
     if not isinstance(X, (tm.MatrixBase, tm.StandardizedMatrix)):
         raise TypeError
 
     n_features = X.shape[1]
 
+    def _setup_sparse_p2(P2):
+        return (sparse.dia_matrix((P2, 0), shape=(n_features, n_features))).tocsc()
+
     if isinstance(P2, str):
         if P2 != "identity":
             raise ValueError(f"P2 must be either 'identity' or an array. Got {P2}.")
         if sparse.issparse(X):  # if X is sparse, make P2 sparse, too
-            P2 = (
-                sparse.dia_matrix(
-                    (np.ones(n_features, dtype=_dtype), 0),
-                    shape=(n_features, n_features),
-                )
-            ).tocsc()
+            P2 = _setup_sparse_p2(np.ones(n_features, dtype=dtype))
+        else:
+            P2 = np.ones(n_features, dtype=dtype)
+    elif P2 is None:
+        if sparse.issparse(X):  # if X is sparse, make P2 sparse, too
+            P2 = _setup_sparse_p2(np.ones(n_features, dtype=dtype))
         else:
-            P2 = np.ones(n_features, dtype=_dtype)
+            P2 = np.ones(n_features, dtype=dtype)
     else:
         P2 = check_array(
-            P2, copy=True, accept_sparse=_stype, dtype=_dtype, ensure_2d=False
+            P2, copy=True, accept_sparse=stype, dtype=dtype, ensure_2d=False
         )
         P2 = cast(np.ndarray, P2)
         if P2.ndim == 1:
@@ -575,9 +582,7 @@ def setup_p2(
                     f"got (P2.shape={P2.shape})."
                 )
             if sparse.issparse(X):
-                P2 = (
-                    sparse.dia_matrix((P2, 0), shape=(n_features, n_features))
-                ).tocsc()
+                P2 = _setup_sparse_p2(P2)
         elif P2.ndim == 2 and P2.shape[0] == P2.shape[1] and P2.shape[0] == n_features:
             if sparse.issparse(X):
                 P2 = sparse.csc_matrix(P2)
@@ -604,7 +609,7 @@ def setup_p2(
 
 
 def initialize_start_params(
-    start_params: Optional[np.ndarray], n_cols: int, fit_intercept: bool, _dtype
+    start_params: Optional[np.ndarray], n_cols: int, fit_intercept: bool, dtype
 ) -> Optional[np.ndarray]:
     if start_params is None:
         return None
@@ -614,7 +619,7 @@ def initialize_start_params(
         accept_sparse=False,
         force_all_finite=True,
         ensure_2d=False,
-        dtype=_dtype,
+        dtype=dtype,
         copy=True,
     )
 
@@ -696,12 +701,12 @@ class GeneralizedLinearRegressorBase(BaseEstimator, RegressorMixin):
     def __init__(
         self,
         l1_ratio: float = 0,
-        P1="identity",
-        P2: Union[str, np.ndarray, sparse.spmatrix] = "identity",
+        P1: Optional[Union[str, np.ndarray]] = "identity",
+        P2: Optional[Union[str, np.ndarray, sparse.spmatrix]] = "identity",
         fit_intercept=True,
         family: Union[str, ExponentialDispersionModel] = "normal",
         link: Union[str, Link] = "auto",
-        solver="auto",
+        solver: str = "auto",
         max_iter=100,
         gradient_tol: Optional[float] = None,
         step_size_tol: Optional[float] = None,
@@ -2186,14 +2191,14 @@ def _validate_hyperparameters(self) -> None:
                 "scale_predictors=True is not supported when fit_intercept=False."
             )
         if ((self.lower_bounds is not None) or (self.upper_bounds is not None)) and (
-            self.solver not in ["irls-cd", "auto"]
+            self.solver not in ["auto", "irls-cd"]
         ):
             raise ValueError(
                 "Only the 'cd' solver is supported when bounds are set; "
                 f"got {self.solver}."
             )
         if ((self.A_ineq is not None) or (self.b_ineq is not None)) and (
-            self.solver not in ["trust-constr", "auto"]
+            self.solver not in [None, "auto", "trust-constr"]
         ):
             raise ValueError(
                 "Only the 'trust-constr' solver supports inequality constraints; "
@@ -2249,11 +2254,8 @@ def _set_up_and_check_fit_args(
         Union[str, np.ndarray],
         Union[str, np.ndarray],
     ]:
-        _dtype = [np.float64, np.float32]
-        if solver == "irls-cd":
-            _stype = ["csc"]
-        else:
-            _stype = ["csc", "csr"]
+        dtype = [np.float64, np.float32]
+        stype = ["csc"] if solver == "irls-cd" else ["csc", "csr"]
 
         P1 = self.P1
         P2 = self.P2
@@ -2357,8 +2359,8 @@ def _expand_categorical_penalties(penalty, X, drop_first):
             X, y = check_X_y_tabmat_compliant(
                 X,
                 y,
-                accept_sparse=_stype,
-                dtype=_dtype,
+                accept_sparse=stype,
+                dtype=dtype,
                 copy=copy_X,
                 force_all_finite=force_all_finite,
                 drop_first=getattr(self, "drop_first", False),
@@ -2369,8 +2371,8 @@ def _expand_categorical_penalties(penalty, X, drop_first):
                 X,
                 y,
                 ensure_2d=True,
-                accept_sparse=_stype,
-                dtype=_dtype,
+                accept_sparse=stype,
+                dtype=dtype,
                 copy=copy_X,
                 force_all_finite=force_all_finite,
             )
@@ -2459,7 +2461,7 @@ class GeneralizedLinearRegressor(GeneralizedLinearRegressorBase):
         is an L1 penalty.  For ``0 < l1_ratio < 1``, the penalty is a
         combination of L1 and L2.
 
-    P1 : {'identity', array-like}, shape (n_features,), optional (default='identity')
+    P1 : {'identity', array-like, None}, shape (n_features,), optional (default='identity')
         This array controls the strength of the regularization for each coefficient
         independently. A high value will lead to higher regularization while a value of
         zero will remove the regularization on this parameter.
@@ -2468,20 +2470,20 @@ class GeneralizedLinearRegressor(GeneralizedLinearRegressorBase):
         the penalty of the categorical column will be applied to all the levels of
         the categorical.
 
-    P2 : {'identity', array-like, sparse matrix}, shape (n_features,) \
+    P2 : {'identity', array-like, sparse matrix, None}, shape (n_features,) \
             or (n_features, n_features), optional (default='identity')
         With this option, you can set the P2 matrix in the L2 penalty
         ``w*P2*w``. This gives a fine control over this penalty (Tikhonov
         regularization). A 2d array is directly used as the square matrix P2. A
         1d array is interpreted as diagonal (square) matrix. The default
-        ``'identity'`` sets the identity matrix, which gives the usual squared
-        L2-norm. If you just want to exclude certain coefficients, pass a 1d
-        array filled with 1 and 0 for the coefficients to be excluded. Note that
-        P2 must be positive semi-definite. If ``X`` is a pandas DataFrame
-        with a categorical dtype and P2 has the same size as the number of columns,
-        the penalty of the categorical column will be applied to all the levels of
-        the categorical. Note that if P2 is two-dimensional, its size needs to be
-        of the same length as the expanded ``X`` matrix.
+        ``'identity'`` and ``None`` set the identity matrix, which gives the usual
+        squared L2-norm. If you just want to exclude certain coefficients, pass a 1d
+        array filled with 1 and 0 for the coefficients to be excluded. Note that P2 must
+        be positive semi-definite. If ``X`` is a pandas DataFrame with a categorical
+        dtype and P2 has the same size as the number of columns, the penalty of the
+        categorical column will be applied to all the levels of the categorical. Note
+        that if P2 is two-dimensional, its size needs to be of the same length as the
+        expanded ``X`` matrix.
 
     fit_intercept : bool, optional (default=True)
         Specifies if a constant (a.k.a. bias or intercept) should be
@@ -2496,7 +2498,8 @@ class GeneralizedLinearRegressor(GeneralizedLinearRegressorBase):
         specify it in parentheses (e.g., ``'tweedie (1.5)'``). The same applies
         for ``'negative.binomial'`` and theta parameter.
 
-    link : {'auto', 'identity', 'log', 'logit', 'cloglog'} or Link, optional (default='auto')
+    link : {'auto', 'identity', 'log', 'logit', 'cloglog'} oe Link, \
+            optional (default='auto')
         The link function of the GLM, i.e. mapping from linear
         predictor (``X * coef``) to expectation (``mu``). Option ``'auto'`` sets
         the link depending on the chosen family as follows:
@@ -2510,8 +2513,7 @@ class GeneralizedLinearRegressor(GeneralizedLinearRegressorBase):
             optional (default='auto')
         Algorithm to use in the optimization problem:
 
-        - ``'auto'``: ``'irls-ls'`` if ``l1_ratio`` is zero and ``'irls-cd'``
-          otherwise.
+        - ``'auto'``: ``'irls-ls'`` if ``l1_ratio`` is zero and ``'irls-cd'`` otherwise.
         - ``'irls-cd'``: Iteratively reweighted least squares with a coordinate
           descent inner solver. This can deal with L1 as well as L2 penalties.
           Note that in order to avoid unnecessary memory duplication of X in the
@@ -2740,12 +2742,12 @@ def __init__(
         self,
         alpha=None,
         l1_ratio=0,
-        P1="identity",
-        P2="identity",
+        P1: Optional[Union[str, np.ndarray]] = "identity",
+        P2: Optional[Union[str, np.ndarray, sparse.spmatrix]] = "identity",
         fit_intercept=True,
         family: Union[str, ExponentialDispersionModel] = "normal",
         link: Union[str, Link] = "auto",
-        solver="auto",
+        solver: str = "auto",
         max_iter=100,
         gradient_tol: Optional[float] = None,
         step_size_tol: Optional[float] = None,
@@ -2935,16 +2937,11 @@ def fit(
 
         self._set_up_for_fit(y)
 
-        _dtype = [np.float64, np.float32]
-        if self._solver == "irls-cd":
-            _stype = ["csc"]
-        else:
-            _stype = ["csc", "csr"]
-
         # 1.3 arguments to take special care ##################################
         # P1, P2, start_params
+        stype = ["csc"] if self._solver == "irls-cd" else ["csc", "csr"]
         P1_no_alpha = setup_p1(P1, X, X.dtype, 1, self.l1_ratio)
-        P2_no_alpha = setup_p2(P2, X, _stype, X.dtype, 1, self.l1_ratio)
+        P2_no_alpha = setup_p2(P2, X, stype, X.dtype, 1, self.l1_ratio)
 
         lower_bounds = check_bounds(self.lower_bounds, X.shape[1], X.dtype)
         upper_bounds = check_bounds(self.upper_bounds, X.shape[1], X.dtype)
@@ -2961,7 +2958,7 @@ def fit(
             self.start_params,
             n_cols=X.shape[1],
             fit_intercept=self.fit_intercept,
-            _dtype=_dtype,
+            dtype=[np.float64, np.float32],
         )
 
         # 1.4 additional validations ##########################################
diff --git a/src/glum/_glm_cv.py b/src/glum/_glm_cv.py
@@ -37,7 +37,7 @@ class GeneralizedLinearRegressorCV(GeneralizedLinearRegressorBase):
         If you pass ``l1_ratio`` as an array, the ``fit`` method will choose the
         best value of ``l1_ratio`` and store it as ``self.l1_ratio``.
 
-    P1 : {'identity', array-like}, shape (n_features,), optional (default='identity')
+    P1 : {'identity', array-like, None}, shape (n_features,), optional (default='identity')
         This array controls the strength of the regularization for each coefficient
         independently. A high value will lead to higher regularization while a value of
         zero will remove the regularization on this parameter.
@@ -46,20 +46,20 @@ class GeneralizedLinearRegressorCV(GeneralizedLinearRegressorBase):
         the penalty of the categorical column will be applied to all the levels of
         the categorical.
 
-    P2 : {'identity', array-like, sparse matrix}, shape (n_features,) \
+    P2 : {'identity', array-like, sparse matrix, None}, shape (n_features,) \
             or (n_features, n_features), optional (default='identity')
         With this option, you can set the P2 matrix in the L2 penalty
         ``w*P2*w``. This gives a fine control over this penalty (Tikhonov
         regularization). A 2d array is directly used as the square matrix P2. A
         1d array is interpreted as diagonal (square) matrix. The default
-        ``'identity'`` sets the identity matrix, which gives the usual squared
-        L2-norm. If you just want to exclude certain coefficients, pass a 1d
-        array filled with 1 and 0 for the coefficients to be excluded. Note that
-        P2 must be positive semi-definite. If ``X`` is a pandas DataFrame
-        with a categorical dtype and P2 has the same size as the number of columns,
-        the penalty of the categorical column will be applied to all the levels of
-        the categorical. Note that if P2 is two-dimensional, its size needs to be
-        of the same length as the expanded ``X`` matrix.
+        ``'identity'`` and ``None`` set the identity matrix, which gives the usual
+        squared L2-norm. If you just want to exclude certain coefficients, pass a 1d
+        array filled with 1 and 0 for the coefficients to be excluded. Note that P2 must
+        be positive semi-definite. If ``X`` is a pandas DataFrame with a categorical
+        dtype and P2 has the same size as the number of columns, the penalty of the
+        categorical column will be applied to all the levels of the categorical. Note
+        that if P2 is two-dimensional, its size needs to be of the same length as the
+        expanded ``X`` matrix.
 
     fit_intercept : bool, optional (default=True)
         Specifies if a constant (a.k.a. bias or intercept) should be
@@ -74,7 +74,8 @@ class GeneralizedLinearRegressorCV(GeneralizedLinearRegressorBase):
         specify it in parentheses (e.g., ``'tweedie (1.5)'``). The same applies
         for ``'negative.binomial'`` and theta parameter.
 
-    link : {'auto', 'identity', 'log', 'logit', 'cloglog'} or Link, optional (default='auto')
+    link : {'auto', 'identity', 'log', 'logit', 'cloglog'}, Link or None, \
+            optional (default='auto')
         The link function of the GLM, i.e. mapping from linear
         predictor (``X * coef``) to expectation (``mu``). Option ``'auto'`` sets
         the link depending on the chosen family as follows:
@@ -84,11 +85,11 @@ class GeneralizedLinearRegressorCV(GeneralizedLinearRegressorBase):
           ``'inverse.gaussian'`` and ``'negative.binomial'``.
         - ``'logit'`` for family ``'binomial'``
 
-    solver : {'auto', 'irls-cd', 'irls-ls', 'lbfgs'}, optional (default='auto')
+    solver : {'auto', 'irls-cd', 'irls-ls', 'lbfgs', 'trust-constr'}, \
+            optional (default='auto')
         Algorithm to use in the optimization problem:
 
-        - ``'auto'``: ``'irls-ls'`` if ``l1_ratio`` is zero and ``'irls-cd'``
-          otherwise.
+        - ``'auto'``: ``'irls-ls'`` if ``l1_ratio`` is zero and ``'irls-cd'`` otherwise.
         - ``'irls-cd'``: Iteratively reweighted least squares with a coordinate
           descent inner solver. This can deal with L1 as well as L2 penalties.
           Note that in order to avoid unnecessary memory duplication of X in the
@@ -291,7 +292,7 @@ def __init__(
         fit_intercept=True,
         family: Union[str, ExponentialDispersionModel] = "normal",
         link: Union[str, Link] = "auto",
-        solver="auto",
+        solver: str = "auto",
         max_iter=100,
         gradient_tol: Optional[float] = None,
         step_size_tol: Optional[float] = None,
@@ -525,12 +526,11 @@ def _get_deviance(coef):
             ):
                 assert isinstance(self._link_instance, LogLink)
 
-            _dtype = [np.float64, np.float32]
             start_params = initialize_start_params(
                 self.start_params,
                 n_cols=X.shape[1],
                 fit_intercept=self.fit_intercept,
-                _dtype=_dtype,
+                dtype=[np.float64, np.float32],
             )
 
             P1_no_alpha = setup_p1(P1, X, X.dtype, 1, l1)
@@ -690,7 +690,7 @@ def _get_deviance(coef):
             self.start_params,
             n_cols=X.shape[1],
             fit_intercept=self.fit_intercept,
-            _dtype=X.dtype,
+            dtype=X.dtype,
         )
 
         coef = self._get_start_coef(