Combine parsing of initial values (#805)

lbittarello · web-flow · commit c8923e0ccb60 · 2024-06-13T09:30:56.000+01:00
diff --git a/src/glum/_glm.py b/src/glum/_glm.py
@@ -38,7 +38,6 @@
     _assert_all_finite,
     check_consistent_length,
     check_is_fitted,
-    check_random_state,
     column_or_1d,
 )
 
@@ -99,7 +98,7 @@ class WaldTestResult(NamedTuple):
     df: int
 
 
-def check_array_tabmat_compliant(mat: ArrayLike, drop_first: int = False, **kwargs):
+def check_array_tabmat_compliant(mat: ArrayLike, drop_first: bool = False, **kwargs):
     to_copy = kwargs.get("copy", False)
 
     if isinstance(mat, pd.DataFrame):
@@ -654,33 +653,6 @@ def _setup_sparse_p2(P2):
     return P2
 
 
-def initialize_start_params(
-    start_params: Optional[np.ndarray], n_cols: int, fit_intercept: bool, dtype
-) -> Optional[np.ndarray]:
-    if start_params is None:
-        return None
-
-    start_params = check_array(
-        start_params,
-        accept_sparse=False,
-        force_all_finite=True,
-        ensure_2d=False,
-        dtype=dtype,
-        copy=True,
-    )
-
-    start_params = cast(np.ndarray, start_params)
-
-    if start_params.shape != (n_cols + fit_intercept,):
-        raise ValueError(
-            "Start values for parameters must have the right length and dimension; "
-            f"got (length={start_params.shape[0]}, ndim={start_params.ndim}); "
-            f"needed (length={n_cols + fit_intercept}, ndim=1)."
-        )
-
-    return start_params
-
-
 def is_pos_semidef(p: Union[sparse.spmatrix, np.ndarray]) -> Union[bool, np.bool_]:
     """
     Checks for positive semidefiniteness of ``p`` if ``p`` is a matrix, or
@@ -833,13 +805,13 @@ def link_instance(self) -> Link:
 
     def _get_start_coef(
         self,
-        start_params,
         X: Union[tm.MatrixBase, tm.StandardizedMatrix],
         y: np.ndarray,
         sample_weight: np.ndarray,
         offset: Optional[np.ndarray],
-        col_means: Optional[np.ndarray],
+        col_means: np.ndarray,
         col_stds: Optional[np.ndarray],
+        dtype,
     ) -> np.ndarray:
         if self.warm_start and hasattr(self, "coef_"):
             coef = self.coef_  # type: ignore
@@ -849,7 +821,7 @@ def _get_start_coef(
             if self._center_predictors:
                 _standardize_warm_start(coef, col_means, col_stds)  # type: ignore
 
-        elif start_params is None:
+        elif self.start_params is None:
             if self.fit_intercept:
                 coef = np.zeros(
                     X.shape[1] + 1, dtype=_float_itemsize_to_dtype[X.dtype.itemsize]
@@ -863,13 +835,28 @@ def _get_start_coef(
                 )
 
         else:  # assign given array as start values
-            coef = start_params
+            coef = check_array(
+                self.start_params,
+                accept_sparse=False,
+                force_all_finite=True,
+                ensure_2d=False,
+                dtype=dtype,
+                copy=True,
+            )
+
+            if coef.shape != (len(col_means) + self.fit_intercept,):
+                raise ValueError(
+                    "Start values for parameters must have the right length "
+                    f"and dimension; got {coef.shape}, needed "
+                    f"({len(col_means) + self.fit_intercept},)."
+                )
+
             if self._center_predictors:
                 _standardize_warm_start(coef, col_means, col_stds)  # type: ignore
 
         # If starting values are outside the specified bounds (if set),
         # bring the starting value exactly at the bound.
-        idx = 1 if self.fit_intercept else 0
+        idx = int(self.fit_intercept)
         if self.lower_bounds is not None:
             if np.any(coef[idx:] < self.lower_bounds):
                 warnings.warn(
@@ -970,8 +957,6 @@ def _set_up_for_fit(self, y: np.ndarray) -> None:
         else:
             self._gradient_tol = self.gradient_tol
 
-        self._random_state = check_random_state(self.random_state)
-
         # 1.4 additional validations ##########################################
         if self.check_input:
             if not np.all(self._family_instance.in_y_range(y)):
@@ -980,12 +965,6 @@ def _set_up_for_fit(self, y: np.ndarray) -> None:
                     f"{self._family_instance.__class__.__name__}."
                 )
 
-    def _tear_down_from_fit(self):
-        """
-        Delete attributes that were only needed for the fit method.
-        """
-        del self._random_state
-
     def _get_alpha_path(
         self,
         P1_no_alpha: np.ndarray,
@@ -1083,8 +1062,8 @@ def _solve(
         b_ineq: Optional[np.ndarray],
     ) -> np.ndarray:
         """
-        Must be run after running :func:`_set_up_for_fit` and before running
-        :func:`_tear_down_from_fit`. Sets ``self.coef_`` and ``self.intercept_``.
+        Must be run after running :func:`_set_up_for_fit`. Sets
+        ``self.coef_`` and ``self.intercept_``.
         """
         fixed_inner_tol = None
         if (
@@ -1527,7 +1506,7 @@ def coef_table(
         captured_context = capture_context(
             context + 1 if isinstance(context, int) else context
         )
-        if (X is None) and not hasattr(self, "covariance_matrix_"):
+        if (X is None) and (getattr(self, "covariance_matrix_", None) is None):
             return pd.Series(beta, index=names, name="coef")
 
         covariance_matrix = self.covariance_matrix(
@@ -2374,10 +2353,9 @@ def _should_copy_X(self):
     def _set_up_and_check_fit_args(
         self,
         X: ArrayLike,
-        y: ArrayLike,
+        y: Optional[ArrayLike],
         sample_weight: Optional[VectorLike],
         offset: Optional[VectorLike],
-        solver: str,
         force_all_finite,
         context: Optional[Mapping[str, Any]] = None,
     ) -> tuple[
@@ -2390,7 +2368,7 @@ def _set_up_and_check_fit_args(
         Union[str, np.ndarray],
     ]:
         dtype = [np.float64, np.float32]
-        stype = ["csc"] if solver == "irls-cd" else ["csc", "csr"]
+        stype = ["csc"] if self.solver == "irls-cd" else ["csc", "csr"]
 
         P1 = self.P1
         P2 = self.P2
@@ -2418,8 +2396,8 @@ def _set_up_and_check_fit_args(
                         context=context,
                     )
 
-                    self.y_model_spec_ = y.model_spec
-                    y = y.toarray().ravel()
+                    self.y_model_spec_ = y.model_spec  # type: ignore
+                    y = y.toarray().ravel()  # type: ignore
 
                 X = tm.from_formula(
                     formula=rhs,
@@ -3128,7 +3106,6 @@ def fit(
             y,
             sample_weight,
             offset,
-            solver=self.solver,
             force_all_finite=self.force_all_finite,
             context=captured_context,
         )
@@ -3154,13 +3131,6 @@ def fit(
             if np.any(lower_bounds > upper_bounds):
                 raise ValueError("Upper bounds must be higher than lower bounds.")
 
-        start_params = initialize_start_params(
-            self.start_params,
-            n_cols=X.shape[1],
-            fit_intercept=self.fit_intercept,
-            dtype=[np.float64, np.float32],
-        )
-
         # 1.4 additional validations ##########################################
         if self.check_input:
             # check if P2 is positive semidefinite
@@ -3204,7 +3174,13 @@ def fit(
         #######################################################################
 
         coef = self._get_start_coef(
-            start_params, X, y, sample_weight, offset, col_means, col_stds
+            X,
+            y,
+            sample_weight,
+            offset,
+            col_means,
+            col_stds,
+            dtype=[np.float64, np.float32],
         )
 
         #######################################################################
@@ -3291,8 +3267,6 @@ def fit(
                     col_means, col_stds, 0.0, coef
                 )
 
-        self._tear_down_from_fit()
-
         self.covariance_matrix_ = None
         if store_covariance_matrix:
             self.covariance_matrix(
diff --git a/src/glum/_glm_cv.py b/src/glum/_glm_cv.py
@@ -15,7 +15,6 @@
     _standardize,
     _unstandardize,
     check_bounds,
-    initialize_start_params,
     is_pos_semidef,
     setup_p1,
     setup_p2,
@@ -500,7 +499,6 @@ def fit(
             y,
             sample_weight,
             offset,
-            solver=self.solver,
             force_all_finite=self.force_all_finite,
             context=captured_context,
         )
@@ -588,13 +586,6 @@ def _get_deviance(coef):
             ):
                 assert isinstance(self._link_instance, LogLink)
 
-            start_params = initialize_start_params(
-                self.start_params,
-                n_cols=X.shape[1],
-                fit_intercept=self.fit_intercept,
-                dtype=[np.float64, np.float32],
-            )
-
             P1_no_alpha = setup_p1(P1, X, X.dtype, 1, l1)
             P2_no_alpha = setup_p2(P2, X, _stype, X.dtype, 1, l1)
 
@@ -620,13 +611,13 @@ def _get_deviance(coef):
             )
 
             coef = self._get_start_coef(
-                start_params,
                 x_train,
                 y_train,
                 w_train,
                 offset_train,
                 col_means,
                 col_stds,
+                dtype=[np.float64, np.float32],
             )
 
             if self.check_input:
@@ -748,15 +739,8 @@ def _get_deviance(coef):
             P2,
         )
 
-        start_params = initialize_start_params(
-            self.start_params,
-            n_cols=X.shape[1],
-            fit_intercept=self.fit_intercept,
-            dtype=X.dtype,
-        )
-
         coef = self._get_start_coef(
-            start_params, X, y, sample_weight, offset, col_means, col_stds
+            X, y, sample_weight, offset, col_means, col_stds, dtype=X.dtype
         )
 
         coef = self._solve(
@@ -781,8 +765,6 @@ def _get_deviance(coef):
             # set intercept to zero as the other linear models do
             self.intercept_, self.coef_ = _unstandardize(col_means, col_stds, 0.0, coef)
 
-        self._tear_down_from_fit()
-
         self.covariance_matrix_ = None
         if store_covariance_matrix:
             self.covariance_matrix(