Skip to content

Commit c92c119

Browse files
mathurinm and QB3 authored
API remove is_classif attribute of GeneralizedLinearModel (#66)
Co-authored-by: QB3 <[email protected]>
1 parent a9d42c6 commit c92c119

File tree

5 files changed

+29
-32
lines changed

5 files changed

+29
-32
lines changed

doc/add.rst

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,14 @@
44

55
With skglm, you can solve any custom Generalized Linear Model with arbitrary smooth datafit and arbitrary proximable penalty, by defining two classes: a ``Penalty`` and a ``Datafit``.
66

7-
They can then be passed to a :class:`~skglm.GeneralizedLinearEstimator`, using ``is_classif`` to specify if the task is classification or regression.
7+
They can then be passed to a :class:`~skglm.GeneralizedLinearEstimator`.
88

99

1010
.. code-block:: python
1111
1212
clf = GeneralizedLinearEstimator(
13-
MyDatafit(), MyPenalty(), is_classif=True
13+
MyDatafit(),
14+
MyPenalty(),
1415
)
1516
1617

doc/conf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@
9191
#
9292
# This is also used if you do content translation via gettext catalogs.
9393
# Usually you set "language" from the command line for these cases.
94-
language = None
94+
language = 'en'
9595

9696
# There are two options for replacing |today|: either, you set today to some
9797
# non-false value, then it is used:

examples/plot_logreg_various_penalties.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -35,13 +35,17 @@
3535
alpha = 0.005
3636
gamma = 3.0
3737
l1_ratio = 0.3
38-
clf_enet = GeneralizedLinearEstimator(Logistic(), L1_plus_L2(alpha, l1_ratio),
39-
is_classif=True, verbose=0)
38+
clf_enet = GeneralizedLinearEstimator(
39+
Logistic(),
40+
L1_plus_L2(alpha, l1_ratio),
41+
)
4042
y_pred_enet = clf_enet.fit(X_train, y_train).predict(X_test)
4143
f1_score_enet = f1_score(y_test, y_pred_enet)
4244

43-
clf_mcp = GeneralizedLinearEstimator(Logistic(), MCPenalty(alpha, gamma),
44-
is_classif=True, verbose=0)
45+
clf_mcp = GeneralizedLinearEstimator(
46+
Logistic(),
47+
MCPenalty(alpha, gamma),
48+
)
4549
y_pred_mcp = clf_mcp.fit(X_train, y_train).predict(X_test)
4650
f1_score_mcp = f1_score(y_test, y_pred_mcp)
4751

skglm/estimators.py

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,7 @@
2626

2727

2828
def _glm_fit(X, y, model, datafit, penalty):
29-
is_classif = False
30-
if isinstance(datafit, Logistic) or isinstance(datafit, QuadraticSVC):
31-
is_classif = True
29+
is_classif = isinstance(datafit, (Logistic, QuadraticSVC))
3230

3331
if is_classif:
3432
check_classification_targets(y)
@@ -185,10 +183,6 @@ class GeneralizedLinearEstimator(LinearModel):
185183
Penalty. If None, `penalty` is initialized as a `L1` penalty.
186184
`penalty` is replaced by a JIT-compiled instance when calling fit.
187185
188-
is_classif : bool, optional
189-
Whether the task is classification or regression. Used for input target
190-
validation.
191-
192186
max_iter : int, optional
193187
The maximum number of iterations (subproblem definitions).
194188
@@ -229,11 +223,10 @@ class GeneralizedLinearEstimator(LinearModel):
229223
Number of subproblems solved to reach the specified tolerance.
230224
"""
231225

232-
def __init__(self, datafit=None, penalty=None, is_classif=False, max_iter=100,
226+
def __init__(self, datafit=None, penalty=None, max_iter=100,
233227
max_epochs=50_000, p0=10, tol=1e-4, fit_intercept=True,
234228
warm_start=False, ws_strategy="subdiff", verbose=0):
235229
super(GeneralizedLinearEstimator, self).__init__()
236-
self.is_classif = is_classif
237230
self.tol = tol
238231
self.max_iter = max_iter
239232
self.fit_intercept = fit_intercept
@@ -254,9 +247,9 @@ def __repr__(self):
254247
String representation.
255248
"""
256249
return (
257-
'GeneralizedLinearEstimator(datafit=%s, penalty=%s, alpha=%s, classif=%s)'
250+
'GeneralizedLinearEstimator(datafit=%s, penalty=%s, alpha=%s)'
258251
% (self.datafit.__class__.__name__, self.penalty.__class__.__name__,
259-
self.penalty.alpha, self.is_classif))
252+
self.penalty.alpha))
260253

261254
def fit(self, X, y):
262255
"""Fit estimator.
@@ -300,7 +293,7 @@ def predict(self, X):
300293
y_pred : array, shape (n_samples)
301294
Contain the target values for each sample.
302295
"""
303-
if self.is_classif:
296+
if isinstance(self.datafit, (Logistic, QuadraticSVC)):
304297
scores = self._decision_function(X).ravel()
305298
if len(scores.shape) == 1:
306299
indices = (scores > 0).astype(int)

skglm/tests/test_estimators.py

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -154,26 +154,25 @@ def test_mtl_path():
154154

155155

156156
# Test if GeneralizedLinearEstimator returns the correct coefficients
157-
@pytest.mark.parametrize("Datafit, Penalty, is_classif, Estimator, pen_args", [
158-
(Quadratic, L1, False, Lasso, [alpha]),
159-
(Quadratic, WeightedL1, False, WeightedLasso,
157+
@pytest.mark.parametrize("Datafit, Penalty, Estimator, pen_args", [
158+
(Quadratic, L1, Lasso, [alpha]),
159+
(Quadratic, WeightedL1, WeightedLasso,
160160
[alpha, np.random.choice(3, n_features)]),
161-
(Quadratic, L1_plus_L2, False, ElasticNet, [alpha, 0.3]),
162-
(Quadratic, MCPenalty, False, MCPRegression, [alpha, 3]),
163-
(QuadraticSVC, IndicatorBox, True, LinearSVC, [alpha]),
164-
(Logistic, L1, True, SparseLogisticRegression, [alpha]),
161+
(Quadratic, L1_plus_L2, ElasticNet, [alpha, 0.3]),
162+
(Quadratic, MCPenalty, MCPRegression, [alpha, 3]),
163+
(QuadraticSVC, IndicatorBox, LinearSVC, [alpha]),
164+
(Logistic, L1, SparseLogisticRegression, [alpha]),
165165
])
166166
@pytest.mark.parametrize('fit_intercept', [True, False])
167-
def test_generic_estimator(
168-
fit_intercept, Datafit, Penalty, is_classif, Estimator, pen_args):
167+
def test_generic_estimator(fit_intercept, Datafit, Penalty, Estimator, pen_args):
169168
if isinstance(Datafit(), QuadraticSVC) and fit_intercept:
170169
pytest.xfail()
171170
elif Datafit == Logistic and fit_intercept:
172171
pytest.xfail("TODO support intercept in Logistic datafit")
173172
else:
174173
target = Y if Datafit == QuadraticMultiTask else y
175174
gle = GeneralizedLinearEstimator(
176-
Datafit(), Penalty(*pen_args), is_classif, tol=1e-10,
175+
Datafit(), Penalty(*pen_args), tol=1e-10,
177176
fit_intercept=fit_intercept).fit(X, target)
178177
est = Estimator(
179178
*pen_args, tol=1e-10, fit_intercept=fit_intercept).fit(X, target)
@@ -201,7 +200,7 @@ def test_estimator_predict(Datafit, Penalty, Estimator_sk):
201200
}
202201
X_test = np.random.normal(0, 1, (n_samples, n_features))
203202
clf = GeneralizedLinearEstimator(
204-
Datafit(), Penalty(1.), is_classif, fit_intercept=False, tol=tol).fit(X, y)
203+
Datafit(), Penalty(1.), fit_intercept=False, tol=tol).fit(X, y)
205204
clf_sk = Estimator_sk(**estim_args[Estimator_sk]).fit(X, y)
206205
y_pred = clf.predict(X_test)
207206
y_pred_sk = clf_sk.predict(X_test)
@@ -221,8 +220,8 @@ def assert_deep_dict_equal(expected_attr, estimator):
221220
else:
222221
assert v == v_est
223222

224-
reg = GeneralizedLinearEstimator(Quadratic(), L1(4.), is_classif=False)
225-
clf = GeneralizedLinearEstimator(Logistic(), MCPenalty(2., 3.), is_classif=True)
223+
reg = GeneralizedLinearEstimator(Quadratic(), L1(4.))
224+
clf = GeneralizedLinearEstimator(Logistic(), MCPenalty(2., 3.))
226225

227226
# Xty and lipschitz attributes are defined for jit compiled classes
228227
# hence they are not included in the test

0 commit comments

Comments (0)