Support for sklearn 1.6 conformance testing (#2465)

ethanglaser · Alexsandruss · web-flow · commit a5b4e69d5548 · 2025-07-01T13:36:28.000-07:00
* tweaks to logistic_path to match sklearn

* follow ups

* sklearn 1.4 gpu conformance fix attempt

* restore svm changes and move ridge checks to sklearnex

* update deselected tests file

* add 4 more deselections

* apply black formatting

* xpass/xfail deselections...I give up

* one more xfail

---------

Co-authored-by: Alexander Andreev <alexander.andreev@intel.com>
diff --git a/daal4py/sklearn/linear_model/logistic_path.py b/daal4py/sklearn/linear_model/logistic_path.py
@@ -359,14 +359,16 @@ def __logistic_regression_path(
             y_bin = np.ones(y.shape, dtype=X.dtype)
             # for compute_class_weight
 
-            if solver in ["lbfgs", "newton-cg"]:
+            if solver == "liblinear" or (
+                not sklearn_check_version("1.6") and solver not in ["lbfgs", "newton-cg"]
+            ):
+                mask_classes = np.array([-1, 1])
+                y_bin[~mask] = -1.0
+            else:
                 # HalfBinomialLoss, used for those solvers, represents y in [0, 1] instead
                 # of in [-1, 1].
                 mask_classes = np.array([0, 1])
                 y_bin[~mask] = 0.0
-            else:
-                mask_classes = np.array([-1, 1])
-                y_bin[~mask] = -1.0
         else:
             mask_classes = np.array([-1, 1])
             mask = y == pos_class
@@ -388,7 +390,11 @@ def __logistic_regression_path(
 
     else:
         if sklearn_check_version("1.1"):
-            if solver in ["sag", "saga", "lbfgs", "newton-cg"]:
+            if sklearn_check_version("1.6"):
+                solver_list = ["sag", "saga", "lbfgs", "newton-cg", "newton-cholesky"]
+            else:
+                solver_list = ["sag", "saga", "lbfgs", "newton-cg"]
+            if solver in solver_list:
                 # SAG, lbfgs and newton-cg multinomial solvers need LabelEncoder,
                 # not LabelBinarizer, i.e. y as a 1d-array of integers.
                 # LabelEncoder also saves memory compared to LabelBinarizer, especially
@@ -488,7 +494,11 @@ def __logistic_regression_path(
 
     if multi_class == "multinomial":
         # fmin_l_bfgs_b and newton-cg accepts only ravelled parameters.
-        if solver in ["lbfgs", "newton-cg"]:
+        if sklearn_check_version("1.6"):
+            solver_list = ["lbfgs", "newton-cg", "newton-cholesky"]
+        else:
+            solver_list = ["lbfgs", "newton-cg"]
+        if solver in solver_list:
             if _dal_ready and classes.size == 2:
                 w0 = w0[-1:, :]
             if sklearn_check_version("1.1"):
@@ -753,7 +763,11 @@ def _func_(x, *args):
             else:
                 n_classes = max(2, classes.size)
                 if sklearn_check_version("1.1"):
-                    if solver in ["lbfgs", "newton-cg"]:
+                    if sklearn_check_version("1.6"):
+                        solver_list = ["lbfgs", "newton-cg", "newton-cholesky"]
+                    else:
+                        solver_list = ["lbfgs", "newton-cg"]
+                    if solver in solver_list:
                         multi_w0 = np.reshape(w0, (n_classes, -1), order="F")
                     else:
                         multi_w0 = w0
diff --git a/deselected_tests.yaml b/deselected_tests.yaml
@@ -342,6 +342,21 @@ deselected_tests:
   # to CI parameters, as parameter validation is globally handled in sklearn version 1.2 onward
   - cluster/tests/test_dbscan.py::test_dbscan_params_validation
 
+  # Since sklearn 1.6: the logreg bug from joblib with_parallel_backend still needs to be resolved.
+  # Removing this deselection will cause test_logistic tests to fail (this test itself will pass).
+  - feature_selection/tests/test_rfe.py::test_rfe_with_joblib_threading_backend
+  # Failing tests since sklearn 1.6
+  - tests/test_common.py::test_estimators[CalibratedClassifierCV(cv=3,estimator=LogisticRegression(C=1))-check_sample_weight_equivalence_on_dense_data]
+  - tests/test_common.py::test_estimators[ExtraTreesClassifier(n_estimators=5)-check_sample_weight_equivalence_on_dense_data]
+  - tests/test_common.py::test_estimators[ExtraTreesRegressor(n_estimators=5)-check_sample_weight_equivalence_on_dense_data]
+  - utils/tests/test_estimator_checks.py::test_xfail_count_with_no_fast_fail
+  # XFail vs XPass differs between scikit-learn and scikit-learn-intelex since 1.6
+  - tests/test_common.py::test_estimators[LinearRegression()-check_sample_weight_equivalence_on_dense_data] <1.7
+  - tests/test_common.py::test_estimators[LogisticRegression(max_iter=5)-check_sample_weight_equivalence_on_dense_data]
+  - tests/test_common.py::test_estimators[LogisticRegression(max_iter=5,solver='newton-cg')-check_sample_weight_equivalence_on_dense_data]
+  - tests/test_common.py::test_estimators[NuSVC()-check_class_weight_classifiers]
+  - tests/test_common.py::test_estimators[CalibratedClassifierCV(estimator=LogisticRegression(C=1))-check_sample_weights_invariance(kind=ones)]
+
   # --------------------------------------------------------
   # No need to test daal4py patching
 reduced_tests:
diff --git a/onedal/linear_model/linear_model.py b/onedal/linear_model/linear_model.py
@@ -340,8 +340,4 @@ def fit(self, X, y, queue=None):
             packed_coefficients[:, 0],
         )
 
-        if self.coef_.shape[0] == 1 and y.ndim == 1:
-            self.coef_ = self.coef_.ravel()
-            self.intercept_ = self.intercept_[0]
-
         return self
diff --git a/sklearnex/linear_model/ridge.py b/sklearnex/linear_model/ridge.py
@@ -325,6 +325,15 @@ def _onedal_fit(self, X, y, sample_weight, queue=None):
             self._onedal_estimator.fit(X, y, queue=queue)
             self._save_attributes()
 
+            if sklearn_check_version("1.6"):
+                if y.ndim == 1 or y.shape[1] == 1:
+                    self.coef_ = self.coef_.ravel()
+                    self.intercept_ = self.intercept_[0]
+            else:
+                if self.coef_.shape[0] == 1 and y.ndim == 1:
+                    self.coef_ = self.coef_.ravel()
+                    self.intercept_ = self.intercept_[0]
+
         def _onedal_predict(self, X, queue=None):
             X = validate_data(self, X, accept_sparse=False, reset=False)
 

Original file line number	Diff line number	Diff line change
`@@ -340,8 +340,4 @@ def fit(self, X, y, queue=None):`
`340`	`340`	`packed_coefficients[:, 0],`
`341`	`341`	`)`
`342`	`342`
`343`		`- if self.coef_.shape[0] == 1 and y.ndim == 1:`
`344`		`- self.coef_ = self.coef_.ravel()`
`345`		`- self.intercept_ = self.intercept_[0]`
`346`		`-`
`347`	`343`	`return self`