diff --git a/src/skmatter/_selection.py b/src/skmatter/_selection.py
index 6869a2235..50e55cff3 100644
--- a/src/skmatter/_selection.py
+++ b/src/skmatter/_selection.py
@@ -83,8 +83,13 @@
 from scipy.sparse.linalg import eigsh
 from sklearn.base import BaseEstimator, MetaEstimatorMixin
 from sklearn.feature_selection._base import SelectorMixin
-from sklearn.utils import check_array, check_random_state, check_X_y, safe_mask
-from sklearn.utils.validation import FLOAT_DTYPES, as_float_array, check_is_fitted
+from sklearn.utils import check_random_state, safe_mask
+from sklearn.utils.validation import (
+    FLOAT_DTYPES,
+    as_float_array,
+    check_is_fitted,
+    validate_data,
+)
 
 from .utils import (
     X_orthogonalizer,
@@ -157,11 +162,6 @@ def __init__(
         self.n_to_select = n_to_select
         self.score_threshold = score_threshold
         self.score_threshold_type = score_threshold_type
-        if self.score_threshold_type not in ["relative", "absolute"]:
-            raise ValueError(
-                "invalid score_threshold_type, expected one of 'relative' or 'absolute'"
-            )
-
         self.full = full
         self.progress_bar = progress_bar
         self.random_state = random_state
@@ -184,6 +184,11 @@ def fit(self, X, y=None, warm_start=False):
         -------
         self : object
         """
+        if self.score_threshold_type not in ["relative", "absolute"]:
+            raise ValueError(
+                "invalid score_threshold_type, expected one of 'relative' or 'absolute'"
+            )
+
         if self.selection_type == "feature":
             self._axis = 1
         elif self.selection_type == "sample":
@@ -205,7 +210,7 @@ def fit(self, X, y=None, warm_start=False):
 
         if hasattr(self, "mixing") or y is not None:
             X, y = self._validate_data(X, y, **params)
-            X, y = check_X_y(X, y, multi_output=True)
+            X, y = validate_data(self, X, y, multi_output=True)
 
             if len(y.shape) == 1:
                 # force y to have multi_output 2D format even when it's 1D, since
@@ -214,7 +219,7 @@
                 y = y.reshape((len(y), 1))
 
         else:
-            X = check_array(X, **params)
+            X = validate_data(self, X, **params)
 
         if self.full and self.score_threshold is not None:
             raise ValueError(
@@ -308,7 +313,7 @@ def transform(self, X, y=None):
 
         mask = self.get_support()
 
-        X = check_array(X)
+        X = validate_data(self, X, reset=False)
 
         if len(X.shape) == 1:
             if self._axis == 0:
@@ -486,6 +491,11 @@ def _more_tags(self):
             "requires_y": False,
         }
 
+    def __sklearn_tags__(self):
+        tags = super().__sklearn_tags__()
+        tags.target_tags.required = False
+        return tags
+
 
 class _CUR(GreedySelector):
     """Transformer that performs Greedy Selection by choosing features
@@ -560,6 +570,8 @@ def score(self, X, y=None):
         score : numpy.ndarray of (n_to_select_from_)
             :math:`\pi` importance for the given samples or features
         """
+        X, y = validate_data(self, X, y, reset=False)
+
         return self.pi_
 
     def _init_greedy_search(self, X, y, n_to_select):
@@ -734,6 +746,8 @@ def score(self, X, y=None):
         score : numpy.ndarray of (n_to_select_from_)
             :math:`\pi` importance for the given samples or features
         """
+        X, y = validate_data(self, X, y, reset=False)
+
         return self.pi_
 
     def _init_greedy_search(self, X, y, n_to_select):
@@ -927,6 +941,8 @@ def score(self, X, y=None):
         -------
         hausdorff : Hausdorff distances
         """
+        X, y = validate_data(self, X, y, reset=False)
+
         return self.hausdorff_
 
     def get_distance(self):
@@ -1048,11 +1064,6 @@ def __init__(
         full=False,
         random_state=0,
     ):
-        if mixing == 1.0:
-            raise ValueError(
-                "Mixing = 1.0 corresponds to traditional FPS."
-                "Please use the FPS class."
-            )
 
         self.mixing = mixing
         self.initialize = initialize
@@ -1067,6 +1078,16 @@
             random_state=random_state,
         )
 
+    def fit(self, X, y=None, warm_start=False):
+
+        if self.mixing == 1.0:
+            raise ValueError(
+                "Mixing = 1.0 corresponds to traditional FPS."
+                "Please use the FPS class."
+            )
+
+        return super().fit(X, y)
+
     def score(self, X, y=None):
         """Returns the Hausdorff distances of all samples to previous selections.
@@ -1083,6 +1104,8 @@ def score(self, X, y=None):
         -------
         hausdorff : Hausdorff distances
         """
+        X, y = validate_data(self, X, y, reset=False)
+
         return self.hausdorff_
 
     def get_distance(self):
@@ -1159,3 +1182,8 @@ def _more_tags(self):
         return {
             "requires_y": True,
         }
+
+    def __sklearn_tags__(self):
+        tags = super().__sklearn_tags__()
+        tags.target_tags.required = True
+        return tags
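Note: every hunk in this file follows the same scikit-learn >= 1.6 migration: the removed `BaseEstimator._validate_data` method and the free-standing `check_array`/`check_X_y` helpers are replaced by the public `sklearn.utils.validation.validate_data`, which takes the estimator as its first argument; passing `reset=False` re-checks the `n_features_in_` recorded during `fit` instead of overwriting it. A minimal sketch of the pattern, assuming scikit-learn >= 1.6 (`IdentitySelector` is an illustrative name, not part of scikit-matter):

    import numpy as np
    from sklearn.base import BaseEstimator, TransformerMixin
    from sklearn.utils.validation import check_is_fitted, validate_data

    class IdentitySelector(TransformerMixin, BaseEstimator):
        def fit(self, X, y=None):
            # validate_data(self, ...) records n_features_in_ on the estimator
            X = validate_data(self, X)
            self.mask_ = np.ones(X.shape[1], dtype=bool)
            return self

        def transform(self, X):
            check_is_fitted(self, ["mask_"])
            # reset=False validates against the feature count seen during fit
            X = validate_data(self, X, reset=False)
            return X[:, self.mask_]

    IdentitySelector().fit(np.eye(3)).transform(np.eye(3))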
diff --git a/src/skmatter/decomposition/_kernel_pcovr.py b/src/skmatter/decomposition/_kernel_pcovr.py
index 84a9439e1..65fe39a3e 100644
--- a/src/skmatter/decomposition/_kernel_pcovr.py
+++ b/src/skmatter/decomposition/_kernel_pcovr.py
@@ -9,10 +9,10 @@
 from sklearn.kernel_ridge import KernelRidge
 from sklearn.linear_model._base import LinearModel
 from sklearn.metrics.pairwise import pairwise_kernels
-from sklearn.utils import check_array, check_random_state
+from sklearn.utils import check_random_state
 from sklearn.utils._arpack import _init_arpack_v0
 from sklearn.utils.extmath import randomized_svd, stable_cumsum, svd_flip
-from sklearn.utils.validation import check_is_fitted, check_X_y
+from sklearn.utils.validation import check_is_fitted, validate_data
 
 from ..preprocessing import KernelNormalizer
 from ..utils import check_krr_fit, pcovr_kernel
@@ -270,7 +270,7 @@ def fit(self, X, Y, W=None):
         ):
             raise ValueError("Regressor must be an instance of `KernelRidge`")
 
-        X, Y = check_X_y(X, Y, y_numeric=True, multi_output=True)
+        X, Y = validate_data(self, X, Y, y_numeric=True, multi_output=True)
         self.X_fit_ = X.copy()
 
         if self.n_components is None:
@@ -387,7 +387,7 @@ def predict(self, X=None):
         """Predicts the property values"""
         check_is_fitted(self, ["pky_", "pty_"])
 
-        X = check_array(X)
+        X = validate_data(self, X, reset=False)
         K = self._get_kernel(X, self.X_fit_)
         if self.center:
             K = self.centerer_.transform(K)
@@ -408,7 +408,7 @@ def transform(self, X):
         """
         check_is_fitted(self, ["pkt_", "X_fit_"])
 
-        X = check_array(X)
+        X = validate_data(self, X, reset=False)
         K = self._get_kernel(X, self.X_fit_)
 
         if self.center:
@@ -440,7 +440,7 @@ def inverse_transform(self, T):
         """
         return T @ self.ptx_
 
-    def score(self, X, Y):
+    def score(self, X, y):
         r"""Computes the (negative) loss values for KernelPCovR on the given
         predictor and response variables.
 
         The loss in :math:`\mathbf{K}`, as explained in [Helfrecht2020]_
         does not correspond to a traditional Gram loss
@@ -474,7 +474,7 @@
         """
         check_is_fitted(self, ["pkt_", "X_fit_"])
 
-        X = check_array(X)
+        X, y = validate_data(self, X, y, reset=False)
 
         K_NN = self._get_kernel(self.X_fit_, self.X_fit_)
         K_VN = self._get_kernel(X, self.X_fit_)
@@ -485,8 +485,8 @@
             K_VN = self.centerer_.transform(K_VN)
             K_VV = self.centerer_.transform(K_VV)
 
-        y = K_VN @ self.pky_
-        Lkrr = np.linalg.norm(Y - y) ** 2 / np.linalg.norm(Y) ** 2
+        ypred = K_VN @ self.pky_
+        Lkrr = np.linalg.norm(y - ypred) ** 2 / np.linalg.norm(y) ** 2
 
         t_n = K_NN @ self.pkt_
         t_v = K_VN @ self.pkt_
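Note: the `score(self, X, Y)` -> `score(self, X, y)` rename is more than style: scikit-learn's scorers and `check_estimator` invoke `estimator.score(X, y)` with a lowercase `y` keyword, so the old signature broke keyword calls. A sketch of that calling convention, assuming scikit-learn >= 1.6 (`MeanRegressor` is a made-up example, not part of the package):

    import numpy as np
    from sklearn.base import BaseEstimator, RegressorMixin
    from sklearn.utils.validation import validate_data

    class MeanRegressor(RegressorMixin, BaseEstimator):
        def fit(self, X, y):
            X, y = validate_data(self, X, y, y_numeric=True)
            self.mean_ = float(np.mean(y))
            return self

        def predict(self, X):
            # reset=False raises if X has a different feature count than at fit
            X = validate_data(self, X, reset=False)
            return np.full(X.shape[0], self.mean_)

    reg = MeanRegressor().fit(np.ones((4, 2)), np.arange(4.0))
    # RegressorMixin.score forwards to predict and accepts y by keyword:
    print(reg.score(X=np.ones((4, 2)), y=np.arange(4.0)))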
diff --git a/src/skmatter/decomposition/_pcovr.py b/src/skmatter/decomposition/_pcovr.py
index ddaf3bebd..8cdd24680 100644
--- a/src/skmatter/decomposition/_pcovr.py
+++ b/src/skmatter/decomposition/_pcovr.py
@@ -10,10 +10,10 @@
 from sklearn.decomposition._pca import _infer_dimension
 from sklearn.linear_model import LinearRegression, Ridge, RidgeCV
 from sklearn.linear_model._base import LinearModel
-from sklearn.utils import check_array, check_random_state
+from sklearn.utils import check_random_state
 from sklearn.utils._arpack import _init_arpack_v0
 from sklearn.utils.extmath import randomized_svd, stable_cumsum, svd_flip
-from sklearn.utils.validation import check_is_fitted, check_X_y
+from sklearn.utils.validation import check_is_fitted, validate_data
 
 from ..utils import check_lr_fit, pcovr_covariance, pcovr_kernel
@@ -221,7 +221,7 @@ def fit(self, X, Y, W=None):
             Regression weights, optional when regressor=`precomputed`. If not
             passed, it is assumed that `W = np.linalg.lstsq(X, Y, self.tol)[0]`
         """
-        X, Y = check_X_y(X, Y, y_numeric=True, multi_output=True)
+        X, Y = validate_data(self, X, Y, y_numeric=True, multi_output=True)
 
         # saved for inverse transformations from the latent space,
         # should be zero in the case that the features have been properly centered
@@ -582,10 +582,10 @@ def predict(self, X=None, T=None):
             raise ValueError("Either X or T must be supplied.")
 
         if X is not None:
-            X = check_array(X)
+            X = validate_data(self, X, reset=False)
             return X @ self.pxy_
         else:
-            T = check_array(T)
+            T = validate_data(self, T, reset=False)
             return T @ self.pty_
 
     def transform(self, X=None):
@@ -604,7 +604,7 @@
 
         return super().transform(X)
 
-    def score(self, X, Y, T=None):
+    def score(self, X, y, T=None):
         r"""Return the (negative) total reconstruction error for X and Y,
         defined as:
@@ -635,13 +635,15 @@
             Negative sum of the loss in reconstructing X from the latent-space
             projection T and the loss in predicting Y from the latent-space
             projection T
         """
+        X, y = validate_data(self, X, y, reset=False)
+
         if T is None:
             T = self.transform(X)
-        x = self.inverse_transform(T)
-        y = self.predict(T=T)
+        Xrec = self.inverse_transform(T)
+        ypred = self.predict(T=T)
 
         return -(
-            np.linalg.norm(X - x) ** 2.0 / np.linalg.norm(X) ** 2.0
-            + np.linalg.norm(Y - y) ** 2.0 / np.linalg.norm(Y) ** 2.0
+            np.linalg.norm(X - Xrec) ** 2.0 / np.linalg.norm(X) ** 2.0
+            + np.linalg.norm(y - ypred) ** 2.0 / np.linalg.norm(y) ** 2.0
         )
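Note: renaming the locals in `PCovR.score` to `Xrec`/`ypred` stops the method from shadowing its own `y` argument; the returned quantity is unchanged. Restated standalone with plain numpy (`neg_pcovr_loss` is an illustrative helper, not package API):

    import numpy as np

    def neg_pcovr_loss(X, y, Xrec, ypred):
        # relative reconstruction error in X plus relative prediction error in y
        lx = np.linalg.norm(X - Xrec) ** 2.0 / np.linalg.norm(X) ** 2.0
        ly = np.linalg.norm(y - ypred) ** 2.0 / np.linalg.norm(y) ** 2.0
        return -(lx + ly)

    rng = np.random.default_rng(0)
    X, y = rng.normal(size=(10, 3)), rng.normal(size=(10, 1))
    print(neg_pcovr_loss(X, y, X, y))  # perfect reconstruction gives -0.0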
diff --git a/src/skmatter/linear_model/_ridge.py b/src/skmatter/linear_model/_ridge.py
index 6e4fcf1f3..9dd5e1678 100644
--- a/src/skmatter/linear_model/_ridge.py
+++ b/src/skmatter/linear_model/_ridge.py
@@ -3,11 +3,10 @@
 from sklearn.base import BaseEstimator, MultiOutputMixin, RegressorMixin
 from sklearn.metrics import check_scoring
 from sklearn.model_selection import KFold, check_cv
-from sklearn.utils import check_array
-from sklearn.utils.validation import check_is_fitted
+from sklearn.utils.validation import check_is_fitted, validate_data
 
 
-class Ridge2FoldCV(BaseEstimator, MultiOutputMixin, RegressorMixin):
+class Ridge2FoldCV(RegressorMixin, MultiOutputMixin, BaseEstimator):
     r"""Ridge regression with an efficient 2-fold cross-validation method using
     the SVD solver.
@@ -20,7 +19,7 @@
     while the alpha value is determined with a 2-fold cross-validation from a
     list of alpha values. It is more efficient version than doing 2-fold
     cross-validation naively The algorithmic trick is to reuse the matrices
     obtained by SVD for each
-    regularization paramater :param alpha: The 2-fold CV can be broken donw to
+    regularization parameter :param alpha: The 2-fold CV can be broken down to
 
     .. math::
@@ -136,6 +135,11 @@ def __init__(
         self.shuffle = shuffle
         self.n_jobs = n_jobs
 
+    def __sklearn_tags__(self):
+        tags = super().__sklearn_tags__()
+        tags.target_tags.single_output = False
+        return tags
+
     def _more_tags(self):
         return {"multioutput_only": True}
@@ -195,7 +199,7 @@ def predict(self, X):
             Training data, where n_samples is the number of samples and
             n_features is the number of features.
         """
-        X = check_array(X)
+        X = validate_data(self, X, reset=False)
 
         check_is_fitted(self, ["coef_"])
diff --git a/src/skmatter/sample_selection/_base.py b/src/skmatter/sample_selection/_base.py
index f5531d897..67d5f0472 100644
--- a/src/skmatter/sample_selection/_base.py
+++ b/src/skmatter/sample_selection/_base.py
@@ -4,7 +4,7 @@
 
 import numpy as np
 from scipy.interpolate import LinearNDInterpolator, interp1d
-from scipy.interpolate.interpnd import _ndim_coords_from_arrays
+from scipy.interpolate._interpnd import _ndim_coords_from_arrays
 from scipy.spatial import ConvexHull
 from sklearn.utils.validation import check_array, check_is_fitted, check_X_y
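Note: the `_more_tags` dicts are left in place next to the new `__sklearn_tags__` overrides, presumably to keep older scikit-learn releases working; under >= 1.6 estimator tags are a dataclass, and mixins must precede `BaseEstimator` in the bases (hence the reordered `Ridge2FoldCV` class signature) so that `super().__sklearn_tags__()` composes the defaults correctly. A minimal sketch, assuming scikit-learn >= 1.6 (the class name is illustrative):

    from sklearn.base import BaseEstimator, RegressorMixin

    class MultiOutputOnlyRegressor(RegressorMixin, BaseEstimator):
        def __sklearn_tags__(self):
            tags = super().__sklearn_tags__()
            tags.target_tags.required = True        # fit() requires y
            tags.target_tags.single_output = False  # multi-output only
            return tags

    print(MultiOutputOnlyRegressor().__sklearn_tags__().target_tags)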