Skip to content

Commit 810180a

Browse files
authored
Add support for feature_names_in_ (#7877)
This PR adds support for `feature_names_in_`, implemented through the recently added `check_features` validation. This implementation matches sklearn's implementation, helping us achieve better compatibility. In cases where this new validation check would cause previously working code to error, we now raise a `FutureWarning` noting that in 26.06 this will start to error, as well as the future error message. When run under `cuml.accel` we instead error right away, helping us improve sklearn compatibility right away when in `cuml.accel`. These `FutureWarning`s will mainly show up if you fit on a dataframe with columns (say `["a", "b"]`) that don't match the columns of a _dataframe_ provided during inference (say `["x", "y"]`). In the future that will error as the feature names don't match between fit and inference times. Because of this new `FutureWarning` (and soon to be error) I'm marking this PR as "breaking" as per our policy. Note that users fitting on a dataframe and inferring on an array (or vice versa) will see a `UserWarning` in those cases. This matches sklearn behavior. This warning will _not_ turn into an error in a future release, these warnings will remain warnings. Fixes #6650. Fixes #5677. Fixes #6498. Part of #7428 Authors: - Jim Crist-Harif (https://github.com/jcrist) Approvers: - Simon Adorf (https://github.com/csadorf) URL: #7877
1 parent 868c4cf commit 810180a

File tree

16 files changed

+309
-217
lines changed

16 files changed

+309
-217
lines changed

python/cuml/cuml/_thirdparty/sklearn/preprocessing/_column_transformer.py

Lines changed: 2 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@
3636
import cuml
3737
from cuml.internals.array_sparse import SparseCumlArray
3838
from cuml.internals.global_settings import _global_settings_data
39-
from cuml.internals.validation import check_is_fitted
39+
from cuml.internals.validation import check_is_fitted, check_features
4040

4141
from ....thirdparty_adapters import check_array
4242
from ..preprocessing._function_transformer import FunctionTransformer
@@ -880,13 +880,6 @@ def fit_transform(self, X, y=None) -> SparseCumlArray:
880880
sparse matrices.
881881
882882
"""
883-
# TODO: this should be `feature_names_in_` when we start having it
884-
if hasattr(X, "columns"):
885-
self._feature_names_in = cpu_np.asarray(X.columns)
886-
else:
887-
self._feature_names_in = None
888-
# set n_features_in_ attribute
889-
self._check_n_features(X, reset=True)
890883
self._validate_transformers()
891884
self._validate_column_callables(X)
892885
self._validate_remainder(X)
@@ -935,19 +928,7 @@ def transform(self, X) -> SparseCumlArray:
935928
936929
"""
937930
check_is_fitted(self)
938-
if hasattr(X, "columns"):
939-
X_feature_names = cpu_np.asarray(X.columns)
940-
else:
941-
X_feature_names = None
942-
943-
self._check_n_features(X, reset=False)
944-
if (self._feature_names_in is not None and
945-
X_feature_names is not None and
946-
cpu_np.any(self._feature_names_in != X_feature_names)):
947-
raise RuntimeError(
948-
"Given feature/column names do not match the ones for the "
949-
"data given during fit."
950-
)
931+
check_features(self, X)
951932
Xs = self._fit_transform(X, None, _transform_one, fitted=True)
952933
self._validate_output(Xs)
953934

python/cuml/cuml/_thirdparty/sklearn/utils/skl_dependencies.py

Lines changed: 7 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515

1616
from cuml.internals.array_sparse import SparseCumlArray
17+
from cuml.internals.validation import check_features
1718

1819
from ....internals.base import Base
1920
from ....thirdparty_adapters import check_array
@@ -41,35 +42,6 @@ def init(self, *args, **kwargs):
4142

4243
cls.__init__ = init
4344

44-
def _check_n_features(self, X, reset):
45-
"""Set the `n_features_in_` attribute, or check against it.
46-
47-
Parameters
48-
----------
49-
X : {ndarray, sparse matrix} of shape (n_samples, n_features)
50-
The input samples.
51-
reset : bool
52-
If True, the `n_features_in_` attribute is set to `X.shape[1]`.
53-
Else, the attribute must already exist and the function checks
54-
that it is equal to `X.shape[1]`.
55-
"""
56-
n_features = X.shape[1]
57-
58-
if reset:
59-
self.n_features_in_ = n_features
60-
else:
61-
if not hasattr(self, 'n_features_in_'):
62-
raise RuntimeError(
63-
"The reset parameter is False but there is no "
64-
"n_features_in_ attribute. Is this estimator fitted?"
65-
)
66-
if n_features != self.n_features_in_:
67-
raise ValueError(
68-
'X has {} features, but {} is expecting {} features '
69-
'as input.'.format(n_features, self.__class__.__name__,
70-
self.n_features_in_)
71-
)
72-
7345
def _validate_data(self, X, y=None, reset=True,
7446
validate_separately=False, **check_params):
7547
"""Validate input data and set or check the `n_features_in_` attribute.
@@ -100,6 +72,12 @@ def _validate_data(self, X, y=None, reset=True,
10072
out : {ndarray, sparse matrix} or tuple of these
10173
The validated input. A tuple is returned if `y` is not None.
10274
"""
75+
if check_params.get('ensure_2d', True) and not reset:
76+
# The `reset=True` case is always handled by the mandatory
77+
# `reflect(reset=True)` decorators currently. To avoid
78+
# duplicate calls, we avoid `check_features(self, X, reset=True)`
79+
# for now.
80+
check_features(self, X)
10381

10482
if y is None:
10583
if self._get_tags()['requires_y']:
@@ -122,9 +100,6 @@ def _validate_data(self, X, y=None, reset=True,
122100
X, y = check_X_y(X, y, **check_params)
123101
out = X, y
124102

125-
if check_params.get('ensure_2d', True):
126-
self._check_n_features(X, reset=reset)
127-
128103
return out
129104

130105

python/cuml/cuml/accel/_overrides/sklearn/preprocessing.py

Lines changed: 2 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -121,46 +121,14 @@ class TargetEncoder(ProxyBase):
121121
_gpu_class = cuml.preprocessing.TargetEncoder
122122

123123
def _gpu_fit(self, X, y, **kwargs):
124-
"""Fit with independent mode for sklearn compatibility.
125-
126-
sklearn's TargetEncoder always encodes features independently,
127-
so we force independent mode when using cuml.accel.
128-
"""
129124
# Check for unsupported inputs (triggers CPU fallback)
130125
_check_unsupported_inputs(X, y, self._cpu)
131-
132-
# Ensure independent mode is set for sklearn compatibility
133-
self._gpu.multi_feature_mode = "independent"
134-
result = self._gpu.fit(X, y, **kwargs)
135-
136-
# Sync sklearn-expected attributes to the proxy
137-
if hasattr(self._gpu, "feature_names_in_"):
138-
self.feature_names_in_ = self._gpu.feature_names_in_
139-
if hasattr(self._gpu, "n_features_in_"):
140-
self.n_features_in_ = self._gpu.n_features_in_
141-
142-
return result
126+
return self._gpu.fit(X, y, **kwargs)
143127

144128
def _gpu_fit_transform(self, X, y, **kwargs):
145-
"""Fit-transform with independent mode for sklearn compatibility.
146-
147-
sklearn's TargetEncoder always encodes features independently,
148-
so we force independent mode when using cuml.accel.
149-
"""
150129
# Check for unsupported inputs (triggers CPU fallback)
151130
_check_unsupported_inputs(X, y, self._cpu)
152-
153-
# Ensure independent mode is set for sklearn compatibility
154-
self._gpu.multi_feature_mode = "independent"
155-
result = self._gpu.fit_transform(X, y, **kwargs)
156-
157-
# Sync sklearn-expected attributes to the proxy
158-
if hasattr(self._gpu, "feature_names_in_"):
159-
self.feature_names_in_ = self._gpu.feature_names_in_
160-
if hasattr(self._gpu, "n_features_in_"):
161-
self.n_features_in_ = self._gpu.n_features_in_
162-
163-
return result
131+
return self._gpu.fit_transform(X, y, **kwargs)
164132

165133
def _gpu_get_feature_names_out(self, input_features=None):
166134
"""Return feature names for output features.

python/cuml/cuml/accel/estimator_proxy.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -422,12 +422,9 @@ def __getattr__(self, name: str) -> Any:
422422
try:
423423
return getattr(self._cpu, name)
424424
except AttributeError:
425-
# We special case `feature_names_in_` here since it's the only common
426-
# fitted attribute that cuml doesn't support anywhere.
427-
if (
428-
name in self._not_implemented_attributes
429-
or name == "feature_names_in_"
430-
) and is_fitted(self._cpu):
425+
if name in self._not_implemented_attributes and is_fitted(
426+
self._cpu
427+
):
431428
raise AttributeError(
432429
f"The `{type(self).__name__}.{name}` attribute is not yet "
433430
"implemented in `cuml.accel`.\n\n"

python/cuml/cuml/internals/interop.py

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION.
1+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026, NVIDIA CORPORATION.
22
# SPDX-License-Identifier: Apache-2.0
33
from __future__ import annotations
44

@@ -189,15 +189,11 @@ def _attrs_to_cpu(self, model) -> dict[str, Any]:
189189
If one or more attributes are unsupported by the CPU model.
190190
"""
191191
out = {}
192-
if (
193-
n_features_in_ := getattr(self, "n_features_in_", None)
194-
) is not None:
195-
out["n_features_in_"] = n_features_in_
196-
197-
# TODO: Some cuml estimators set `feature_names_in_`, but they don't
198-
# do this properly per sklearn conventions. For now we skip forwarding
199-
# feature_names_in_ to CPU. Revisit once
200-
# https://github.com/rapidsai/cuml/issues/6650 is resolved.
192+
for name in ["n_features_in_", "feature_names_in_"]:
193+
try:
194+
out[name] = getattr(self, name)
195+
except AttributeError:
196+
pass
201197
return out
202198

203199
def _sync_attrs_to_cpu(self, model) -> None:

python/cuml/cuml/internals/validation.py

Lines changed: 121 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,12 @@
33
# SPDX-License-Identifier: Apache-2.0
44
#
55
import numbers
6+
import warnings
67

8+
import cudf
79
import cupy as cp
810
import numpy as np
11+
import pandas as pd
912
from sklearn.utils.validation import check_is_fitted
1013

1114
__all__ = (
@@ -84,8 +87,63 @@ def _get_n_features(X):
8487
return shape[1] if len(shape) >= 2 else 1
8588

8689

90+
def _warn_or_error(exc_cls, msg):
91+
"""Errors if running in cuml.accel, otherwise warns that an error will be
92+
raised in the future."""
93+
import cuml.accel
94+
95+
if cuml.accel.enabled():
96+
raise exc_cls(msg)
97+
else:
98+
warnings.warn(
99+
"cuml is adding support for `feature_names_in_` for validating "
100+
"the feature names of dataframe-like inputs. In cuml 26.06 this "
101+
f"will error with the following message:\n\n{msg}",
102+
FutureWarning,
103+
)
104+
105+
106+
def _get_feature_names(X):
107+
"""Get feature names from X.
108+
109+
Returns
110+
-------
111+
names: ndarray or None
112+
Feature names of `X`. Unrecognized array containers will return `None`.
113+
"""
114+
if isinstance(X, (pd.DataFrame, cudf.DataFrame)):
115+
feature_names = np.asarray(X.columns, dtype=object)
116+
elif hasattr(X, "__dataframe__"):
117+
feature_names = np.asarray(
118+
list(X.__dataframe__().column_names()), dtype=object
119+
)
120+
else:
121+
return None
122+
123+
if len(feature_names) == 0:
124+
# No features, just return None
125+
return None
126+
127+
# Check the types of the column names.
128+
types = sorted(t.__qualname__ for t in set(type(v) for v in feature_names))
129+
if len(types) == 1 and types[0] == "str":
130+
return feature_names
131+
elif len(types) > 1 and "str" in types:
132+
msg = (
133+
"Feature names are only supported if all input features have string names, "
134+
f"but your input has {types} as feature name / column name types. "
135+
"If you want feature names to be stored and validated, you must convert "
136+
"them all to strings, by using X.columns = X.columns.astype(str) for "
137+
"example. Otherwise you can remove feature / column names from your input "
138+
"data, or convert them all to a non-string data type."
139+
)
140+
_warn_or_error(TypeError, msg)
141+
142+
return None
143+
144+
87145
def check_features(estimator, X, reset=False) -> None:
88-
"""Check or set ``n_features_in_``.
146+
"""Check or set ``n_features_in_`` and ``feature_names_in_``.
89147
90148
Parameters
91149
----------
@@ -95,17 +153,71 @@ def check_features(estimator, X, reset=False) -> None:
95153
The original user-provided `X` input. No conversion or processing steps
96154
should have occurred to this array yet.
97155
reset : bool, default=False
98-
If true, ``n_features_in_`` is set on ``estimator`` to match ``X``.
99-
Otherwise the ``X`` is checked to match the existing
100-
``n_features_in_``. ``reset=True`` should be used for fit-like methods.
156+
If True, ``n_features_in_`` and ``feature_names_in_`` are set on
157+
``estimator`` to match ``X``. Otherwise ``X`` is checked to match the
158+
existing ``n_features_in_`` and ``feature_names_in_``. ``reset=True``
159+
should be used for fit-like methods, and False otherwise.
101160
"""
102161
n_features = _get_n_features(X)
162+
feature_names = _get_feature_names(X)
103163

104164
if reset:
105165
estimator.n_features_in_ = n_features
106-
else:
107-
if n_features != estimator.n_features_in_:
108-
raise ValueError(
109-
f"X has {n_features} features, but {estimator.__class__.__name__} "
110-
f"is expecting {estimator.n_features_in_} features as input."
166+
if feature_names is not None:
167+
estimator.feature_names_in_ = feature_names
168+
elif hasattr(estimator, "feature_names_in_"):
169+
# Clear old feature names if present
170+
delattr(estimator, "feature_names_in_")
171+
return
172+
173+
est_feature_names = getattr(estimator, "feature_names_in_", None)
174+
175+
# Check feature_names_in_ first
176+
if est_feature_names is not None or feature_names is not None:
177+
if est_feature_names is None:
178+
warnings.warn(
179+
f"X has feature names, but {estimator.__class__.__name__} was fitted "
180+
"without feature names"
111181
)
182+
183+
elif feature_names is None:
184+
warnings.warn(
185+
"X does not have valid feature names, but"
186+
f" {estimator.__class__.__name__} was fitted with feature names"
187+
)
188+
189+
elif len(est_feature_names) != len(feature_names) or np.any(
190+
est_feature_names != feature_names
191+
):
192+
unexpected = sorted(
193+
set(feature_names).difference(est_feature_names)
194+
)
195+
missing = sorted(set(est_feature_names).difference(feature_names))
196+
197+
parts = [
198+
"The feature names should match those that were passed during fit."
199+
]
200+
for heading, names in [
201+
("Feature names unseen at fit time:", unexpected),
202+
("Feature names seen at fit time, yet now missing:", missing),
203+
]:
204+
if names:
205+
parts.append(heading)
206+
parts.extend([f"- {name}" for name in names[:5]])
207+
if len(names) > 5:
208+
parts.append("- ...")
209+
210+
if not missing and not unexpected:
211+
parts.append(
212+
"Feature names must be in the same order as they were in fit."
213+
)
214+
215+
msg = "\n".join(parts)
216+
_warn_or_error(ValueError, msg)
217+
218+
# Then check n_features_in_
219+
if n_features != estimator.n_features_in_:
220+
raise ValueError(
221+
f"X has {n_features} features, but {estimator.__class__.__name__} "
222+
f"is expecting {estimator.n_features_in_} features as input."
223+
)

python/cuml/cuml/neighbors/kernel_density.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,8 @@ def _sync_attrs_to_cpu(self, model):
248248
else cp.asnumpy(self._sample_weight)
249249
)
250250
model.fit(X, sample_weight=sample_weight)
251+
if hasattr(self, "feature_names_in_"):
252+
model.feature_names_in_ = self.feature_names_in_
251253

252254
def __init__(
253255
self,

0 commit comments

Comments
 (0)