Skip to content

Commit 52335ff

Browse files
Introduce get_feature_names_out in accordance with sklearn guidelines
1 parent 0fd5d28 commit 52335ff

File tree

3 files changed

+31
-16
lines changed

3 files changed

+31
-16
lines changed

category_encoders/quantile_encoder.py

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,17 @@
11
"""Quantile Encoder"""
22
__author__ = "david26694", "cmougan"
33

4+
from functools import reduce
5+
import operator
6+
from typing import List
7+
import warnings
8+
49
import numpy as np
5-
from category_encoders.ordinal import OrdinalEncoder
10+
import pandas as pd
611
from sklearn.base import BaseEstimator
12+
713
import category_encoders.utils as util
8-
import pandas as pd
9-
from functools import reduce
10-
import operator
14+
from category_encoders.ordinal import OrdinalEncoder
1115

1216

1317
class QuantileEncoder(util.BaseEncoder, util.SupervisedTransformerMixin):
@@ -334,15 +338,20 @@ def transform(self, X, y=None, override_return_df=False):
334338
else:
335339
new_feat = X_encoded[[c for c in X_encoded.columns if c not in orig_cols]]
336340
transformed_df = pd.concat([transformed_df, new_feat], axis=1)
337-
feature_order = [c for c in self.get_feature_names() if c in transformed_df]
341+
feature_order = [c for c in self.get_feature_names_out() if c in transformed_df]
338342
transformed_df = transformed_df[feature_order]
339343

340344
if self.return_df or override_return_df:
341345
return transformed_df
342346
else:
343347
return transformed_df.values
344348

345-
def get_feature_names(self):
349+
def get_feature_names(self) -> List[str]:
350+
warnings.warn("`get_feature_names` is deprecated in all of sklearn. Use `get_feature_names_out` instead.",
351+
category=FutureWarning)
352+
return self.get_feature_names_out()
353+
354+
def get_feature_names_out(self):
346355
"""
347356
Returns the names of all transformed / added columns.
348357
Returns

category_encoders/utils.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""A collection of shared utilities for all encoders, not intended for external use."""
22
from abc import abstractmethod
33
from enum import Enum, auto
4+
import warnings
45

56
import pandas as pd
67
import numpy as np
@@ -356,6 +357,11 @@ def _get_fit_columns(self, X: pd.DataFrame) -> None:
356357
self.cols = convert_cols_to_list(self.cols)
357358

358359
def get_feature_names(self) -> List[str]:
360+
warnings.warn("`get_feature_names` is deprecated in all of sklearn. Use `get_feature_names_out` instead.",
361+
category=FutureWarning)
362+
return self.get_feature_names_out()
363+
364+
def get_feature_names_out(self) -> List[str]:
359365
"""
360366
Returns the names of all transformed / added columns.
361367

tests/test_encoders.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -443,47 +443,47 @@ def test_string_index(self):
443443
result = enc.fit_transform(X, y)
444444
self.assertFalse(result.isnull().values.any(), 'There should not be any missing value!')
445445

446-
def test_get_feature_names(self):
446+
def test_get_feature_names_out(self):
447447
for encoder_name in encoders.__all__:
448448
with self.subTest(encoder_name=encoder_name):
449449
enc = getattr(encoders, encoder_name)()
450450
# Target encoders also need y
451451
if enc._get_tags().get('supervised_encoder'):
452-
obtained = enc.fit(X, y).get_feature_names()
452+
obtained = enc.fit(X, y).get_feature_names_out()
453453
expected = enc.transform(X, y).columns.tolist()
454454
else:
455-
obtained = enc.fit(X).get_feature_names()
455+
obtained = enc.fit(X).get_feature_names_out()
456456
expected = enc.transform(X).columns.tolist()
457457
self.assertEqual(obtained, expected)
458458

459-
def test_get_feature_names_drop_invariant(self):
459+
def test_get_feature_names_out_drop_invariant(self):
460460
# TODO: What could a DF look like that results in constant
461461
# columns for all encoders?
462462
for encoder_name in encoders.__all__:
463463
with self.subTest(encoder_name=encoder_name):
464464
enc = getattr(encoders, encoder_name)(drop_invariant=True)
465465
# Target encoders also need y
466466
if enc._get_tags().get('supervised_encoder'):
467-
obtained = enc.fit(X, y).get_feature_names()
467+
obtained = enc.fit(X, y).get_feature_names_out()
468468
expected = enc.transform(X, y).columns.tolist()
469469
else:
470-
obtained = enc.fit(X).get_feature_names()
470+
obtained = enc.fit(X).get_feature_names_out()
471471
expected = enc.transform(X).columns.tolist()
472472
self.assertEqual(obtained, expected)
473473

474-
def test_get_feature_names_not_set(self):
474+
def test_get_feature_names_out_not_set(self):
475475
for encoder_name in encoders.__all__:
476476
with self.subTest(encoder_name=encoder_name):
477477
enc = getattr(encoders, encoder_name)()
478-
self.assertRaises(ValueError, enc.get_feature_names)
478+
self.assertRaises(ValueError, enc.get_feature_names_out)
479479

480-
def test_get_feature_names_after_transform(self):
480+
def test_get_feature_names_out_after_transform(self):
481481
for encoder_name in encoders.__all__:
482482
with self.subTest(encoder_name=encoder_name):
483483
enc = getattr(encoders, encoder_name)()
484484
enc.fit(X, y)
485485
out = enc.transform(X_t)
486-
self.assertEqual(set(enc.get_feature_names()), set(out.columns))
486+
self.assertEqual(set(enc.get_feature_names_out()), set(out.columns))
487487

488488
def test_truncated_index(self):
489489
# see: https://github.com/scikit-learn-contrib/categorical-encoding/issues/152

0 commit comments

Comments
 (0)