Skip to content

Commit bbe2bb7

Browse files
elegant implementation of binary encoder
1 parent cf339ae commit bbe2bb7

File tree

1 file changed

+4
-92
lines changed

1 file changed

+4
-92
lines changed

category_encoders/binary.py

Lines changed: 4 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,12 @@
11
"""Binary encoding"""
2-
3-
from sklearn.base import BaseEstimator, TransformerMixin
4-
5-
import category_encoders as ce
2+
from functools import partialmethod
63
from category_encoders import utils
4+
from category_encoders.basen import BaseNEncoder
75

86
__author__ = 'willmcginnis'
97

108

11-
# todo this does not fit into our schema
12-
class BinaryEncoder(BaseEstimator, TransformerMixin):
9+
class BinaryEncoder(BaseNEncoder):
1310
"""Binary encoding for categorical variables, similar to onehot, but stores categories as binary bitstrings.
1411
1512
Parameters
@@ -70,89 +67,4 @@ class BinaryEncoder(BaseEstimator, TransformerMixin):
7067
7168
"""
7269
encoding_relation = utils.EncodingRelation.ONE_TO_M
73-
74-
def __init__(self, verbose=0, cols=None, mapping=None, drop_invariant=False, return_df=True,
75-
handle_unknown='value', handle_missing='value'):
76-
self.verbose = verbose
77-
self.use_default_cols = cols is None # if True, even a repeated call of fit() will select string columns from X
78-
self.cols = cols
79-
self.mapping = mapping
80-
self.drop_invariant = drop_invariant
81-
self.return_df = return_df
82-
self.handle_unknown = handle_unknown
83-
self.handle_missing = handle_missing
84-
self.base_n_encoder = ce.BaseNEncoder(base=2, verbose=self.verbose, cols=self.cols, mapping=self.mapping,
85-
drop_invariant=self.drop_invariant, return_df=self.return_df,
86-
handle_unknown=self.handle_unknown, handle_missing=self.handle_missing)
87-
88-
def fit(self, X, y=None, **kwargs):
89-
"""Fit encoder according to X and y.
90-
91-
Parameters
92-
----------
93-
94-
X : array-like, shape = [n_samples, n_features]
95-
Training vectors, where n_samples is the number of samples
96-
and n_features is the number of features.
97-
y : array-like, shape = [n_samples]
98-
Target values.
99-
100-
Returns
101-
-------
102-
103-
self : encoder
104-
Returns self.
105-
106-
"""
107-
108-
self.base_n_encoder.fit(X, y, **kwargs)
109-
110-
return self
111-
112-
def transform(self, X, override_return_df=False):
113-
"""Perform the transformation to new categorical data.
114-
115-
Parameters
116-
----------
117-
118-
X : array-like, shape = [n_samples, n_features]
119-
120-
Returns
121-
-------
122-
123-
p : array, shape = [n_samples, n_numeric + N]
124-
Transformed values with encoding applied.
125-
126-
"""
127-
128-
return self.base_n_encoder.transform(X)
129-
130-
def inverse_transform(self, X_in):
131-
"""
132-
Perform the inverse transformation to encoded data.
133-
134-
Parameters
135-
----------
136-
X_in : array-like, shape = [n_samples, n_features]
137-
138-
Returns
139-
-------
140-
p: array, the same size of X_in
141-
142-
"""
143-
144-
return self.base_n_encoder.inverse_transform(X_in)
145-
146-
def get_feature_names(self):
147-
"""
148-
Returns the names of all transformed / added columns.
149-
150-
Returns
151-
-------
152-
feature_names: list
153-
A list with all feature names transformed or added.
154-
Note: potentially dropped features are not included!
155-
156-
"""
157-
158-
return self.base_n_encoder.get_feature_names()
70+
# Reuse BaseNEncoder's constructor with ``base`` pre-bound to 2, so this
# subclass does not need a hand-written __init__ of its own.
# NOTE(review): confirm that constructor-signature introspection (e.g.
# scikit-learn's get_params/clone) still reports the expected arguments
# when __init__ is a partialmethod.
__init__ = partialmethod(BaseNEncoder.__init__, base=2)

0 commit comments

Comments
 (0)