1 | 1 | """Binary encoding""" |
2 | | - |
3 | | -from sklearn.base import BaseEstimator, TransformerMixin |
4 | | - |
5 | | -import category_encoders as ce |
| 2 | +from functools import partialmethod |
6 | 3 | from category_encoders import utils |
| 4 | +from category_encoders.basen import BaseNEncoder |
7 | 5 | |
8 | 6 | __author__ = 'willmcginnis' |
9 | 7 | |
10 | 8 | |
11 | | -# todo this does not fit into our schema |
12 | | -class BinaryEncoder(BaseEstimator, TransformerMixin): |
| 9 | +class BinaryEncoder(BaseNEncoder): |
13 | 10 | """Binary encoding for categorical variables, similar to onehot, but stores categories as binary bitstrings. |
14 | 11 | |
15 | 12 | Parameters |
@@ -70,89 +67,4 @@ class BinaryEncoder(BaseEstimator, TransformerMixin): |
70 | 67 | |
71 | 68 | """ |
72 | 69 | encoding_relation = utils.EncodingRelation.ONE_TO_M |
73 | | - |
74 | | - def __init__(self, verbose=0, cols=None, mapping=None, drop_invariant=False, return_df=True, |
75 | | - handle_unknown='value', handle_missing='value'): |
76 | | - self.verbose = verbose |
77 | | - self.use_default_cols = cols is None # if True, even a repeated call of fit() will select string columns from X |
78 | | - self.cols = cols |
79 | | - self.mapping = mapping |
80 | | - self.drop_invariant = drop_invariant |
81 | | - self.return_df = return_df |
82 | | - self.handle_unknown = handle_unknown |
83 | | - self.handle_missing = handle_missing |
84 | | - self.base_n_encoder = ce.BaseNEncoder(base=2, verbose=self.verbose, cols=self.cols, mapping=self.mapping, |
85 | | - drop_invariant=self.drop_invariant, return_df=self.return_df, |
86 | | - handle_unknown=self.handle_unknown, handle_missing=self.handle_missing) |
87 | | - |
88 | | - def fit(self, X, y=None, **kwargs): |
89 | | - """Fit encoder according to X and y. |
90 | | - |
91 | | - Parameters |
92 | | - ---------- |
93 | | - |
94 | | - X : array-like, shape = [n_samples, n_features] |
95 | | - Training vectors, where n_samples is the number of samples |
96 | | - and n_features is the number of features. |
97 | | - y : array-like, shape = [n_samples] |
98 | | - Target values. |
99 | | - |
100 | | - Returns |
101 | | - ------- |
102 | | - |
103 | | - self : encoder |
104 | | - Returns self. |
105 | | - |
106 | | - """ |
107 | | - |
108 | | - self.base_n_encoder.fit(X, y, **kwargs) |
109 | | - |
110 | | - return self |
111 | | - |
112 | | - def transform(self, X, override_return_df=False): |
113 | | - """Perform the transformation to new categorical data. |
114 | | - |
115 | | - Parameters |
116 | | - ---------- |
117 | | - |
118 | | - X : array-like, shape = [n_samples, n_features] |
119 | | - |
120 | | - Returns |
121 | | - ------- |
122 | | - |
123 | | - p : array, shape = [n_samples, n_numeric + N] |
124 | | - Transformed values with encoding applied. |
125 | | - |
126 | | - """ |
127 | | - |
128 | | - return self.base_n_encoder.transform(X) |
129 | | - |
130 | | - def inverse_transform(self, X_in): |
131 | | - """ |
132 | | - Perform the inverse transformation to encoded data. |
133 | | - |
134 | | - Parameters |
135 | | - ---------- |
136 | | - X_in : array-like, shape = [n_samples, n_features] |
137 | | - |
138 | | - Returns |
139 | | - ------- |
140 | | - p: array, the same size of X_in |
141 | | - |
142 | | - """ |
143 | | - |
144 | | - return self.base_n_encoder.inverse_transform(X_in) |
145 | | - |
146 | | - def get_feature_names(self): |
147 | | - """ |
148 | | - Returns the names of all transformed / added columns. |
149 | | - |
150 | | - Returns |
151 | | - ------- |
152 | | - feature_names: list |
153 | | - A list with all feature names transformed or added. |
154 | | - Note: potentially dropped features are not included! |
155 | | - |
156 | | - """ |
157 | | - |
158 | | - return self.base_n_encoder.get_feature_names() |
| 70 | + __init__ = partialmethod(BaseNEncoder.__init__, base=2) |
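
The net effect of the diff above is that BinaryEncoder is no longer a delegating wrapper object: it now is BaseNEncoder with base=2 bound at construction time through functools.partialmethod. A minimal sketch of the resulting equivalence follows; the color column and its values are hypothetical, and it assumes pandas and category_encoders are installed.

import pandas as pd
import category_encoders as ce

# Hypothetical toy frame with one categorical column.
X = pd.DataFrame({'color': ['red', 'green', 'blue', 'green']})

# After this change, BinaryEncoder(...) is BaseNEncoder(base=2, ...),
# so both encoders should emit the same bitstring columns
# (e.g. color_0, color_1 holding the binary digits of each category's ordinal).
binary = ce.BinaryEncoder(cols=['color']).fit(X)
base2 = ce.BaseNEncoder(base=2, cols=['color']).fit(X)

assert binary.transform(X).equals(base2.transform(X))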