Skip to content

Commit a745057

Browse files
Merge pull request #393 from pimlock/fix-basen-inverse-transform
Fix basen_to_integer when column name contains regex metachar
2 parents 1def428 + 7a4482e commit a745057

File tree

2 files changed

+11
-1
lines changed

2 files changed

+11
-1
lines changed

category_encoders/basen.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -266,7 +266,7 @@ def basen_to_integer(self, X, cols, base):
266266
out_cols = X.columns.values.tolist()
267267

268268
for col in cols:
269-
col_list = [col0 for col0 in out_cols if re.match(str(col)+'_\\d+', str(col0))]
269+
col_list = [col0 for col0 in out_cols if re.match(re.escape(str(col))+'_\\d+', str(col0))]
270270
insert_at = out_cols.index(col_list[0])
271271

272272
if base == 1:

tests/test_basen.py

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -140,6 +140,16 @@ def test_inverse_transform_HaveHandleMissingValueAndHandleUnknownReturnNan_Expec
140140

141141
pd.testing.assert_frame_equal(expected, original)
142142

143+
def test_inverse_transform_HaveRegexMetacharactersInColumnName_ExpectInversed(self):
144+
train = pd.DataFrame({'state (2-letter code)': ['il', 'ny', 'ca']})
145+
146+
enc = encoders.BaseNEncoder()
147+
enc.fit(train)
148+
result = enc.transform(train)
149+
original = enc.inverse_transform(result)
150+
151+
pd.testing.assert_frame_equal(train, original)
152+
143153
def test_num_cols(self):
144154
"""
145155
Test that BaseNEncoder produces the correct number of output columns.

0 commit comments

Comments
 (0)