Skip to content

Commit 0f9f866

Browse files
Force scikit-learn >= 1.0 and fix issue #384
1 parent 21d64ab commit 0f9f866

File tree

3 files changed

+8
-6
lines changed

3 files changed

+8
-6
lines changed

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
numpy>=1.14.0
2-
scikit-learn>=0.20.0
2+
scikit-learn>=1.0.0
33
scipy>=1.0.0
44
statsmodels>=0.9.0
55
pandas>=1.0.5

tests/helpers.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -46,13 +46,12 @@ def create_dataset(n_rows=1000, extras=False, has_missing=True, random_seed=2001
4646
random.choice(['A', 'B_b', 'C_c_c']), # Strings with underscores to test reverse_dummies()
4747
random.choice(['A', 'B', 'C', np.NaN]) if has_missing else random.choice(['A', 'B', 'C']), # None
4848
random.choice(['A', 'B', 'C', 'D']) if extras else random.choice(['A', 'B', 'C']), # With a new string value
49-
random.choice([12, 43, -32]), # Number in the column name
5049
random.choice(['A', 'B', 'C']), # What is going to become the categorical column
5150
random.choice(['A', 'B', 'C', np.nan]), # Categorical with missing values
5251
random.choice([1, 2, 3]) # Ordinal integers
5352
] for row in range(n_rows)]
5453

55-
df = pd.DataFrame(ds, columns=['float', 'float_edge', 'unique_int', 'unique_str', 'invariant', 'underscore', 'none', 'extra', 321, 'categorical', 'na_categorical', 'categorical_int'])
54+
df = pd.DataFrame(ds, columns=['float', 'float_edge', 'unique_int', 'unique_str', 'invariant', 'underscore', 'none', 'extra', 'categorical', 'na_categorical', 'categorical_int'])
5655
df['categorical'] = pd.Categorical(df['categorical'], categories=['A', 'B', 'C'])
5756
df['na_categorical'] = pd.Categorical(df['na_categorical'], categories=['A', 'B', 'C'])
5857
df['categorical_int'] = pd.Categorical(df['categorical_int'], categories=[1, 2, 3])

tests/test_encoders.py

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -432,14 +432,17 @@ def test_duplicate_index_value(self):
432432
def test_string_index(self):
433433
# https://github.com/scikit-learn-contrib/categorical-encoding/issues/131
434434

435-
bunch = sklearn.datasets.load_boston()
436-
y = (bunch.target > 20)
435+
bunch = sklearn.datasets.fetch_openml(name="house_prices", as_frame=True)
436+
y = (bunch.target > 200000).values
437437
X = pd.DataFrame(bunch.data, columns=bunch.feature_names)
438438
X.index = X.index.values.astype(str)
439439

440+
display_cols = ["Id", "MSSubClass", "MSZoning", "YearBuilt", "Heating", "CentralAir"]
441+
X = X[display_cols]
442+
440443
for encoder_name in encoders.__all__:
441444
with self.subTest(encoder_name=encoder_name):
442-
enc = getattr(encoders, encoder_name)(cols=['CHAS', 'RAD'])
445+
enc = getattr(encoders, encoder_name)(cols=['CentralAir', 'Heating'])
443446
result = enc.fit_transform(X, y)
444447
self.assertFalse(result.isnull().values.any(), 'There should not be any missing value!')
445448

0 commit comments

Comments
 (0)