@@ -35,39 +35,33 @@ class BackwardDifferenceEncoder(BaseContrastEncoder):
3535 -------
3636 >>> from category_encoders import *
3737 >>> import pandas as pd
38- >>> from sklearn.datasets import load_boston
39- >>> bunch = load_boston()
38+ >>> from sklearn.datasets import fetch_openml
39+ >>> bunch = fetch_openml(name="house_prices", as_frame=True)
40+ >>> display_cols = ["Id", "MSSubClass", "MSZoning", "LotFrontage", "YearBuilt", "Heating", "CentralAir"]
4041 >>> y = bunch.target
41- >>> X = pd.DataFrame(bunch.data, columns=bunch.feature_names_out_)
42- >>> enc = BackwardDifferenceEncoder(cols=['CHAS ', 'RAD ']).fit(X, y)
42+ >>> X = pd.DataFrame(bunch.data, columns=bunch.feature_names)[display_cols]
43+ >>> enc = BackwardDifferenceEncoder(cols=['CentralAir', 'Heating']).fit(X, y)
4344 >>> numeric_dataset = enc.transform(X)
4445 >>> print(numeric_dataset.info())
4546 <class 'pandas.core.frame.DataFrame'>
46- RangeIndex: 506 entries, 0 to 505
47- Data columns (total 21 columns):
48- intercept 506 non-null int64
49- CRIM 506 non-null float64
50- ZN 506 non-null float64
51- INDUS 506 non-null float64
52- CHAS_0 506 non-null float64
53- NOX 506 non-null float64
54- RM 506 non-null float64
55- AGE 506 non-null float64
56- DIS 506 non-null float64
57- RAD_0 506 non-null float64
58- RAD_1 506 non-null float64
59- RAD_2 506 non-null float64
60- RAD_3 506 non-null float64
61- RAD_4 506 non-null float64
62- RAD_5 506 non-null float64
63- RAD_6 506 non-null float64
64- RAD_7 506 non-null float64
65- TAX 506 non-null float64
66- PTRATIO 506 non-null float64
67- B 506 non-null float64
68- LSTAT 506 non-null float64
69- dtypes: float64(20), int64(1)
70- memory usage: 83.1 KB
47+ RangeIndex: 1460 entries, 0 to 1459
48+ Data columns (total 12 columns):
49+ # Column Non-Null Count Dtype
50+ --- ------ -------------- -----
51+ 0 intercept 1460 non-null int64
52+ 1 Id 1460 non-null float64
53+ 2 MSSubClass 1460 non-null float64
54+ 3 MSZoning 1460 non-null object
55+ 4 LotFrontage 1201 non-null float64
56+ 5 YearBuilt 1460 non-null float64
57+ 6 Heating_0 1460 non-null float64
58+ 7 Heating_1 1460 non-null float64
59+ 8 Heating_2 1460 non-null float64
60+ 9 Heating_3 1460 non-null float64
61+ 10 Heating_4 1460 non-null float64
62+ 11 CentralAir_0 1460 non-null float64
63+ dtypes: float64(10), int64(1), object(1)
64+ memory usage: 137.0+ KB
7165 None
7266
7367 References
0 commit comments