Skip to content

Commit 6625d0c

Browse files
SebastienMelo, ArturoAmor, and ogrisel
authored
MAINT Changed the use of ColumnTransformer to make_column_transformer (INRIA#831)
* changed besides to additionally for better phrasing * Apply suggestions from code review * Changed the use of ColumnTransformer to make_column_transformer * fixed format * fixed format * changed additional mentions of ColumnTransformer * Rerender notebooks --------- Co-authored-by: SebastienMelo <[email protected]> Co-authored-by: Arturo Amor <[email protected]> Co-authored-by: Olivier Grisel <[email protected]>
1 parent 10ecac4 commit 6625d0c

17 files changed

+103
-121
lines changed

jupyter-book/appendix/glossary.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -368,7 +368,7 @@ The dataset used to train the [model](#model).
368368

369369
An [estimator](#estimator) (i.e. an object that has a `fit` method) supporting
370370
`transform` and/or `fit_transform`. Examples for transformers are
371-
`StandardScaler` or `ColumnTransformer`.
371+
`StandardScaler` or `OneHotEncoder`.
372372

373373
### underfitting
374374

notebooks/03_categorical_pipeline_column_transformer.ipynb

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -96,9 +96,10 @@
9696
" categories.\n",
9797
"* **numerical scaling** numerical features which will be standardized.\n",
9898
"\n",
99-
"Now, we create our `ColumnTransformer` by specifying three values: the\n",
100-
"preprocessor name, the transformer, and the columns. First, let's create the\n",
101-
"preprocessors for the numerical and categorical parts."
99+
"Now, we create our `ColumnTransformer` using the helper function\n",
100+
"`make_column_transformer`. We specify two values: the transformer, and the\n",
101+
"columns. First, let's create the preprocessors for the numerical and\n",
102+
"categorical parts."
102103
]
103104
},
104105
{
@@ -127,13 +128,11 @@
127128
"metadata": {},
128129
"outputs": [],
129130
"source": [
130-
"from sklearn.compose import ColumnTransformer\n",
131+
"from sklearn.compose import make_column_transformer\n",
131132
"\n",
132-
"preprocessor = ColumnTransformer(\n",
133-
" [\n",
134-
" (\"one-hot-encoder\", categorical_preprocessor, categorical_columns),\n",
135-
" (\"standard_scaler\", numerical_preprocessor, numerical_columns),\n",
136-
" ]\n",
133+
"preprocessor = make_column_transformer(\n",
134+
" (categorical_preprocessor, categorical_columns),\n",
135+
" (numerical_preprocessor, numerical_columns),\n",
137136
")"
138137
]
139138
},
@@ -365,8 +364,8 @@
365364
" handle_unknown=\"use_encoded_value\", unknown_value=-1\n",
366365
")\n",
367366
"\n",
368-
"preprocessor = ColumnTransformer(\n",
369-
" [(\"categorical\", categorical_preprocessor, categorical_columns)],\n",
367+
"preprocessor = make_column_transformer(\n",
368+
" (categorical_preprocessor, categorical_columns),\n",
370369
" remainder=\"passthrough\",\n",
371370
")\n",
372371
"\n",

notebooks/03_categorical_pipeline_ex_02.ipynb

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -82,18 +82,19 @@
8282
"\n",
8383
"from sklearn.model_selection import cross_validate\n",
8484
"from sklearn.pipeline import make_pipeline\n",
85-
"from sklearn.compose import ColumnTransformer\n",
85+
"from sklearn.compose import make_column_transformer\n",
8686
"from sklearn.preprocessing import OrdinalEncoder\n",
8787
"from sklearn.ensemble import HistGradientBoostingClassifier\n",
8888
"\n",
8989
"categorical_preprocessor = OrdinalEncoder(\n",
9090
" handle_unknown=\"use_encoded_value\", unknown_value=-1\n",
9191
")\n",
92-
"preprocessor = ColumnTransformer(\n",
93-
" [(\"categorical\", categorical_preprocessor, categorical_columns)],\n",
92+
"preprocessor = make_column_transformer(\n",
93+
" (categorical_preprocessor, categorical_columns),\n",
9494
" remainder=\"passthrough\",\n",
9595
")\n",
9696
"\n",
97+
"\n",
9798
"model = make_pipeline(preprocessor, HistGradientBoostingClassifier())\n",
9899
"\n",
99100
"start = time.time()\n",

notebooks/03_categorical_pipeline_sol_02.ipynb

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -82,18 +82,19 @@
8282
"\n",
8383
"from sklearn.model_selection import cross_validate\n",
8484
"from sklearn.pipeline import make_pipeline\n",
85-
"from sklearn.compose import ColumnTransformer\n",
85+
"from sklearn.compose import make_column_transformer\n",
8686
"from sklearn.preprocessing import OrdinalEncoder\n",
8787
"from sklearn.ensemble import HistGradientBoostingClassifier\n",
8888
"\n",
8989
"categorical_preprocessor = OrdinalEncoder(\n",
9090
" handle_unknown=\"use_encoded_value\", unknown_value=-1\n",
9191
")\n",
92-
"preprocessor = ColumnTransformer(\n",
93-
" [(\"categorical\", categorical_preprocessor, categorical_columns)],\n",
92+
"preprocessor = make_column_transformer(\n",
93+
" (categorical_preprocessor, categorical_columns),\n",
9494
" remainder=\"passthrough\",\n",
9595
")\n",
9696
"\n",
97+
"\n",
9798
"model = make_pipeline(preprocessor, HistGradientBoostingClassifier())\n",
9899
"\n",
99100
"start = time.time()\n",
@@ -130,17 +131,12 @@
130131
"\n",
131132
"from sklearn.preprocessing import StandardScaler\n",
132133
"\n",
133-
"preprocessor = ColumnTransformer(\n",
134-
" [\n",
135-
" (\"numerical\", StandardScaler(), numerical_columns),\n",
136-
" (\n",
137-
" \"categorical\",\n",
138-
" OrdinalEncoder(\n",
139-
" handle_unknown=\"use_encoded_value\", unknown_value=-1\n",
140-
" ),\n",
141-
" categorical_columns,\n",
142-
" ),\n",
143-
" ]\n",
134+
"preprocessor = make_column_transformer(\n",
135+
" (StandardScaler(), numerical_columns),\n",
136+
" (\n",
137+
" OrdinalEncoder(handle_unknown=\"use_encoded_value\", unknown_value=-1),\n",
138+
" categorical_columns,\n",
139+
" ),\n",
144140
")\n",
145141
"\n",
146142
"model = make_pipeline(preprocessor, HistGradientBoostingClassifier())\n",
@@ -209,8 +205,8 @@
209205
"categorical_preprocessor = OneHotEncoder(\n",
210206
" handle_unknown=\"ignore\", sparse_output=False\n",
211207
")\n",
212-
"preprocessor = ColumnTransformer(\n",
213-
" [(\"one-hot-encoder\", categorical_preprocessor, categorical_columns)],\n",
208+
"preprocessor = make_column_transformer(\n",
209+
" (categorical_preprocessor, categorical_columns),\n",
214210
" remainder=\"passthrough\",\n",
215211
")\n",
216212
"\n",

notebooks/parameter_tuning_ex_02.ipynb

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -41,21 +41,15 @@
4141
"metadata": {},
4242
"outputs": [],
4343
"source": [
44-
"from sklearn.compose import ColumnTransformer\n",
44+
"from sklearn.compose import make_column_transformer\n",
4545
"from sklearn.compose import make_column_selector as selector\n",
4646
"from sklearn.preprocessing import OrdinalEncoder\n",
4747
"\n",
4848
"categorical_preprocessor = OrdinalEncoder(\n",
4949
" handle_unknown=\"use_encoded_value\", unknown_value=-1\n",
5050
")\n",
51-
"preprocessor = ColumnTransformer(\n",
52-
" [\n",
53-
" (\n",
54-
" \"cat_preprocessor\",\n",
55-
" categorical_preprocessor,\n",
56-
" selector(dtype_include=object),\n",
57-
" )\n",
58-
" ],\n",
51+
"preprocessor = make_column_transformer(\n",
52+
" (categorical_preprocessor, selector(dtype_include=object)),\n",
5953
" remainder=\"passthrough\",\n",
6054
")\n",
6155
"\n",
@@ -113,6 +107,13 @@
113107
"source": [
114108
"# Write your code here."
115109
]
110+
},
111+
{
112+
"cell_type": "code",
113+
"execution_count": null,
114+
"metadata": {},
115+
"outputs": [],
116+
"source": []
116117
}
117118
],
118119
"metadata": {

notebooks/parameter_tuning_grid_search.ipynb

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@
142142
"cell_type": "markdown",
143143
"metadata": {},
144144
"source": [
145-
"We then use a `ColumnTransformer` to select the categorical columns and apply\n",
145+
"We then use `make_column_transformer` to select the categorical columns and apply\n",
146146
"the `OrdinalEncoder` to them."
147147
]
148148
},
@@ -152,10 +152,10 @@
152152
"metadata": {},
153153
"outputs": [],
154154
"source": [
155-
"from sklearn.compose import ColumnTransformer\n",
155+
"from sklearn.compose import make_column_transformer\n",
156156
"\n",
157-
"preprocessor = ColumnTransformer(\n",
158-
" [(\"cat_preprocessor\", categorical_preprocessor, categorical_columns)],\n",
157+
"preprocessor = make_column_transformer(\n",
158+
" (categorical_preprocessor, categorical_columns),\n",
159159
" remainder=\"passthrough\",\n",
160160
" # Silence a deprecation warning in scikit-learn v1.6 related to how the\n",
161161
" # ColumnTransformer stores an attribute that we do not use in this notebook\n",

notebooks/parameter_tuning_nested.ipynb

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@
5555
"metadata": {},
5656
"outputs": [],
5757
"source": [
58-
"from sklearn.compose import ColumnTransformer\n",
58+
"from sklearn.compose import make_column_transformer\n",
5959
"from sklearn.preprocessing import OrdinalEncoder\n",
6060
"from sklearn.compose import make_column_selector as selector\n",
6161
"\n",
@@ -65,10 +65,8 @@
6565
"categorical_preprocessor = OrdinalEncoder(\n",
6666
" handle_unknown=\"use_encoded_value\", unknown_value=-1\n",
6767
")\n",
68-
"preprocessor = ColumnTransformer(\n",
69-
" [\n",
70-
" (\"cat_preprocessor\", categorical_preprocessor, categorical_columns),\n",
71-
" ],\n",
68+
"preprocessor = make_column_transformer(\n",
69+
" (categorical_preprocessor, categorical_columns),\n",
7270
" remainder=\"passthrough\",\n",
7371
" force_int_remainder_cols=False, # Silence a warning in scikit-learn v1.6.\n",
7472
")"

notebooks/parameter_tuning_randomized_search.ipynb

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@
108108
"metadata": {},
109109
"outputs": [],
110110
"source": [
111-
"from sklearn.compose import ColumnTransformer\n",
111+
"from sklearn.compose import make_column_transformer\n",
112112
"from sklearn.preprocessing import OrdinalEncoder\n",
113113
"from sklearn.compose import make_column_selector as selector\n",
114114
"\n",
@@ -118,8 +118,8 @@
118118
"categorical_preprocessor = OrdinalEncoder(\n",
119119
" handle_unknown=\"use_encoded_value\", unknown_value=-1\n",
120120
")\n",
121-
"preprocessor = ColumnTransformer(\n",
122-
" [(\"cat_preprocessor\", categorical_preprocessor, categorical_columns)],\n",
121+
"preprocessor = make_column_transformer(\n",
122+
" (categorical_preprocessor, categorical_columns),\n",
123123
" remainder=\"passthrough\",\n",
124124
" force_int_remainder_cols=False, # Silence a warning in scikit-learn v1.6.\n",
125125
")"

notebooks/parameter_tuning_sol_02.ipynb

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -41,21 +41,15 @@
4141
"metadata": {},
4242
"outputs": [],
4343
"source": [
44-
"from sklearn.compose import ColumnTransformer\n",
44+
"from sklearn.compose import make_column_transformer\n",
4545
"from sklearn.compose import make_column_selector as selector\n",
4646
"from sklearn.preprocessing import OrdinalEncoder\n",
4747
"\n",
4848
"categorical_preprocessor = OrdinalEncoder(\n",
4949
" handle_unknown=\"use_encoded_value\", unknown_value=-1\n",
5050
")\n",
51-
"preprocessor = ColumnTransformer(\n",
52-
" [\n",
53-
" (\n",
54-
" \"cat_preprocessor\",\n",
55-
" categorical_preprocessor,\n",
56-
" selector(dtype_include=object),\n",
57-
" )\n",
58-
" ],\n",
51+
"preprocessor = make_column_transformer(\n",
52+
" (categorical_preprocessor, selector(dtype_include=object)),\n",
5953
" remainder=\"passthrough\",\n",
6054
")\n",
6155
"\n",
@@ -152,6 +146,13 @@
152146
"\n",
153147
"print(f\"Test score after the parameter tuning: {test_score:.3f}\")"
154148
]
149+
},
150+
{
151+
"cell_type": "code",
152+
"execution_count": null,
153+
"metadata": {},
154+
"outputs": [],
155+
"source": []
155156
}
156157
],
157158
"metadata": {

python_scripts/03_categorical_pipeline_column_transformer.py

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -74,9 +74,10 @@
7474
# categories.
7575
# * **numerical scaling** numerical features which will be standardized.
7676
#
77-
# Now, we create our `ColumnTransformer` by specifying three values: the
78-
# preprocessor name, the transformer, and the columns. First, let's create the
79-
# preprocessors for the numerical and categorical parts.
77+
# Now, we create our `ColumnTransformer` using the helper function
78+
# `make_column_transformer`. We specify two values: the transformer, and the
79+
# columns. First, let's create the preprocessors for the numerical and
80+
# categorical parts.
8081

8182
# %%
8283
from sklearn.preprocessing import OneHotEncoder, StandardScaler
@@ -89,13 +90,11 @@
8990
# their respective columns.
9091

9192
# %%
92-
from sklearn.compose import ColumnTransformer
93+
from sklearn.compose import make_column_transformer
9394

94-
preprocessor = ColumnTransformer(
95-
[
96-
("one-hot-encoder", categorical_preprocessor, categorical_columns),
97-
("standard_scaler", numerical_preprocessor, numerical_columns),
98-
]
95+
preprocessor = make_column_transformer(
96+
(categorical_preprocessor, categorical_columns),
97+
(numerical_preprocessor, numerical_columns),
9998
)
10099

101100
# %% [markdown]
@@ -234,8 +233,8 @@
234233
handle_unknown="use_encoded_value", unknown_value=-1
235234
)
236235

237-
preprocessor = ColumnTransformer(
238-
[("categorical", categorical_preprocessor, categorical_columns)],
236+
preprocessor = make_column_transformer(
237+
(categorical_preprocessor, categorical_columns),
239238
remainder="passthrough",
240239
)
241240

0 commit comments

Comments
 (0)