MNT Add info about the estimators html diagram (INRIA#844)

ArturoAmorQ · ArturoAmorQ · commit e1a5f07ac7e3 · 2025-10-23T16:47:06.000+02:00
diff --git a/python_scripts/03_categorical_pipeline_visualization.py b/python_scripts/03_categorical_pipeline_visualization.py
@@ -97,15 +97,41 @@
         ("classifier", LogisticRegression()),
     ]
 )
+model
 
 # %% [markdown]
-# Let's visualize it!
+# Let's fit it!
 
 # %%
-model
+model.fit(data, target)
+
+# %% [markdown]
+# Notice that the diagram changes color once the estimator is fit.
+#
+# So far we used `Pipeline` and `ColumnTransformer`, which allow us to
+# customize the names of the steps in the pipeline. An alternative is to use
+# `make_column_transformer` and `make_pipeline`, which neither require nor
+# permit naming the estimators. Instead, their names are automatically set to
+# the lowercase of their type names.
+
+# %%
+from sklearn.compose import make_column_transformer
+from sklearn.pipeline import make_pipeline
+
+numeric_transformer = make_pipeline(
+    SimpleImputer(strategy="median"), StandardScaler()
+)
+categorical_transformer = OneHotEncoder(handle_unknown="ignore")
+
+preprocessor = make_column_transformer(
+    (numeric_transformer, numeric_features),
+    (categorical_transformer, categorical_features),
+)
+model = make_pipeline(preprocessor, LogisticRegression())
+model.fit(data, target)
 
 # %% [markdown]
-# ## Finally we score the model
+# ## Finally, we can score the model using cross-validation
 
 # %%
 from sklearn.model_selection import cross_validate