|
7 | 7 |
|
8 | 8 | import openml |
9 | 9 | from pprint import pprint |
10 | | -from sklearn import ensemble, neighbors, preprocessing, pipeline, tree |
| 10 | +from sklearn import compose, ensemble, impute, neighbors, preprocessing, pipeline, tree |
11 | 11 |
|
12 | 12 | ############################################################################ |
13 | 13 | # Train machine learning models |
|
39 | 39 | target=dataset.default_target_attribute |
40 | 40 | ) |
41 | 41 | print("Categorical features: {}".format(categorical_indicator)) |
42 | | -enc = preprocessing.OneHotEncoder(categorical_features=categorical_indicator) |
43 | | -X = enc.fit_transform(X) |
| 42 | +transformer = compose.ColumnTransformer( |
| 43 | +    [('one_hot_encoder', preprocessing.OneHotEncoder(categories='auto'), categorical_indicator)], remainder='passthrough')
| 44 | +X = transformer.fit_transform(X) |
44 | 45 | clf.fit(X, y) |
45 | 46 |
|
46 | 47 | ############################################################################ |
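For context: scikit-learn 0.20 deprecated OneHotEncoder's categorical_features argument in favour of ColumnTransformer, which applies the encoder only to the masked columns and handles the remaining ones via remainder. Below is a minimal sketch of the same pattern on made-up data; X_toy and categorical_mask are illustrative stand-ins for the dataset's feature matrix and categorical_indicator, not part of the commit.

import numpy as np
from sklearn import compose, preprocessing

# Made-up feature matrix: first column categorical (values 0/1/2), second numeric.
X_toy = np.array([[0.0, 1.5],
                  [1.0, 2.5],
                  [2.0, 3.5]])
categorical_mask = [True, False]   # plays the role of categorical_indicator

transformer = compose.ColumnTransformer(
    [('one_hot_encoder', preprocessing.OneHotEncoder(categories='auto'), categorical_mask)],
    remainder='passthrough')       # keep the numeric column alongside the encoded one
print(transformer.fit_transform(X_toy))
# Three one-hot columns for the categorical feature, followed by the untouched numeric column.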
|
83 | 84 | # When you need to handle 'dirty' data, build pipelines to model them automatically.
84 | 85 | task = openml.tasks.get_task(115) |
85 | 86 | pipe = pipeline.Pipeline(steps=[ |
86 | | - ('Imputer', preprocessing.Imputer(strategy='median')), |
| 87 | + ('Imputer', impute.SimpleImputer(strategy='median')), |
87 | 88 | ('OneHotEncoder', preprocessing.OneHotEncoder(sparse=False, handle_unknown='ignore')), |
88 | | - ('Classifier', ensemble.RandomForestClassifier()) |
| 89 | + ('Classifier', ensemble.RandomForestClassifier(n_estimators=10)) |
89 | 90 | ]) |
90 | 91 |
|
91 | 92 | run = openml.runs.run_model_on_task(pipe, task, avoid_duplicate_runs=False) |
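For context: preprocessing.Imputer was deprecated in scikit-learn 0.20 in favour of impute.SimpleImputer, which is why the pipeline above switches to it. A minimal sketch of what the median strategy does on made-up data follows; X_dirty is illustrative only, not taken from task 115.

import numpy as np
from sklearn import impute

# Made-up matrix with missing values in both columns.
X_dirty = np.array([[1.0, np.nan],
                    [3.0, 4.0],
                    [np.nan, 8.0]])
imputer = impute.SimpleImputer(strategy='median')
print(imputer.fit_transform(X_dirty))
# Each NaN is replaced by its column's median (2.0 in the first column, 6.0 in the second),
# so downstream steps like OneHotEncoder and the random forest never see missing values.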
|