|
7 | 7 |
|
8 | 8 | import openml |
9 | 9 | from pprint import pprint |
10 | | -from sklearn import ensemble, neighbors, preprocessing, pipeline, tree |
| 10 | +from sklearn import compose, ensemble, impute, neighbors, preprocessing, pipeline, tree |
11 | 11 |
|
12 | 12 | ############################################################################ |
13 | 13 | # Train machine learning models |
|
39 | 39 | target=dataset.default_target_attribute |
40 | 40 | ) |
41 | 41 | print("Categorical features: {}".format(categorical_indicator)) |
42 | | -enc = preprocessing.OneHotEncoder(categorical_features=categorical_indicator) |
43 | | -X = enc.fit_transform(X) |
| 42 | +transformer = compose.ColumnTransformer( |
| 43 | +    [('one_hot_encoder', preprocessing.OneHotEncoder(categories='auto'), categorical_indicator)], remainder='passthrough')
| 44 | +X = transformer.fit_transform(X) |
44 | 45 | clf.fit(X, y) |
45 | 46 |
|
46 | 47 | ############################################################################ |
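For context: scikit-learn 0.20 deprecated OneHotEncoder's categorical_features argument in favour of ColumnTransformer, which applies the encoder only to the masked columns and handles the remaining ones via remainder. Below is a minimal sketch of the same pattern on made-up data; X_toy and categorical_mask are illustrative stand-ins for the dataset's feature matrix and categorical_indicator, not part of the commit.

import numpy as np
from sklearn import compose, preprocessing

# Made-up feature matrix: first column categorical (values 0/1/2), second numeric.
X_toy = np.array([[0.0, 1.5],
                  [1.0, 2.5],
                  [2.0, 3.5]])
categorical_mask = [True, False]   # plays the role of categorical_indicator

transformer = compose.ColumnTransformer(
    [('one_hot_encoder', preprocessing.OneHotEncoder(categories='auto'), categorical_mask)],
    remainder='passthrough')       # keep the numeric column alongside the encoded one
print(transformer.fit_transform(X_toy))
# Three one-hot columns for the categorical feature, followed by the untouched numeric column.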
|
83 | 84 | # When you need to handle 'dirty' data, build pipelines to model them automatically.
84 | 85 | task = openml.tasks.get_task(115) |
85 | 86 | pipe = pipeline.Pipeline(steps=[ |
86 | | - ('Imputer', preprocessing.Imputer(strategy='median')), |
| 87 | + ('Imputer', impute.SimpleImputer(strategy='median')), |
87 | 88 | ('OneHotEncoder', preprocessing.OneHotEncoder(sparse=False, handle_unknown='ignore')), |
88 | | - ('Classifier', ensemble.RandomForestClassifier()) |
| 89 | + ('Classifier', ensemble.RandomForestClassifier(n_estimators=10)) |
89 | 90 | ]) |
90 | 91 |
|
91 | 92 | run = openml.runs.run_model_on_task(pipe, task, avoid_duplicate_runs=False) |
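For context: preprocessing.Imputer was deprecated in scikit-learn 0.20 in favour of impute.SimpleImputer, which is why the pipeline above switches to it. A minimal sketch of what the median strategy does on made-up data follows; X_dirty is illustrative only, not taken from task 115.

import numpy as np
from sklearn import impute

# Made-up matrix with missing values in both columns.
X_dirty = np.array([[1.0, np.nan],
                    [3.0, 4.0],
                    [np.nan, 8.0]])
imputer = impute.SimpleImputer(strategy='median')
print(imputer.fit_transform(X_dirty))
# Each NaN is replaced by its column's median (2.0 in the first column, 6.0 in the second),
# so downstream steps like OneHotEncoder and the random forest never see missing values.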
|