Simplify and fix examples; correct the documented default metrics in estimators.py

mfeurer · mfeurer · commit 7f891fa3badc · 2017-05-16T15:16:20.000+02:00
diff --git a/autosklearn/estimators.py b/autosklearn/estimators.py
@@ -321,7 +321,7 @@ def fit_ensemble(self, y, task=None, metric=None, precision='32',
             the task type (binary classification, multiclass classification,
             multilabel classification or regression).
 
-        metric : callable, optional (default='acc_metric')
+        metric : callable, optional
             An instance of :class:`autosklearn.metrics.Scorer` as created by
             :meth:`autosklearn.metrics.make_scorer`. These are the `Built-in
             Metrics`_.
@@ -463,7 +463,7 @@ def fit(self, X, y,
         y : array-like, shape = [n_samples] or [n_samples, n_outputs]
             The regression target.
 
-        metric : callable, optional (default='autosklearn.metrics.accuracy')
+        metric : callable, optional (default='autosklearn.metrics.r2')
             An instance of :class:`autosklearn.metrics.Scorer` as created by
             :meth:`autosklearn.metrics.make_scorer`. These are the `Built-in
             Metrics`_.
diff --git a/example/example_crossvalidation.py b/example/example_crossvalidation.py
@@ -7,9 +7,7 @@
 
 
 def main():
-    digits = sklearn.datasets.load_digits()
-    X = digits.data
-    y = digits.target
+    X, y = sklearn.datasets.load_digits(return_X_y=True)
     X_train, X_test, y_train, y_test = \
         sklearn.model_selection.train_test_split(X, y, random_state=1)
 
diff --git a/example/example_feature_types.py b/example/example_feature_types.py
@@ -11,14 +11,9 @@
     print("#"*80 + """
     To run this example you need to install openml-python:
 
-    git+https://github.com/renatopp/liac-arff
-    # OpenML is currently not on pypi, use an old version to not depend on
-    # scikit-learn 0.18
-    requests
-    xmltodict
-    git+https://github.com/renatopp/liac-arff
-    git+https://github.com/openml/""" +
-    "openml-python@0b9009b0436fda77d9f7c701bd116aff4158d5e1\n""" +
+    pip install git+https://github.com/renatopp/liac-arff
+    pip install requests xmltodict
+    pip install git+https://github.com/openml/openml-python@develop --no-deps\n""" +
           "#"*80)
     raise
 
@@ -41,7 +36,7 @@ def main():
         get_data(target=task.target_name, return_categorical_indicator=True)
 
     # Create feature type list from openml.org indicator and run autosklearn
-    feat_type = ['categorical' if ci else 'numerical'
+    feat_type = ['Categorical' if ci else 'Numerical'
                  for ci in categorical_indicator]
 
     cls = autosklearn.classification.\
diff --git a/example/example_holdout.py b/example/example_holdout.py
@@ -6,9 +6,7 @@
 
 
 def main():
-    digits = sklearn.datasets.load_digits()
-    X = digits.data
-    y = digits.target
+    X, y = sklearn.datasets.load_digits(return_X_y=True)
     X_train, X_test, y_train, y_test = \
         sklearn.model_selection.train_test_split(X, y, random_state=1)
 
diff --git a/example/example_metrics.py b/example/example_metrics.py
@@ -8,22 +8,6 @@
 import autosklearn.classification
 import autosklearn.metrics
 
-try:
-    import openml
-except ImportError:
-    print("#"*80 + """
-    To run this example you need to install openml-python:
-
-    git+https://github.com/renatopp/liac-arff
-    # OpenML is currently not on pypi, use an old version to not depend on
-    # scikit-learn 0.18
-    requests
-    xmltodict
-    git+https://github.com/renatopp/liac-arff
-    git+https://github.com/openml/""" +
-    "openml-python@0b9009b0436fda77d9f7c701bd116aff4158d5e1\n""" +
-          "#"*80)
-    raise
 
 
 def accuracy(solution, prediction):
@@ -38,25 +22,10 @@ def accuracy_wk(solution, prediction, dummy):
 
 
 def main():
-    # Load adult dataset from openml.org, see https://www.openml.org/t/2117
-    openml.config.apikey = '610344db6388d9ba34f6db45a3cf71de'
 
-    task = openml.tasks.get_task(2117)
-    train_indices, test_indices = task.get_train_test_split_indices()
-    X, y = task.get_X_and_y()
-
-    X_train = X[train_indices]
-    y_train = y[train_indices]
-    X_test = X[test_indices]
-    y_test = y[test_indices]
-
-    dataset = task.get_dataset()
-    _, _, categorical_indicator = dataset.\
-        get_data(target=task.target_name, return_categorical_indicator=True)
-
-    # Create feature type list from openml.org indicator and run autosklearn
-    feat_type = ['categorical' if ci else 'numerical'
-                 for ci in categorical_indicator]
+    X, y = sklearn.datasets.load_breast_cancer(return_X_y=True)
+    X_train, X_test, y_train, y_test = \
+        sklearn.model_selection.train_test_split(X, y, random_state=1)
 
     # Print a list of available metrics
     print("Available CLASSIFICATION metrics autosklearn.metrics.*:")
@@ -71,14 +40,14 @@ def main():
     cls = autosklearn.classification.\
         AutoSklearnClassifier(time_left_for_this_task=60,
                               per_run_time_limit=30, seed=1)
-    cls.fit(X_train, y_train, feat_type=feat_type,
-            metric=autosklearn.metrics.accuracy)
+    cls.fit(X_train, y_train, metric=autosklearn.metrics.accuracy)
 
     predictions = cls.predict(X_test)
     print("Accuracy score {:g} using {:s}".
           format(sklearn.metrics.accuracy_score(y_test, predictions),
                  cls._automl._automl._metric.name))
 
+    # Second example: Use own accuracy metric
     print("#"*80)
     print("Use self defined accuracy accuracy metric")
     accuracy_scorer = autosklearn.metrics.make_scorer(name="accu",
@@ -89,13 +58,14 @@ def main():
     cls = autosklearn.classification.\
         AutoSklearnClassifier(time_left_for_this_task=60,
                               per_run_time_limit=30, seed=1)
-    cls.fit(X_train, y_train, feat_type=feat_type, metric=accuracy_scorer)
+    cls.fit(X_train, y_train, metric=accuracy_scorer)
 
     predictions = cls.predict(X_test)
     print("Accuracy score {:g} using {:s}".
           format(sklearn.metrics.accuracy_score(y_test, predictions),
                  cls._automl._automl._metric.name))
 
+    # Third example: Use own accuracy metric with additional argument
     print("#"*80)
     print("Use self defined accuracy with additional argument")
     accuracy_scorer = autosklearn.metrics.make_scorer(name="accu_add",
@@ -107,7 +77,7 @@ def main():
     cls = autosklearn.classification.\
         AutoSklearnClassifier(time_left_for_this_task=60,
                               per_run_time_limit=30, seed=1)
-    cls.fit(X_train, y_train, feat_type=feat_type, metric=accuracy_scorer)
+    cls.fit(X_train, y_train, metric=accuracy_scorer)
 
     predictions = cls.predict(X_test)
     print("Accuracy score {:g} using {:s}".
diff --git a/example/example_parallel.py b/example/example_parallel.py
@@ -6,6 +6,7 @@
 import sklearn.datasets
 import sklearn.metrics
 
+from autosklearn.metrics import accuracy
 from autosklearn.classification import AutoSklearnClassifier
 from autosklearn.constants import *
 
@@ -42,10 +43,10 @@ def spawn_classifier(seed, dataset_name):
     # models.
     # 3. all instances of the AutoSklearnClassifier must have a different seed!
     automl = AutoSklearnClassifier(
-        time_left_for_this_task=120, # sec., how long should this seed fit
+        time_left_for_this_task=60, # sec., how long should this seed fit
         # process run
-        per_run_time_limit=60, # sec., each model may only take this long before it's killed
-        ml_memory_limit=1024, # MB, memory limit imposed on each call to a ML  algorithm
+        per_run_time_limit=15, # sec., each model may only take this long before it's killed
+        ml_memory_limit=1024, # MB, memory limit imposed on each call to a ML algorithm
         shared_mode=True, # tmp folder will be shared between seeds
         tmp_folder=tmp_folder,
         output_folder=output_folder,
@@ -57,9 +58,7 @@ def spawn_classifier(seed, dataset_name):
 
 if __name__ == '__main__':
     
-    digits = sklearn.datasets.load_digits()
-    X = digits.data
-    y = digits.target
+    X, y = sklearn.datasets.load_digits(return_X_y=True)
     X_train, X_test, y_train, y_test = \
         sklearn.model_selection.train_test_split(X, y, random_state=1)
 
@@ -87,7 +86,7 @@ def spawn_classifier(seed, dataset_name):
     # necessary
     automl.fit_ensemble(y_train,
                         task=MULTICLASS_CLASSIFICATION,
-                        metric=ACC_METRIC,
+                        metric=accuracy,
                         precision='32',
                         dataset_name='digits',
                         ensemble_size=20,
diff --git a/example/example_regression.py b/example/example_regression.py
@@ -7,9 +7,7 @@
 
 
 def main():
-    boston = sklearn.datasets.load_boston()
-    X = boston.data
-    y = boston.target
+    X, y = sklearn.datasets.load_boston(return_X_y=True)
     feature_types = (['numerical'] * 3) + ['categorical'] + (['numerical'] * 9)
     X_train, X_test, y_train, y_test = \
         sklearn.model_selection.train_test_split(X, y, random_state=1)
diff --git a/example/example_sequential.py b/example/example_sequential.py
@@ -6,9 +6,7 @@
 
 
 def main():
-    digits = sklearn.datasets.load_digits()
-    X = digits.data
-    y = digits.target
+    X, y = sklearn.datasets.load_digits(return_X_y=True)
     X_train, X_test, y_train, y_test = \
         sklearn.model_selection.train_test_split(X, y, random_state=1)