
Commit 4643aa5

Merge pull request #282 from automl/FIX#121
Fix#121
2 parents (e87e812 + 4026717), commit 4643aa5

File tree: 4 files changed (+187, -11 lines)


autosklearn/estimators.py

Lines changed: 7 additions & 10 deletions
@@ -14,7 +14,6 @@
 from autosklearn.util.backend import create
 
 
-
 class AutoMLDecorator(object):
 
     def __init__(self, automl):
@@ -379,7 +378,7 @@ def fit(self, X, y,
         y : array-like, shape = [n_samples] or [n_samples, n_outputs]
             The target classes.
 
-        metric : callable, optional (default='acc_metric')
+        metric : callable, optional (default='autosklearn.metrics.accuracy')
             An instance of :class:`autosklearn.metrics.Scorer` as created by
             :meth:`autosklearn.metrics.make_scorer`. These are the `Built-in
             Metrics`_.
@@ -388,7 +387,7 @@ def fit(self, X, y,
             List of str of `len(X.shape[1])` describing the attribute type.
             Possible types are `Categorical` and `Numerical`. `Categorical`
             attributes will be automatically One-Hot encoded. The values
-            used for a categorical attribute must be integers, obtainde for
+            used for a categorical attribute must be integers, obtained for
             example by `sklearn.preprocessing.LabelEncoder
             <http://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.LabelEncoder.html>`_.
 
@@ -464,11 +463,10 @@ def fit(self, X, y,
         y : array-like, shape = [n_samples] or [n_samples, n_outputs]
             The regression target.
 
-        metric : str, optional (default='r2_metric')
-            The metric to optimize for. Can be one of: ['r2_metric',
-            'a_metric']. A description of the metrics can be found in
-            `the paper describing the AutoML Challenge
-            <http://www.causality.inf.ethz.ch/AutoML/automl_ijcnn15.pdf>`_.
+        metric : callable, optional (default='autosklearn.metrics.accuracy')
+            An instance of :class:`autosklearn.metrics.Scorer` as created by
+            :meth:`autosklearn.metrics.make_scorer`. These are the `Built-in
+            Metrics`_.
 
         feat_type : list, optional (default=None)
             List of str of `len(X.shape[1])` describing the attribute type.
@@ -541,7 +539,7 @@ def fit(self, X, y,
         if task == MULTILABEL_CLASSIFICATION:
             metric = f1_macro
         else:
-            metric=accuracy
+            metric = accuracy
 
         y = self._process_target_classes(y)
 
@@ -586,7 +584,6 @@ def _process_target_classes(self, y):
 
         return y
 
-
     def predict(self, X, batch_size=None, n_jobs=1):
         predicted_probabilities = self._automl.predict(
             X, batch_size=batch_size, n_jobs=n_jobs)
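
The updated docstring above replaces the old string-based metric names ('acc_metric', 'r2_metric') with Scorer objects. A minimal sketch of a call against this API (the digits dataset and the 60-second budget are illustrative, not part of the commit):

# Sketch only: pass a built-in Scorer via the new `metric` argument of fit().
import sklearn.datasets
import sklearn.metrics
import sklearn.model_selection

import autosklearn.classification
import autosklearn.metrics

digits = sklearn.datasets.load_digits()
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    digits.data, digits.target, random_state=1)

cls = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=60, per_run_time_limit=30)
# `metric` is now an autosklearn.metrics.Scorer, e.g. the built-in accuracy.
cls.fit(X_train, y_train, metric=autosklearn.metrics.accuracy)

predictions = cls.predict(X_test)
print("Accuracy score", sklearn.metrics.accuracy_score(y_test, predictions))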

doc/manual.rst

Lines changed: 4 additions & 1 deletion
@@ -20,6 +20,9 @@ aspects of its usage:
 * `Parallel usage <https://github.com/automl/auto-sklearn/blob/master/example/example_parallel.py>`_
 * `Sequential usage <https://github.com/automl/auto-sklearn/blob/master/example/example_sequential.py>`_
 * `Regression <https://github.com/automl/auto-sklearn/blob/master/example/example_regression.py>`_
+* `Continuous and Categorical Data <https://github.com/automl/auto-sklearn/blob/master/example/example_feature_types.py>`_
+* `Using Custom metrics <https://github.com/automl/auto-sklearn/blob/master/example/example_metrics.py>`_
+
 
 Time and memory limits
 ======================
@@ -64,7 +67,7 @@ For a full list please have a look at the source code (in `autosklearn/pipeline/
 * `Regressors <https://github.com/automl/auto-sklearn/tree/master/autosklearn/pipeline/components/regression>`_
 * `Preprocessors <https://github.com/automl/auto-sklearn/tree/master/autosklearn/pipeline/components/feature_preprocessing>`_
 
-Turning of preprocessing
+Turning off preprocessing
 ~~~~~~~~~~~~~~~~~~~~~~~~
 
 Preprocessing in *auto-sklearn* is divided into data preprocessing and
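
The renamed "Turning off preprocessing" section covers disabling the feature preprocessing step. A hedged sketch of that configuration, assuming auto-sklearn's `include_preprocessors` constructor argument (the argument itself is not part of this diff; data preprocessing such as one-hot encoding cannot be switched off):

# Sketch only: restrict the feature preprocessing choices to 'no_preprocessing'.
import autosklearn.classification

cls = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=60,
    per_run_time_limit=30,
    include_preprocessors=['no_preprocessing'],
)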

example/example_feature_types.py

Lines changed: 57 additions & 0 deletions
@@ -0,0 +1,57 @@
# -*- encoding: utf-8 -*-
import sklearn.model_selection
import sklearn.datasets
import sklearn.metrics

import autosklearn.classification

try:
    import openml
except ImportError:
    print("#"*80 + """
    To run this example you need to install openml-python:

    git+https://github.com/renatopp/liac-arff
    # OpenML is currently not on pypi, use an old version to not depend on
    # scikit-learn 0.18
    requests
    xmltodict
    git+https://github.com/renatopp/liac-arff
    git+https://github.com/openml/""" +
          "openml-python@0b9009b0436fda77d9f7c701bd116aff4158d5e1\n""" +
          "#"*80)
    raise


def main():
    # Load adult dataset from openml.org, see https://www.openml.org/t/2117
    openml.config.apikey = '610344db6388d9ba34f6db45a3cf71de'

    task = openml.tasks.get_task(2117)
    train_indices, test_indices = task.get_train_test_split_indices()
    X, y = task.get_X_and_y()

    X_train = X[train_indices]
    y_train = y[train_indices]
    X_test = X[test_indices]
    y_test = y[test_indices]

    dataset = task.get_dataset()
    _, _, categorical_indicator = dataset.\
        get_data(target=task.target_name, return_categorical_indicator=True)

    # Create feature type list from openml.org indicator and run autosklearn
    feat_type = ['categorical' if ci else 'numerical'
                 for ci in categorical_indicator]

    cls = autosklearn.classification.\
        AutoSklearnClassifier(time_left_for_this_task=120,
                              per_run_time_limit=30)
    cls.fit(X_train, y_train, feat_type=feat_type)

    predictions = cls.predict(X_test)
    print("Accuracy score", sklearn.metrics.accuracy_score(y_test, predictions))


if __name__ == "__main__":
    main()
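
The example above gets integer-coded categorical columns directly from OpenML. When the raw values are strings, the fit() docstring changed in this commit asks for integer codes, obtained for example with sklearn.preprocessing.LabelEncoder. A hedged sketch with synthetic data (the color column and sample sizes are illustrative only):

# Sketch only: encode a string-valued categorical column before passing feat_type.
import numpy as np
import sklearn.preprocessing

import autosklearn.classification

rng = np.random.RandomState(1)
colors = rng.choice(['red', 'green', 'blue'], size=200)  # categorical column
values = rng.rand(200)                                   # numerical column
y = (colors == 'red').astype(int)

X = np.empty((200, 2))
X[:, 0] = sklearn.preprocessing.LabelEncoder().fit_transform(colors)
X[:, 1] = values

feat_type = ['categorical', 'numerical']
cls = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=60, per_run_time_limit=30)
cls.fit(X, y, feat_type=feat_type)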

example/example_metrics.py

Lines changed: 119 additions & 0 deletions
@@ -0,0 +1,119 @@
# -*- encoding: utf-8 -*-
import numpy as np

import sklearn.model_selection
import sklearn.datasets
import sklearn.metrics

import autosklearn.classification
import autosklearn.metrics

try:
    import openml
except ImportError:
    print("#"*80 + """
    To run this example you need to install openml-python:

    git+https://github.com/renatopp/liac-arff
    # OpenML is currently not on pypi, use an old version to not depend on
    # scikit-learn 0.18
    requests
    xmltodict
    git+https://github.com/renatopp/liac-arff
    git+https://github.com/openml/""" +
          "openml-python@0b9009b0436fda77d9f7c701bd116aff4158d5e1\n""" +
          "#"*80)
    raise


def accuracy(solution, prediction):
    # function defining accuracy
    return np.mean(solution == prediction)


def accuracy_wk(solution, prediction, dummy):
    # function defining accuracy and accepting an additional argument
    assert dummy is None
    return np.mean(solution == prediction)


def main():
    # Load adult dataset from openml.org, see https://www.openml.org/t/2117
    openml.config.apikey = '610344db6388d9ba34f6db45a3cf71de'

    task = openml.tasks.get_task(2117)
    train_indices, test_indices = task.get_train_test_split_indices()
    X, y = task.get_X_and_y()

    X_train = X[train_indices]
    y_train = y[train_indices]
    X_test = X[test_indices]
    y_test = y[test_indices]

    dataset = task.get_dataset()
    _, _, categorical_indicator = dataset.\
        get_data(target=task.target_name, return_categorical_indicator=True)

    # Create feature type list from openml.org indicator and run autosklearn
    feat_type = ['categorical' if ci else 'numerical'
                 for ci in categorical_indicator]

    # Print a list of available metrics
    print("Available CLASSIFICATION metrics autosklearn.metrics.*:")
    print("\t*" + "\n\t*".join(autosklearn.metrics.CLASSIFICATION_METRICS))

    print("Available REGRESSION metrics autosklearn.metrics.*:")
    print("\t*" + "\n\t*".join(autosklearn.metrics.REGRESSION_METRICS))

    # First example: Use predefined accuracy metric
    print("#"*80)
    print("Use predefined accuracy metric")
    cls = autosklearn.classification.\
        AutoSklearnClassifier(time_left_for_this_task=60,
                              per_run_time_limit=30, seed=1)
    cls.fit(X_train, y_train, feat_type=feat_type,
            metric=autosklearn.metrics.accuracy)

    predictions = cls.predict(X_test)
    print("Accuracy score {:g} using {:s}".
          format(sklearn.metrics.accuracy_score(y_test, predictions),
                 cls._automl._automl._metric.name))

    print("#"*80)
    print("Use self defined accuracy metric")
    accuracy_scorer = autosklearn.metrics.make_scorer(name="accu",
                                                      score_func=accuracy,
                                                      greater_is_better=True,
                                                      needs_proba=False,
                                                      needs_threshold=False)
    cls = autosklearn.classification.\
        AutoSklearnClassifier(time_left_for_this_task=60,
                              per_run_time_limit=30, seed=1)
    cls.fit(X_train, y_train, feat_type=feat_type, metric=accuracy_scorer)

    predictions = cls.predict(X_test)
    print("Accuracy score {:g} using {:s}".
          format(sklearn.metrics.accuracy_score(y_test, predictions),
                 cls._automl._automl._metric.name))

    print("#"*80)
    print("Use self defined accuracy with additional argument")
    accuracy_scorer = autosklearn.metrics.make_scorer(name="accu_add",
                                                      score_func=accuracy_wk,
                                                      greater_is_better=True,
                                                      needs_proba=False,
                                                      needs_threshold=False,
                                                      dummy=None)
    cls = autosklearn.classification.\
        AutoSklearnClassifier(time_left_for_this_task=60,
                              per_run_time_limit=30, seed=1)
    cls.fit(X_train, y_train, feat_type=feat_type, metric=accuracy_scorer)

    predictions = cls.predict(X_test)
    print("Accuracy score {:g} using {:s}".
          format(sklearn.metrics.accuracy_score(y_test, predictions),
                 cls._automl._automl._metric.name))


if __name__ == "__main__":
    main()
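
example_metrics.py only demonstrates scores where larger is better. A hedged sketch of a loss-style metric built with the same make_scorer arguments shown above, but with greater_is_better=False (the "err" name and the error function are illustrative, not part of this commit):

# Sketch only: a custom error metric that auto-sklearn should minimise.
import numpy as np

import autosklearn.metrics


def error_rate(solution, prediction):
    # fraction of misclassified samples; lower is better
    return np.mean(solution != prediction)


error_scorer = autosklearn.metrics.make_scorer(name="err",
                                               score_func=error_rate,
                                               greater_is_better=False,
                                               needs_proba=False,
                                               needs_threshold=False)

# Used exactly like accuracy_scorer in the example above:
# cls.fit(X_train, y_train, feat_type=feat_type, metric=error_scorer)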
