MAINT rework documentation

mfeurer · mfeurer · commit d306d88ffb47 · 2016-10-17T11:01:05.000+02:00
diff --git a/autosklearn/estimators.py b/autosklearn/estimators.py
@@ -113,32 +113,46 @@ def __init__(self,
         ----------
         time_left_for_this_task : int, optional (default=3600)
             Time limit in seconds for the search of appropriate
-            models. By increasing this value, *auto-sklearn* will find better
-            configurations.
+            models. By increasing this value, *auto-sklearn* has a higher
+            chance of finding better models.
 
         per_run_time_limit : int, optional (default=360)
-            Time limit for a single call to machine learning model.
+            Time limit for a single call to the machine learning model.
+            Model fitting will be terminated if the machine learning
+            algorithm runs over the time limit. Set this value high enough so
+            that typical machine learning algorithms can be fit on the
+            training data.
 
         initial_configurations_via_metalearning : int, optional (default=25)
+            Initialize the hyperparameter optimization algorithm with this
+            many configurations which worked well on previously seen
+            datasets. Disable if the hyperparameter optimization algorithm
+            should start from scratch.
 
         ensemble_size : int, optional (default=50)
+            Number of models added to the ensemble built by `Ensemble
+            selection from libraries of models`. Models are drawn with
+            replacement.
 
         ensemble_nbest : int, optional (default=50)
+            Only consider the ``ensemble_nbest`` best models when building an
+            ensemble. Implements `Model Library Pruning` from `Getting the
+            most out of ensemble selection`.
 
         seed : int, optional (default=1)
 
         ml_memory_limit : int, optional (3000)
-            Memory limit for the machine learning algorithm. If the machine
-            learning algorithm allocates tries to allocate more memory,
-            its evaluation will be stopped.
+            Memory limit in MB for the machine learning algorithm.
+            *auto-sklearn* will stop fitting the machine learning algorithm if
+            it tries to allocate more than ``ml_memory_limit`` MB.
 
         include_estimators : dict, optional (None)
-            If None all possible estimators are used. Otherwise specifies set of
-            estimators to use
+            If None, all possible estimators are used. Otherwise specifies
+            set of estimators to use
 
         include_preprocessors : dict, optional (None)
-            If None all possible preprocessors are used. Otherwise specifies set of
-            preprocessors to use
+            If None, all possible preprocessors are used. Otherwise specifies
+            set of preprocessors to use
 
         resampling_strategy : string, optional ('holdout')
             how to to handle overfitting, might need 'resampling_strategy_arguments'
@@ -148,24 +162,21 @@ def __init__(self,
               fit where possible
             * 'cv': crossvalidation, requires 'folds'
             * 'nested-cv': crossvalidation, requires 'outer-folds, 'inner-folds'
-            * 'partial-cv': crossvalidation, requires 'folds' , calls
-              iterative fit where possible
 
         resampling_strategy_arguments : dict, optional if 'holdout' (None)
             Additional arguments for resampling_strategy
             * 'holdout': None
             * 'holdout-iterative-fit':  None
             * 'cv': {'folds': int}
             * 'nested-cv': {'outer_folds': int, 'inner_folds'
-            * 'partial-cv': {'folds': int}
 
         tmp_folder : string, optional (None)
-            folder to store configuration output, if None automatically use
-            /tmp/autosklearn_tmp_$pid_$random_number
+            folder to store configuration output and log files, if ``None``
+            automatically use ``/tmp/autosklearn_tmp_$pid_$random_number``
 
         output_folder : string, optional (None)
-            folder to store trained models, if None automatically use
-            /tmp/autosklearn_output_$pid_$random_number
+            folder to store predictions for optional test set, if ``None``
+            automatically use ``/tmp/autosklearn_output_$pid_$random_number``
 
         delete_tmp_folder_after_terminate: string, optional (True)
             remove tmp_folder, when finished. If tmp_folder is None
@@ -176,10 +187,10 @@ def __init__(self,
             output_dir will always be deleted
 
         shared_mode: bool, optional (False)
-            run smac in shared-model-node. This only works if arguments
-            tmp_folder and output_folder are given and sets both
-            delete_tmp_folder_after_terminate and
-            delete_output_folder_after_terminate to False.
+            Run smac in shared-model-node. This only works if arguments
+            ``tmp_folder`` and ``output_folder`` are given and both
+            ``delete_tmp_folder_after_terminate`` and
+            ``delete_output_folder_after_terminate`` are set to False.
 
         Attributes
         ----------
@@ -193,6 +204,14 @@ def __init__(self,
               cross-validation folds
             * ``cv_validation_scores``, the list of scores for each fold
 
+        cv_results_ : dict of numpy (masked) ndarrays
+            A dict with keys as column headers and values as columns, that can be
+            imported into a pandas ``DataFrame``.
+
+            This attribute is a backport of the extended output introduced in
+            scikit-learn 0.18. Not all keys returned by scikit-learn are
+            supported yet.
+
         """
         self.time_left_for_this_task = time_left_for_this_task
         self.per_run_time_limit = per_run_time_limit
@@ -276,7 +295,7 @@ def fit(self, X, y,
             metric='acc_metric',
             feat_type=None,
             dataset_name=None):
-        """Fit *autosklearn* to given training set (X, y).
+        """Fit *auto-sklearn* to given training set (X, y).
 
         Parameters
         ----------
@@ -308,8 +327,6 @@ def fit(self, X, y,
         self
 
         """
-        # Fit is supposed to be idempotent!
-        # But not if we use share_mode.
         return super(AutoSklearnClassifier, self).fit(X, y, metric, feat_type, dataset_name)
 
     def predict(self, X):
diff --git a/doc/index.rst b/doc/index.rst
@@ -27,19 +27,13 @@ Example
 *******
 
     >>> import autosklearn.classification
+    >>> import sklearn.cross_validation
     >>> import sklearn.datasets
     >>> digits = sklearn.datasets.load_digits()
     >>> X = digits.data
     >>> y = digits.target
-    >>> import numpy as np
-    >>> indices = np.arange(X.shape[0])
-    >>> np.random.shuffle(indices)
-    >>> X = X[indices]
-    >>> y = y[indices]
-    >>> X_train = X[:1000]
-    >>> y_train = y[:1000]
-    >>> X_test = X[1000:]
-    >>> y_test = y[1000:]
+    >>> X_train, X_test, y_train, y_test = \
+    ...     sklearn.cross_validation.train_test_split(X, y, random_state=1)
     >>> automl = autosklearn.classification.AutoSklearnClassifier()
     >>> automl.fit(X_train, y_train)
     >>> print(automl.score(X_test,y_test))
@@ -69,8 +63,7 @@ Then install *auto-sklearn*
     pip install auto-sklearn
 
 We recommend installing *auto-sklearn* into a `virtual environment
-<http://docs.python-guide.org/en/latest/dev/virtualenvs/>`_ or into an
-`anaconda environment <https://www.continuum.io/downloads>`_..
+<http://docs.python-guide.org/en/latest/dev/virtualenvs/>`_.
 
 Manual
 ******
@@ -83,14 +76,13 @@ Manual
 License
 *******
 *auto-sklearn* is licensed the same way as *scikit-learn*,
-namely the 3-clause BSD license. The subprojects it uses, most notably SMAC,
-can have different licenses.
+namely the 3-clause BSD license.
 
 Citing auto-sklearn
 *******************
 
 If you use auto-sklearn in a scientific publication, we would appreciate
-citations to the following paper:
+references to the following paper:
 
 
  `Efficient and Robust Automated Machine Learning
@@ -113,9 +105,11 @@ citations to the following paper:
 
 Contributing
 ************
-*auto-sklearn* is developed mainly by the `Machine Learning for Automated
-Algorithm Design <http://aad.informatik.uni-freiburg.de>`_ group at the
-University of Freiburg.
+
+We appreciate all contributions to *auto-sklearn*, from bug reports and
+documentation to new features. If you want to contribute to the code, you can
+pick an issue from the `issue tracker <https://github.com/automl/auto-sklearn/issues>`_
+which is marked with `Needs contributer`.
 
 .. note::
 
diff --git a/doc/manual.rst b/doc/manual.rst
@@ -34,8 +34,8 @@ the model building procedure may use up to all cores. Such behaviour is
 unintended by auto-sklearn and is most likely due to numpy being installed
 from `pypi` as a binary wheel (`see here http://scikit-learn-general.narkive
 .com/44ywvAHA/binary-wheel-packages-for-linux-are-coming`_). Executing
-`export OPENBLAS_NUM_THREADS=1` should disable such behaviours and make numpy
- only use a single core at a time.
+``export OPENBLAS_NUM_THREADS=1`` should disable such behaviours and make numpy
+only use a single core at a time.
 
 Model persistence
 *****************