MNT add documentation testing in Azure (#715)

glemaitre · web-flow · commit 37f27ee6a1d5 · 2020-06-08T19:42:02.000+02:00
diff --git a/.travis.yml b/.travis.yml
@@ -13,11 +13,7 @@ cache:
   - $HOME/.cache/pip
   - $HOME/.cache/pip
   - $HOME/download
-addons:
-  apt:
-    packages:
-      - python3-numpy
-      - python3-scipy
+
 env:
   global:
     # Directory where tests are run from
@@ -28,15 +24,7 @@ env:
 
 matrix:
   include:
-    - env: PYTHON_VERSION="3.7"
-           NUMPY_VERSION="*" SCIPY_VERSION="*" SKLEARN_VERSION="master"
-           OPTIONAL_DEPS="keras" TEST_DOC="true" TEST_NUMPYDOC="false"
-    - env: PYTHON_VERSION="3.7"
-           NUMPY_VERSION="*" SCIPY_VERSION="*" SKLEARN_VERSION="master"
-           OPTIONAL_DEPS="tensorflow" TEST_DOC="true" TEST_NUMPYDOC="false"
-    - env: PYTHON_VERSION="3.7"
-           NUMPY_VERSION="*" SCIPY_VERSION="*" SKLEARN_VERSION="master"
-           OPTIONAL_DEPS="false" TEST_DOC="false" TEST_NUMPYDOC="true"
+    - env: PYTHON_VERSION="3.8" TEST_DOC="true" TEST_NUMPYDOC="true"
 
 install: source build_tools/travis/install.sh
 script: bash build_tools/travis/test_script.sh
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
@@ -32,7 +32,7 @@ jobs:
       # Linux environment to test the latest available dependencies and MKL.
       pylatest_pip_openblas_pandas:
         DISTRIB: 'conda-pip-latest'
-        PYTHON_VERSION: '3.7'
+        PYTHON_VERSION: '3.8'
         COVERAGE: 'true'
         PANDAS_VERSION: '*'
         TEST_DOCSTRINGS: 'true'
@@ -49,7 +49,7 @@ jobs:
         TEST_DOCSTRINGS: 'true'
       pylatest_conda_pandas_tensorflow:
         DISTRIB: 'conda'
-        PYTHON_VERSION: '3.7'
+        PYTHON_VERSION: '3.8'
         PANDAS_VERSION: '*'
         JOBLIB_VERSION: '*'
         INSTALL_MKL: 'true'
@@ -95,7 +95,7 @@ jobs:
     dependsOn: [linting]
     matrix:
       py37_conda_mkl:
-        PYTHON_VERSION: '3.7'
+        PYTHON_VERSION: '3.8'
         PYTHON_ARCH: '64'
         PYTEST_VERSION: '*'
         COVERAGE: 'true'
diff --git a/build_tools/azure/posix.yml b/build_tools/azure/posix.yml
@@ -44,7 +44,7 @@ jobs:
       condition: succeededOrFailed()
     - script: |
         build_tools/azure/upload_codecov.sh
-      condition: and(succeeded(), eq(variables['COVERAGE'], 'true'), eq(variables['DISTRIB'], 'conda'))
+      condition: and(succeeded(), eq(variables['COVERAGE'], 'true'))
       displayName: 'Upload To Codecov'
       env:
         CODECOV_TOKEN: $(CODECOV_TOKEN)
diff --git a/build_tools/azure/test_docs.sh b/build_tools/azure/test_docs.sh
@@ -8,7 +8,7 @@ elif [[ "$DISTRIB" == "ubuntu" ]]; then
     source $VIRTUALENV/bin/activate
 fi
 
-if [[ "TEST_DOCSTRING" == 'true' ]]; then
+if [[ "$TEST_DOCSTRINGS" == 'true' ]]; then
     make test-doc
     pytest -vsl maint_tools/test_docstring.py
 fi
diff --git a/build_tools/travis/install.sh b/build_tools/travis/install.sh
@@ -34,22 +34,13 @@ conda create -n testenv --yes python=$PYTHON_VERSION pip
 source activate testenv
 
 pip install --upgrade pip setuptools
-echo "Installing numpy and scipy master wheels"
-dev_url=https://7933911d6844c6c53a7d-47bd50c35cd79bd838daf386af554a83.ssl.cf2.rackcdn.com
-pip install --pre --upgrade --timeout=60 -f $dev_url numpy scipy pandas cython
+echo "Installing numpy, scipy, and pandas master wheels"
+dev_url=https://pypi.anaconda.org/scipy-wheels-nightly/simple
+pip install --pre --upgrade --timeout=60 -f $dev_url numpy scipy pandas
 echo "Installing joblib master"
 pip install https://github.com/joblib/joblib/archive/master.zip
-
-if [[ "$OPTIONAL_DEPS" == "keras" ]]; then
-    conda install --yes keras tensorflow=1
-    KERAS_BACKEND=tensorflow
-    python -c "import keras.backend"
-    sed -i -e 's/"backend":[[:space:]]*"[^"]*/"backend":\ "'$KERAS_BACKEND'/g' ~/.keras/keras.json;
-elif [[ "$OPTIONAL_DEPS" == "tensorflow" ]]; then
-    conda install --yes tensorflow
-fi
-
-pip install --pre --extra-index https://pypi.anaconda.org/scipy-wheels-nightly/simple scikit-learn
+echo "Installing scikit-learn master"
+pip install --pre --extra-index $dev_url scikit-learn
 
 conda install --yes pytest pytest-cov
 pip install codecov
diff --git a/doc/ensemble.rst b/doc/ensemble.rst
@@ -97,7 +97,7 @@ a boosting iteration :cite:`seiffert2009rusboost`::
   RUSBoostClassifier(...)
   >>> y_pred = rusboost.predict(X_test)
   >>> balanced_accuracy_score(y_test, y_pred)  # doctest: +ELLIPSIS
-  0.4...
+  0...
 
 A specific method which uses ``AdaBoost`` as learners in the bagging classifier
 is called EasyEnsemble. The :class:`EasyEnsembleClassifier` allows to bag
diff --git a/imblearn/over_sampling/_smote.py b/imblearn/over_sampling/_smote.py
@@ -744,8 +744,7 @@ def _fit_resample(self, X, y):
 #     sampling_strategy=BaseOverSampler._sampling_strategy_docstring,
 #     random_state=_random_state_docstring)
 class SMOTENC(SMOTE):
-    """Synthetic Minority Over-sampling Technique for Nominal and Continuous
-    (SMOTE-NC).
+    """Synthetic Minority Over-sampling Technique for Nominal and Continuous.
 
     Unlike :class:`SMOTE`, SMOTE-NC is used for datasets containing continuous
     and categorical features.
diff --git a/imblearn/pipeline.py b/imblearn/pipeline.py
@@ -47,7 +47,7 @@ class Pipeline(pipeline.Pipeline):
         fit/transform/fit_resample) that are chained, in the order in which
         they are chained, with the last object an estimator.
 
-    memory : Instance of joblib.Memory or string, optional (default=None)
+    memory : Instance of joblib.Memory or str, default=None
         Used to cache the fitted transformers of the pipeline. By default,
         no caching is performed. If a string is given, it is the path to
         the caching directory. Enabling caching triggers a clone of
@@ -57,7 +57,7 @@ class Pipeline(pipeline.Pipeline):
         inspect estimators within the pipeline. Caching the
         transformers is advantageous when fitting is time consuming.
 
-    verbose : boolean, optional (default=False)
+    verbose : bool, default=False
         If True, the time elapsed while fitting each step will be printed as it
         is completed.
 
@@ -67,17 +67,16 @@ class Pipeline(pipeline.Pipeline):
         Read-only attribute to access any step parameter by user given name.
         Keys are step names and values are steps parameters.
 
+    See Also
+    --------
+    make_pipeline : Helper function to make pipeline.
+
     Notes
     -----
     See :ref:`sphx_glr_auto_examples_pipeline_plot_pipeline_classification.py`
 
-    See also
-    --------
-    make_pipeline : helper function to make pipeline.
-
     Examples
     --------
-
     >>> from collections import Counter
     >>> from sklearn.datasets import make_classification
     >>> from sklearn.model_selection import train_test_split as tts
@@ -109,7 +108,6 @@ class Pipeline(pipeline.Pipeline):
        macro avg       0.93      0.99      0.96       250
     weighted avg       0.99      0.98      0.98       250
     <BLANKLINE>
-
     """
 
     # BaseEstimator interface
@@ -257,7 +255,7 @@ def _fit(self, X, y=None, **fit_params):
         return X, y, fit_params_steps[self.steps[-1][0]]
 
     def fit(self, X, y=None, **fit_params):
-        """Fit the model
+        """Fit the model.
 
         Fit all the transforms/samplers one after the other and
         transform/sample the data, then fit the transformed/sampled
@@ -273,16 +271,15 @@ def fit(self, X, y=None, **fit_params):
             Training targets. Must fulfill label requirements for all steps of
             the pipeline.
 
-        **fit_params : dict of string -> object
+        **fit_params : dict of str -> object
             Parameters passed to the ``fit`` method of each step, where
             each parameter name is prefixed such that parameter ``p`` for step
             ``s`` has key ``s__p``.
 
         Returns
         -------
         self : Pipeline
-            This estimator
-
+            This estimator.
         """
         Xt, yt, fit_params = self._fit(X, y, **fit_params)
         with _print_elapsed_time('Pipeline',
@@ -292,7 +289,7 @@ def fit(self, X, y=None, **fit_params):
         return self
 
     def fit_transform(self, X, y=None, **fit_params):
-        """Fit the model and transform with the final estimator
+        """Fit the model and transform with the final estimator.
 
         Fits all the transformers/samplers one after the other and
         transform/sample the data, then uses fit_transform on
@@ -315,9 +312,8 @@ def fit_transform(self, X, y=None, **fit_params):
 
         Returns
         -------
-        Xt : array-like, shape = [n_samples, n_transformed_features]
-            Transformed samples
-
+        Xt : array-like of shape (n_samples, n_transformed_features)
+            Transformed samples.
         """
         last_step = self._final_estimator
         Xt, yt, fit_params = self._fit(X, y, **fit_params)
@@ -331,7 +327,7 @@ def fit_transform(self, X, y=None, **fit_params):
                 return last_step.fit(Xt, yt, **fit_params).transform(Xt)
 
     def fit_resample(self, X, y=None, **fit_params):
-        """Fit the model and sample with the final estimator
+        """Fit the model and sample with the final estimator.
 
         Fits all the transformers/samplers one after the other and
         transform/sample the data, then uses fit_resample on transformed
@@ -354,12 +350,11 @@ def fit_resample(self, X, y=None, **fit_params):
 
         Returns
         -------
-        Xt : array-like, shape = [n_samples, n_transformed_features]
-            Transformed samples
-
-        yt : array-like, shape = [n_samples, n_transformed_features]
-            Transformed target
+        Xt : array-like of shape (n_samples, n_transformed_features)
+            Transformed samples.
 
+        yt : array-like of shape (n_samples, n_transformed_features)
+            Transformed target.
         """
         last_step = self._final_estimator
         Xt, yt, fit_params = self._fit(X, y, **fit_params)
@@ -372,7 +367,7 @@ def fit_resample(self, X, y=None, **fit_params):
 
     @if_delegate_has_method(delegate="_final_estimator")
     def fit_predict(self, X, y=None, **fit_params):
-        """Applies fit_predict of last step in pipeline after transforms.
+        """Apply `fit_predict` of last step in pipeline after transforms.
 
         Applies fit_transforms of a pipeline to the data, followed by the
         fit_predict method of the final estimator in the pipeline. Valid
@@ -395,7 +390,8 @@ def fit_predict(self, X, y=None, **fit_params):
 
         Returns
         -------
-        y_pred : array-like
+        y_pred : ndarray of shape (n_samples,)
+            The predicted target.
         """
         Xt, yt, fit_params = self._fit(X, y, **fit_params)
         with _print_elapsed_time('Pipeline',
@@ -425,9 +421,10 @@ def make_pipeline(*steps, **kwargs):
 
     Parameters
     ----------
-    *steps : list of estimators.
+    *steps : list of estimators
+        A list of estimators.
 
-    memory : None, str or object with the joblib.Memory interface, optional
+    memory : None, str or object with the joblib.Memory interface, default=None
         Used to cache the fitted transformers of the pipeline. By default,
         no caching is performed. If a string is given, it is the path to
         the caching directory. Enabling caching triggers a clone of
@@ -437,15 +434,15 @@ def make_pipeline(*steps, **kwargs):
         inspect estimators within the pipeline. Caching the
         transformers is advantageous when fitting is time consuming.
 
-    verbose : boolean, optional (default=False)
+    verbose : bool, default=False
         If True, the time elapsed while fitting each step will be printed as it
         is completed.
 
     Returns
     -------
     p : Pipeline
 
-    See also
+    See Also
     --------
     imblearn.pipeline.Pipeline : Class for creating a pipeline of
         transforms with a final estimator.
diff --git a/imblearn/under_sampling/_prototype_generation/_cluster_centroids.py b/imblearn/under_sampling/_prototype_generation/_cluster_centroids.py
@@ -102,7 +102,7 @@ def __init__(
         random_state=None,
         estimator=None,
         voting="auto",
-        n_jobs="deprecatedxs",
+        n_jobs="deprecated",
     ):
         super().__init__(sampling_strategy=sampling_strategy)
         self.random_state = random_state
diff --git a/maint_tools/test_docstring.py b/maint_tools/test_docstring.py
@@ -10,14 +10,47 @@
 # List of whitelisted modules and methods; regular expressions are supported.
 # Only docstrings matching an entry are checked; all others are marked xfail.
 DOCSTRING_WHITELIST = [
-    "BalancedBaggingClassifier.decision_function",
-    "BalancedRandomForestClassifier.decision_function",
-    "BalancedRandomForestClassifier.decision_path",
+    "ADASYN$", "ADASYN.",
+    "AllKNN$", "AllKNN.",
+    "BalancedBaggingClassifier$",
+    "BalancedBaggingClassifier.estimators_samples_",
+    "BalancedBaggingClassifier.fit",
+    "BalancedBaggingClassifier.get_params",
+    "BalancedBaggingClassifier.predict",
+    "BalancedBaggingClassifier.score",
+    "BalancedBaggingClassifier.set_params",
+    "BalancedRandomForestClassifier$",
+    "BalancedRandomForestClassifier.apply",
     "BalancedRandomForestClassifier.feature_importances_",
-    "BalancedRandomForestClassifier.predict_log_proba",
-    "BalancedRandomForestClassifier.predict_proba",
-    "EasyEnsembleClassifier.decision_function",
-    "RUSBoostClassifier.feature_importances_"
+    "BalancedRandomForestClassifier.fit",
+    "BalancedRandomForestClassifier.predict$",
+    "BalancedRandomForestClassifier.score",
+    "BalancedRandomForestClassifier.set_params",
+    "ClusterCentroids$", "ClusterCentroids.",
+    "CondensedNearestNeighbour$", "CondensedNearestNeighbour.",
+    "EasyEnsembleClassifier$",
+    "EasyEnsembleClassifier.estimators_samples_",
+    "EasyEnsembleClassifier.fit",
+    "EasyEnsembleClassifier.get_params",
+    "EasyEnsembleClassifier.predict",
+    "EasyEnsembleClassifier.score",
+    "EasyEnsembleClassifier.set_params",
+    "EditedNearestNeighbours$", "EditedNearestNeighbours.",
+    "FunctionSampler$", "FunctionSampler.",
+    "InstanceHardnessThreshold$", "InstanceHardnessThreshold.",
+    "SMOTE$", "SMOTE.",
+    "NearMiss$", "NearMiss.",
+    "NeighbourhoodCleaningRule$", "NeighbourhoodCleaningRule.",
+    "OneSidedSelection$", "OneSidedSelection.",
+    "Pipeline$",
+    "Pipeline.fit$",
+    "Pipeline.fit_transform",
+    "Pipeline.fit_resample",
+    "Pipeline.fit_predict",
+    "RUSBoostClassifier$", "RUSBoostClassifier.",
+    "RandomOverSampler$", "RandomOverSampler.",
+    "RandomUnderSampler$", "RandomUnderSampler.",
+    "TomekLinks$", "TomekLinks",
 ]
 
 
@@ -137,7 +170,7 @@ def test_docstring(Estimator, method, request):
 
     import_path = ".".join(import_path)
 
-    if any(re.search(regex, import_path) for regex in DOCSTRING_WHITELIST):
+    if not any(re.search(regex, import_path) for regex in DOCSTRING_WHITELIST):
         request.applymarker(
             pytest.mark.xfail(
                 run=False, reason="TODO pass numpydoc validation"