
Commit ce0382d

[CI] Refactor tests to reduce CI time. (dmlc#8312)
1 parent 39afdac commit ce0382d

19 files changed: 95 additions, 122 deletions

demo/guide-python/sklearn_examples.py

Lines changed: 2 additions & 2 deletions
@@ -50,8 +50,8 @@
 print("Parameter optimization")
 xgb_model = xgb.XGBRegressor(n_jobs=1)
 clf = GridSearchCV(xgb_model,
-                   {'max_depth': [2, 4, 6],
-                    'n_estimators': [50, 100, 200]}, verbose=1, n_jobs=1)
+                   {'max_depth': [2, 4],
+                    'n_estimators': [50, 100]}, verbose=1, n_jobs=1, cv=3)
 clf.fit(X, y)
 print(clf.best_score_)
 print(clf.best_params_)
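
Shrinking the grid and pinning cv=3 is where the time saving comes from: fewer candidate parameter combinations and fewer folds per candidate. A rough fit-count sketch, assuming scikit-learn's default 5-fold CV applied before this change:

    # Hypothetical fit-count comparison; assumes cv previously defaulted to 5 folds.
    from sklearn.model_selection import ParameterGrid

    old_grid = {'max_depth': [2, 4, 6], 'n_estimators': [50, 100, 200]}
    new_grid = {'max_depth': [2, 4], 'n_estimators': [50, 100]}

    old_fits = len(ParameterGrid(old_grid)) * 5  # 9 candidates * 5 folds = 45 fits
    new_fits = len(ParameterGrid(new_grid)) * 3  # 4 candidates * 3 folds = 12 fits
    print(old_fits, new_fits)  # 45 12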

tests/ci_build/Dockerfile.gpu

Lines changed: 1 addition & 1 deletion
@@ -24,7 +24,7 @@ RUN \
     mamba create -n gpu_test -c rapidsai-nightly -c rapidsai -c nvidia -c conda-forge -c defaults \
         python=3.9 cudf=$RAPIDS_VERSION_ARG* rmm=$RAPIDS_VERSION_ARG* cudatoolkit=$CUDA_VERSION_ARG \
         dask dask-cuda=$RAPIDS_VERSION_ARG* dask-cudf=$RAPIDS_VERSION_ARG* cupy \
-        numpy pytest scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis \
+        numpy pytest pytest-timeout scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis \
         pyspark cloudpickle cuda-python=11.7.0 && \
     mamba clean --all && \
     conda run --no-capture-output -n gpu_test pip install buildkite-test-collector

tests/ci_build/conda_env/cpu_test.yml

Lines changed: 1 addition & 0 deletions
@@ -22,6 +22,7 @@ dependencies:
 - sh
 - mock
 - pytest
+- pytest-timeout
 - pytest-cov
 - python-kubernetes
 - urllib3
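
pytest-timeout is added to both the GPU Docker image and the CPU conda environment because the test modules below set per-module timeouts. A minimal sketch of the plugin's usage, assuming it is installed (test names here are illustrative):

    import time

    import pytest

    # A module-level pytestmark applies the timeout to every test in the file.
    pytestmark = pytest.mark.timeout(10)

    def test_fast_enough():
        time.sleep(0.1)  # well under the 10s module timeout

    @pytest.mark.timeout(60)  # a per-test marker overrides the module default
    def test_known_slow_case():
        time.sleep(0.2)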

tests/python-gpu/test_gpu_data_iterator.py

Lines changed: 5 additions & 8 deletions
@@ -5,10 +5,9 @@
 import sys
 
 sys.path.append("tests/python")
-from test_data_iterator import SingleBatch, make_batches
 from test_data_iterator import test_single_batch as cpu_single_batch
 from test_data_iterator import run_data_iterator
-from testing import IteratorForTest, no_cupy
+from testing import no_cupy
 
 
 def test_gpu_single_batch() -> None:
@@ -21,16 +20,14 @@ def test_gpu_single_batch() -> None:
     strategies.integers(1, 7),
     strategies.integers(0, 8),
     strategies.booleans(),
+    strategies.booleans(),
 )
-@settings(deadline=None, print_blob=True)
+@settings(deadline=None, max_examples=10, print_blob=True)
 def test_gpu_data_iterator(
-    n_samples_per_batch: int, n_features: int, n_batches: int, subsample: bool
+    n_samples_per_batch: int, n_features: int, n_batches: int, subsample: bool, use_cupy: bool
 ) -> None:
     run_data_iterator(
-        n_samples_per_batch, n_features, n_batches, "gpu_hist", subsample, True
-    )
-    run_data_iterator(
-        n_samples_per_batch, n_features, n_batches, "gpu_hist", subsample, False
+        n_samples_per_batch, n_features, n_batches, "gpu_hist", subsample, use_cupy
     )
 
 
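Rather than exercising both the CuPy and NumPy paths inside every generated example, the CuPy flag is now drawn by Hypothesis as a boolean strategy and max_examples bounds the total number of cases. A standalone sketch of the same pattern, with an illustrative stand-in for run_data_iterator:

    from hypothesis import given, settings, strategies

    def run_case(n_rows: int, use_cupy: bool) -> bool:
        # illustrative stand-in for run_data_iterator(...)
        return n_rows > 0

    @given(strategies.integers(1, 100), strategies.booleans())
    @settings(deadline=None, max_examples=10, print_blob=True)
    def test_run_case(n_rows: int, use_cupy: bool) -> None:
        # the flag is generated per example instead of looped over explicitly,
        # and max_examples caps how many examples Hypothesis runs in CI
        assert run_case(n_rows, use_cupy)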

tests/python-gpu/test_gpu_linear.py

Lines changed: 4 additions & 2 deletions
@@ -6,6 +6,8 @@
 import testing as tm
 
 
+pytestmark = pytest.mark.timeout(10)
+
 parameter_strategy = strategies.fixed_dictionaries({
     'booster': strategies.just('gblinear'),
     'eta': strategies.floats(0.01, 0.25),
@@ -30,7 +32,7 @@ def train_result(param, dmat, num_rounds):
 class TestGPULinear:
     @given(parameter_strategy, strategies.integers(10, 50),
            tm.dataset_strategy)
-    @settings(deadline=None, print_blob=True)
+    @settings(deadline=None, max_examples=20, print_blob=True)
     def test_gpu_coordinate(self, param, num_rounds, dataset):
         assume(len(dataset.y) > 0)
         param['updater'] = 'gpu_coord_descent'
@@ -49,7 +51,7 @@ def test_gpu_coordinate(self, param, num_rounds, dataset):
            strategies.floats(1e-5, 0.8),
            strategies.floats(1e-5, 0.8)
     )
-    @settings(deadline=None, print_blob=True)
+    @settings(deadline=None, max_examples=20, print_blob=True)
     def test_gpu_coordinate_regularised(self, param, num_rounds, dataset, alpha, lambd):
         assume(len(dataset.y) > 0)
         param['updater'] = 'gpu_coord_descent'

tests/python-gpu/test_gpu_pickling.py

Lines changed: 3 additions & 0 deletions
@@ -15,6 +15,8 @@
 model_path = './model.pkl'
 
 
+pytestmark = pytest.mark.timeout(30)
+
 def build_dataset():
     N = 10
     x = np.linspace(0, N*N, N*N)
@@ -65,6 +67,7 @@ def run_pickling(self, bst) -> None:
         assert status == 0
         os.remove(model_path)
 
+    # TODO: This test is too slow
     @pytest.mark.skipif(**tm.no_sklearn())
     def test_pickling(self):
         x, y = build_dataset()

tests/python-gpu/test_gpu_prediction.py

Lines changed: 6 additions & 5 deletions
@@ -32,6 +32,7 @@ def noop(*args, **kwargs):
     'num_parallel_tree': strategies.sampled_from([1, 4]),
 })
 
+pytestmark = pytest.mark.timeout(20)
 
 class TestGPUPredict:
     def test_predict(self):
@@ -264,7 +265,7 @@ def predict_df(x):
 
     @given(strategies.integers(1, 10),
            tm.dataset_strategy, shap_parameter_strategy)
-    @settings(deadline=None, print_blob=True)
+    @settings(deadline=None, max_examples=20, print_blob=True)
     def test_shap(self, num_rounds, dataset, param):
         if dataset.name.endswith("-l1"):  # not supported by the exact tree method
             return
@@ -280,7 +281,7 @@ def test_shap(self, num_rounds, dataset, param):
 
     @given(strategies.integers(1, 10),
            tm.dataset_strategy, shap_parameter_strategy)
-    @settings(deadline=None, max_examples=20, print_blob=True)
+    @settings(deadline=None, max_examples=10, print_blob=True)
     def test_shap_interactions(self, num_rounds, dataset, param):
         if dataset.name.endswith("-l1"):  # not supported by the exact tree method
             return
@@ -333,14 +334,14 @@ def run_predict_leaf_booster(self, param, num_rounds, dataset):
         np.testing.assert_equal(cpu_leaf, gpu_leaf)
 
     @given(predict_parameter_strategy, tm.dataset_strategy)
-    @settings(deadline=None, print_blob=True)
+    @settings(deadline=None, max_examples=20, print_blob=True)
     def test_predict_leaf_gbtree(self, param, dataset):
         param['booster'] = 'gbtree'
         param['tree_method'] = 'gpu_hist'
         self.run_predict_leaf_booster(param, 10, dataset)
 
     @given(predict_parameter_strategy, tm.dataset_strategy)
-    @settings(deadline=None, print_blob=True)
+    @settings(deadline=None, max_examples=20, print_blob=True)
     def test_predict_leaf_dart(self, param, dataset):
         param['booster'] = 'dart'
         param['tree_method'] = 'gpu_hist'
@@ -351,7 +352,7 @@ def test_predict_leaf_dart(self, param, dataset):
     @given(df=data_frames([column('x0', elements=strategies.integers(min_value=0, max_value=3)),
                            column('x1', elements=strategies.integers(min_value=0, max_value=5))],
                           index=range_indexes(min_size=20, max_size=50)))
-    @settings(deadline=None, print_blob=True)
+    @settings(deadline=None, max_examples=20, print_blob=True)
     def test_predict_categorical_split(self, df):
         from sklearn.metrics import mean_squared_error
 

tests/python-gpu/test_gpu_ranking.py

Lines changed: 3 additions & 1 deletion
@@ -6,10 +6,12 @@
 import urllib.request
 import zipfile
 import sys
+import pytest
 sys.path.append("tests/python")
 
 import testing as tm  # noqa
 
+pytestmark = pytest.mark.timeout(10)
 
 class TestRanking:
     @classmethod
@@ -96,7 +98,7 @@ def __test_training_with_rank_objective(cls, rank_objective, metric_name, tolera
         # specify validations set to watch performance
         watchlist = [(cls.dtest, 'eval'), (cls.dtrain, 'train')]
 
-        num_trees = 2500
+        num_trees = 100
         check_metric_improvement_rounds = 10
 
         evals_result = {}

tests/python-gpu/test_gpu_spark/test_gpu_spark.py

Lines changed: 1 addition & 1 deletion
@@ -7,7 +7,7 @@
 sys.path.append("tests/python")
 import testing as tm
 
-if tm.no_dask()["condition"]:
+if tm.no_spark()["condition"]:
     pytest.skip(msg=tm.no_spark()["reason"], allow_module_level=True)
 if sys.platform.startswith("win"):
     pytest.skip("Skipping PySpark tests on Windows", allow_module_level=True)

tests/python-gpu/test_gpu_updaters.py

Lines changed: 9 additions & 10 deletions
@@ -1,7 +1,6 @@
 from typing import Dict, Any
 import numpy as np
 import sys
-import gc
 import pytest
 import xgboost as xgb
 from hypothesis import given, strategies, assume, settings, note
@@ -10,6 +9,7 @@
 import testing as tm
 import test_updaters as test_up
 
+pytestmark = pytest.mark.timeout(30)
 
 parameter_strategy = strategies.fixed_dictionaries({
     'max_depth': strategies.integers(0, 11),
@@ -46,7 +46,7 @@ class TestGPUUpdaters:
     cputest = test_up.TestTreeMethod()
 
     @given(parameter_strategy, strategies.integers(1, 20), tm.dataset_strategy)
-    @settings(deadline=None, print_blob=True)
+    @settings(deadline=None, max_examples=50, print_blob=True)
     def test_gpu_hist(self, param, num_rounds, dataset):
         param["tree_method"] = "gpu_hist"
         param = dataset.set_params(param)
@@ -73,7 +73,7 @@ def test_sparse(self, dataset):
 
     @given(strategies.integers(10, 400), strategies.integers(3, 8),
            strategies.integers(1, 2), strategies.integers(4, 7))
-    @settings(deadline=None, print_blob=True)
+    @settings(deadline=None, max_examples=20, print_blob=True)
     @pytest.mark.skipif(**tm.no_pandas())
     def test_categorical_ohe(self, rows, cols, rounds, cats):
         self.cputest.run_categorical_ohe(rows, cols, rounds, cats, "gpu_hist")
@@ -85,7 +85,7 @@ def test_categorical_ohe(self, rows, cols, rounds, cats):
         test_up.cat_parameter_strategy,
         strategies.integers(4, 32),
     )
-    @settings(deadline=None, print_blob=True)
+    @settings(deadline=None, max_examples=20, print_blob=True)
     @pytest.mark.skipif(**tm.no_pandas())
     def test_categorical(
         self,
@@ -106,7 +106,7 @@ def test_categorical(
         test_up.hist_parameter_strategy,
         test_up.cat_parameter_strategy,
     )
-    @settings(deadline=None, print_blob=True)
+    @settings(deadline=None, max_examples=10, print_blob=True)
     def test_categorical_ames_housing(
         self,
         hist_parameters: Dict[str, Any],
@@ -125,7 +125,7 @@ def test_categorical_ames_housing(
         strategies.integers(3, 8),
         strategies.integers(4, 7)
     )
-    @settings(deadline=None, print_blob=True)
+    @settings(deadline=None, max_examples=20, print_blob=True)
     @pytest.mark.skipif(**tm.no_pandas())
     def test_categorical_missing(self, rows, cols, cats):
         self.cputest.run_categorical_missing(rows, cols, cats, "gpu_hist")
@@ -149,7 +149,7 @@ def test_invalid_category(self):
     @pytest.mark.skipif(**tm.no_cupy())
     @given(parameter_strategy, strategies.integers(1, 20),
            tm.dataset_strategy)
-    @settings(deadline=None, print_blob=True)
+    @settings(deadline=None, max_examples=20, print_blob=True)
     def test_gpu_hist_device_dmatrix(self, param, num_rounds, dataset):
         # We cannot handle empty dataset yet
         assume(len(dataset.y) > 0)
@@ -159,9 +159,9 @@ def test_gpu_hist_device_dmatrix(self, param, num_rounds, dataset):
         note(result)
         assert tm.non_increasing(result['train'][dataset.metric], tolerance=1e-3)
 
-    @given(parameter_strategy, strategies.integers(1, 20),
+    @given(parameter_strategy, strategies.integers(1, 3),
            tm.dataset_strategy)
-    @settings(deadline=None, print_blob=True)
+    @settings(deadline=None, max_examples=10, print_blob=True)
     def test_external_memory(self, param, num_rounds, dataset):
         if dataset.name.endswith("-l1"):
             return
@@ -172,7 +172,6 @@ def test_external_memory(self, param, num_rounds, dataset):
         m = dataset.get_external_dmat()
         external_result = train_result(param, m, num_rounds)
         del m
-        gc.collect()
         assert tm.non_increasing(external_result['train'][dataset.metric])
 
     def test_empty_dmatrix_prediction(self):

0 commit comments
