diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml new file mode 100644 index 000000000..a94623f1c --- /dev/null +++ b/.github/workflows/pre-commit.yml @@ -0,0 +1,16 @@ +name: Linting + +on: + push: + branches: main + pull_request: + branches: main + +jobs: + checks: + name: "pre-commit hooks" + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-python@v2 + - uses: pre-commit/action@v2.0.0 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d109a10db..5f8fe5463 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,16 +1,23 @@ repos: - repo: https://github.com/python/black - rev: 19.10b0 + rev: 20.8b1 hooks: - id: black language_version: python3 - repo: https://gitlab.com/pycqa/flake8 - rev: 3.7.9 + rev: 3.8.3 hooks: - id: flake8 language_version: python3 - repo: https://github.com/timothycrosley/isort - rev: 4.3.21 + rev: 5.8.0 hooks: - id: isort language_version: python3 +- repo: https://github.com/pre-commit/mirrors-mypy + rev: v0.812 + hooks: + - id: mypy + language_version: python3 + entry: bash -c 'mypy dask_ml/{metrics,preprocessing}' + diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 2a2b90ddc..faf4f9f9e 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -12,7 +12,7 @@ RET=$(($RET + $?)) ; echo $MSG "DONE" MSG='Checking isort... ' ; echo $MSG isort --version-number -isort --recursive --check-only . +isort --check-only . RET=$(($RET + $?)) ; echo $MSG "DONE" MSG='Checking mypy... ' ; echo $MSG diff --git a/ci/environment-3.6.yaml b/ci/environment-3.6.yaml index 57d46f934..1fbd35253 100644 --- a/ci/environment-3.6.yaml +++ b/ci/environment-3.6.yaml @@ -3,16 +3,16 @@ channels: - conda-forge - defaults dependencies: - - black==19.10b0 + - black ==20.8b1 - coverage - dask ==2.4.0 - dask-glm >=0.2.0 - distributed ==2.4.0 - - flake8 - - isort==4.3.21 + - flake8 ==3.8.3 + - isort ==5.8.0 - msgpack-python ==0.6.2 - multipledispatch - - mypy + - mypy ==0.812 - numba - numpy ==1.17.3 - numpydoc diff --git a/ci/environment-3.7.yaml b/ci/environment-3.7.yaml index 2ab6922ac..6dc240d54 100644 --- a/ci/environment-3.7.yaml +++ b/ci/environment-3.7.yaml @@ -3,7 +3,7 @@ channels: - conda-forge - defaults dependencies: - - black==19.10b0 + - black ==20.8b1 - coverage - codecov # dask 2021.3.0 introduced a regression which causes tests to fail. @@ -12,10 +12,10 @@ dependencies: # to allow CI to pass - dask !=2021.3.0 - dask-glm >=0.2.0 - - flake8 - - isort==4.3.21 + - flake8 ==3.8.3 + - isort ==5.8.0 - multipledispatch >=0.4.9 - - mypy + - mypy ==0.812 - numba - numpy >=1.16.3 - numpydoc diff --git a/ci/environment-3.8.yaml b/ci/environment-3.8.yaml index c48399c7d..1acd63c29 100644 --- a/ci/environment-3.8.yaml +++ b/ci/environment-3.8.yaml @@ -3,7 +3,7 @@ channels: - conda-forge - defaults dependencies: - - black==19.10b0 + - black ==20.8b1 - coverage - codecov # dask 2021.3.0 introduced a regression which causes tests to fail. @@ -12,10 +12,10 @@ dependencies: # to allow CI to pass - dask !=2021.3.0 - dask-glm >=0.2.0 - - flake8 - - isort==4.3.21 + - flake8 ==3.8.3 + - isort ==5.8.0 - multipledispatch >=0.4.9 - - mypy + - mypy ==0.812 - numba - numpy >=1.16.3 - numpydoc diff --git a/ci/environment-docs.yaml b/ci/environment-docs.yaml index 4efabafa6..f4e368191 100644 --- a/ci/environment-docs.yaml +++ b/ci/environment-docs.yaml @@ -3,16 +3,16 @@ channels: - conda-forge - defaults dependencies: - - black + - black==20.8b1 - coverage - - flake8 + - flake8==3.8.3 - graphviz - heapdict - ipykernel - ipython - - isort==4.3.21 + - isort==5.8.0 - multipledispatch - - mypy + - mypy==0.812 - nbsphinx - nomkl - nose diff --git a/ci/windows.yaml b/ci/windows.yaml index abd449453..7a842b13c 100644 --- a/ci/windows.yaml +++ b/ci/windows.yaml @@ -37,12 +37,13 @@ jobs: black --check . echo "[isort]" - isort --recursive --check-only . + isort --check-only . echo "[codecov]" codecov echo "[mypy]" mypy dask_ml/metrics + mypy dask_ml/preprocessing displayName: "Lint" diff --git a/dask_ml/_compat.py b/dask_ml/_compat.py index c99d625be..b983260dd 100644 --- a/dask_ml/_compat.py +++ b/dask_ml/_compat.py @@ -46,8 +46,8 @@ def dummy_context(*args: Any, **kwargs: Any): def _check_multimetric_scoring(estimator, scoring=None): # TODO: See if scikit-learn 0.24 solves the need for using # a private method - from sklearn.metrics._scorer import _check_multimetric_scoring from sklearn.metrics import check_scoring + from sklearn.metrics._scorer import _check_multimetric_scoring if SK_024: if callable(scoring) or isinstance(scoring, (type(None), str)): diff --git a/dask_ml/model_selection/utils_test.py b/dask_ml/model_selection/utils_test.py index 695203c0a..ab7b0ca63 100644 --- a/dask_ml/model_selection/utils_test.py +++ b/dask_ml/model_selection/utils_test.py @@ -175,9 +175,12 @@ def fit(self, X, y, **fit_params): len(missing) == 0 ), "Expected fit parameter(s) %s not " "seen." % list(missing) for key, value in fit_params.items(): - assert len(value) == len(X), ( - "Fit parameter %s has length" - "%d; expected %d." % (key, len(value), len(X),) + assert len(value) == len( + X + ), "Fit parameter %s has length" "%d; expected %d." % ( + key, + len(value), + len(X), ) return self diff --git a/docs/dimensions.py b/docs/dimensions.py index 7ff76d932..216f19c16 100644 --- a/docs/dimensions.py +++ b/docs/dimensions.py @@ -1,6 +1,5 @@ -import numpy as np - import matplotlib.pyplot as plt +import numpy as np def draw_brace(ax, xspan, text): diff --git a/docs/source/conf.py b/docs/source/conf.py index e0bc15acf..da49dd28e 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -13,13 +13,14 @@ # All configuration values have a default; values that are commented out # serve to show the default. +import dask_sphinx_theme + # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. # import packaging.version -import dask_sphinx_theme from dask_ml import __version__ as version # import sys diff --git a/docs/source/contributing.rst b/docs/source/contributing.rst index 121b8f5dc..e46abc617 100644 --- a/docs/source/contributing.rst +++ b/docs/source/contributing.rst @@ -67,7 +67,7 @@ installed. black . flake8 - isort -rc dask_ml tests + isort . You may wish to setup a `pre-commit hook `_ diff --git a/setup.cfg b/setup.cfg index e55bcc711..473d7cba8 100644 --- a/setup.cfg +++ b/setup.cfg @@ -19,8 +19,7 @@ include_trailing_comma=True force_grid_wrap=0 combine_as_imports=True line_length=88 -skip= - docs/source/conf.py +profile=black [coverage:run] source=dask_ml diff --git a/tests/ensemble/test_blockwise.py b/tests/ensemble/test_blockwise.py index e6a8ece01..b24a0a0ea 100644 --- a/tests/ensemble/test_blockwise.py +++ b/tests/ensemble/test_blockwise.py @@ -13,7 +13,8 @@ class TestBlockwiseVotingClassifier: def test_hard_voting_array(self): X, y = dask_ml.datasets.make_classification(chunks=25) clf = dask_ml.ensemble.BlockwiseVotingClassifier( - sklearn.linear_model.LogisticRegression(solver="lbfgs"), classes=[0, 1], + sklearn.linear_model.LogisticRegression(solver="lbfgs"), + classes=[0, 1], ) clf.fit(X, y) assert len(clf.estimators_) == 4 @@ -51,7 +52,8 @@ def test_bad_chunking_raises(self): X = da.ones((10, 5), chunks=3) y = da.ones(10, chunks=3) clf = dask_ml.ensemble.BlockwiseVotingClassifier( - sklearn.linear_model.LogisticRegression(solver="lbfgs"), classes=[0, 1], + sklearn.linear_model.LogisticRegression(solver="lbfgs"), + classes=[0, 1], ) with pytest.raises(TypeError): @@ -64,7 +66,8 @@ def test_hard_voting_frame(self): y = dd.from_dask_array(y) clf = dask_ml.ensemble.BlockwiseVotingClassifier( - sklearn.linear_model.LogisticRegression(solver="lbfgs"), classes=[0, 1], + sklearn.linear_model.LogisticRegression(solver="lbfgs"), + classes=[0, 1], ) clf.fit(X, y) assert len(clf.estimators_) == 4 diff --git a/tests/linear_model/test_glm.py b/tests/linear_model/test_glm.py index 029754d8f..1ee5be64b 100644 --- a/tests/linear_model/test_glm.py +++ b/tests/linear_model/test_glm.py @@ -63,9 +63,10 @@ def test_fit(fit_intercept, solver): "solver", ["admm", "newton", "lbfgs", "proximal_grad", "gradient_descent"] ) def test_fit_solver(solver): - import dask_glm from distutils.version import LooseVersion + import dask_glm + if LooseVersion(dask_glm.__version__) <= "0.2.0": pytest.skip("FutureWarning for dask config.") diff --git a/tests/model_selection/dask_searchcv/test_model_selection.py b/tests/model_selection/dask_searchcv/test_model_selection.py index e0a203932..bfa245a95 100644 --- a/tests/model_selection/dask_searchcv/test_model_selection.py +++ b/tests/model_selection/dask_searchcv/test_model_selection.py @@ -445,7 +445,12 @@ def test_pipeline_sub_estimators(): }, ] - gs = GridSearchCV(pipe, param_grid=param_grid, return_train_score=True, cv=3,) + gs = GridSearchCV( + pipe, + param_grid=param_grid, + return_train_score=True, + cv=3, + ) gs.fit(X, y) dgs = dcv.GridSearchCV( pipe, param_grid=param_grid, scheduler="sync", return_train_score=True, cv=3 @@ -946,7 +951,11 @@ def test_gridsearch_with_arraylike_fit_param(cache_cv): param_grid = {"foo_param": [0.0001, 0.1]} a = dcv.GridSearchCV( - MockClassifierWithFitParam(), param_grid, cv=3, refit=False, cache_cv=cache_cv, + MockClassifierWithFitParam(), + param_grid, + cv=3, + refit=False, + cache_cv=cache_cv, ) b = GridSearchCV(MockClassifierWithFitParam(), param_grid, cv=3, refit=False) diff --git a/tests/model_selection/dask_searchcv/test_model_selection_sklearn.py b/tests/model_selection/dask_searchcv/test_model_selection_sklearn.py index 59a6b6bfb..5212d858c 100644 --- a/tests/model_selection/dask_searchcv/test_model_selection_sklearn.py +++ b/tests/model_selection/dask_searchcv/test_model_selection_sklearn.py @@ -561,7 +561,7 @@ def test_pandas_input(): # check cross_val_score doesn't destroy pandas dataframe types = [(MockDataFrame, MockDataFrame)] try: - from pandas import Series, DataFrame + from pandas import DataFrame, Series types.append((DataFrame, Series)) except ImportError: diff --git a/tests/model_selection/test_hyperband.py b/tests/model_selection/test_hyperband.py index c23e3438d..6ade693a2 100644 --- a/tests/model_selection/test_hyperband.py +++ b/tests/model_selection/test_hyperband.py @@ -283,15 +283,18 @@ def test_correct_params(c, s, a, b): SHAs_params = [ bracket["SuccessiveHalvingSearchCV params"] for bracket in meta["brackets"] ] - SHA_params = base.union( - { - "n_initial_parameters", - "n_initial_iter", - "aggressiveness", - "max_iter", - "prefix", - } - ) - {"estimator__sleep", "estimator__value", "estimator", "parameters"} + SHA_params = ( + base.union( + { + "n_initial_parameters", + "n_initial_iter", + "aggressiveness", + "max_iter", + "prefix", + } + ) + - {"estimator__sleep", "estimator__value", "estimator", "parameters"} + ) assert all(set(SHA) == SHA_params for SHA in SHAs_params) diff --git a/tests/model_selection/test_incremental.py b/tests/model_selection/test_incremental.py index 207065cf5..17e3aa290 100644 --- a/tests/model_selection/test_incremental.py +++ b/tests/model_selection/test_incremental.py @@ -350,7 +350,12 @@ def score(self, *args, **kwargs): model = ConstantClassifier() search = IncrementalSearchCV( - model, params, n_initial_parameters=10, patience=5, tol=0, max_iter=10, + model, + params, + n_initial_parameters=10, + patience=5, + tol=0, + max_iter=10, ) yield search.fit(X, y, classes=[0, 1]) @@ -770,7 +775,11 @@ def test_search_patience_infeasible_tol(c, s, a, b): max_iter = 10 score_increase = -10 search = IncrementalSearchCV( - model, params, max_iter=max_iter, patience=3, tol=score_increase, + model, + params, + max_iter=max_iter, + patience=3, + tol=score_increase, ) yield search.fit(X, y, classes=[0, 1]) diff --git a/tests/model_selection/test_keras.py b/tests/model_selection/test_keras.py index 7ea61b4ab..c81101223 100644 --- a/tests/model_selection/test_keras.py +++ b/tests/model_selection/test_keras.py @@ -51,7 +51,10 @@ def test_keras(c, s, a, b): assert y.dtype == np.dtype("int64") model = KerasClassifier( - model=_keras_build_fn, lr=0.01, verbose=False, loss="categorical_crossentropy", + model=_keras_build_fn, + lr=0.01, + verbose=False, + loss="categorical_crossentropy", ) params = {"lr": loguniform(1e-3, 1e-1)} diff --git a/tests/test_incremental_pca.py b/tests/test_incremental_pca.py index 694a9bd1e..5890386ed 100644 --- a/tests/test_incremental_pca.py +++ b/tests/test_incremental_pca.py @@ -9,13 +9,17 @@ from dask_ml.utils import flip_vector_signs try: - from sklearn.utils._testing import assert_almost_equal - from sklearn.utils._testing import assert_array_almost_equal - from sklearn.utils._testing import assert_allclose_dense_sparse + from sklearn.utils._testing import ( + assert_allclose_dense_sparse, + assert_almost_equal, + assert_array_almost_equal, + ) except ImportError: - from sklearn.utils.testing import assert_almost_equal - from sklearn.utils.testing import assert_array_almost_equal - from sklearn.utils.testing import assert_allclose_dense_sparse + from sklearn.utils.testing import ( + assert_allclose_dense_sparse, + assert_almost_equal, + assert_array_almost_equal, + ) iris = datasets.load_iris() diff --git a/tests/test_spectral_clustering.py b/tests/test_spectral_clustering.py index e07c5c38c..e60477e91 100644 --- a/tests/test_spectral_clustering.py +++ b/tests/test_spectral_clustering.py @@ -103,9 +103,10 @@ def test_spectral_clustering(Xl_blobs_easy): @pytest.mark.parametrize("keep", [[4, 7], [4, 5], [0, 3], [1, 9], [0, 1, 5, 8, 9]]) def test_slice_mostly_sorted(keep): - import numpy as np import dask.array as da + import numpy as np from dask.array.utils import assert_eq + from dask_ml.cluster.spectral import _slice_mostly_sorted X = np.arange(10).reshape(-1, 1)