diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
new file mode 100644
index 000000000..a94623f1c
--- /dev/null
+++ b/.github/workflows/pre-commit.yml
@@ -0,0 +1,16 @@
+name: Linting  # CI workflow whose single job is labelled "pre-commit hooks"
+
+on:
+  push:
+    branches: main  # only pushes to main trigger the workflow
+  pull_request:
+    branches: main  # only pull requests against main trigger the workflow
+
+jobs:
+  checks:
+    name: "pre-commit hooks"
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - uses: actions/setup-python@v2  # no python-version input is given here
+      - uses: pre-commit/action@v2.0.0  # presumably runs the hooks in .pre-commit-config.yaml - confirm
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index d109a10db..5f8fe5463 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,16 +1,23 @@
 repos:
 -   repo: https://github.com/python/black
-    rev: 19.10b0
+    rev: 20.8b1
     hooks:
     - id: black
       language_version: python3
 -   repo: https://gitlab.com/pycqa/flake8
-    rev: 3.7.9
+    rev: 3.8.3
     hooks:
     - id: flake8
       language_version: python3
 -   repo: https://github.com/timothycrosley/isort
-    rev: 4.3.21
+    rev: 5.8.0
     hooks:
     - id: isort
       language_version: python3
+-   repo: https://github.com/pre-commit/mirrors-mypy
+    rev: v0.812
+    hooks:
+    - id: mypy
+      language_version: python3
+      entry: bash -c 'mypy dask_ml/{metrics,preprocessing}'  # bash performs the brace expansion
+      pass_filenames: false  # entry names its own targets; do not append staged files
diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index 2a2b90ddc..faf4f9f9e 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -12,7 +12,7 @@ RET=$(($RET + $?)) ; echo $MSG "DONE"
 
 MSG='Checking isort... ' ; echo $MSG
 isort --version-number
-isort --recursive --check-only .
+isort --check-only .
 RET=$(($RET + $?)) ; echo $MSG "DONE"
 
 MSG='Checking mypy... ' ; echo $MSG
diff --git a/ci/environment-3.6.yaml b/ci/environment-3.6.yaml
index 57d46f934..1fbd35253 100644
--- a/ci/environment-3.6.yaml
+++ b/ci/environment-3.6.yaml
@@ -3,16 +3,16 @@ channels:
   - conda-forge
   - defaults
 dependencies:
-  - black==19.10b0
+  - black ==20.8b1
   - coverage
   - dask ==2.4.0
   - dask-glm >=0.2.0
   - distributed ==2.4.0
-  - flake8
-  - isort==4.3.21
+  - flake8 ==3.8.3
+  - isort ==5.8.0
   - msgpack-python ==0.6.2
   - multipledispatch
-  - mypy
+  - mypy ==0.812
   - numba
   - numpy ==1.17.3
   - numpydoc
diff --git a/ci/environment-3.7.yaml b/ci/environment-3.7.yaml
index 2ab6922ac..6dc240d54 100644
--- a/ci/environment-3.7.yaml
+++ b/ci/environment-3.7.yaml
@@ -3,7 +3,7 @@ channels:
   - conda-forge
   - defaults
 dependencies:
-  - black==19.10b0
+  - black ==20.8b1
   - coverage
   - codecov
   # dask 2021.3.0 introduced a regression which causes tests to fail.
@@ -12,10 +12,10 @@ dependencies:
   # to allow CI to pass
   - dask !=2021.3.0
   - dask-glm >=0.2.0
-  - flake8
-  - isort==4.3.21
+  - flake8 ==3.8.3
+  - isort ==5.8.0
   - multipledispatch >=0.4.9
-  - mypy
+  - mypy ==0.812
   - numba
   - numpy >=1.16.3
   - numpydoc
diff --git a/ci/environment-3.8.yaml b/ci/environment-3.8.yaml
index c48399c7d..1acd63c29 100644
--- a/ci/environment-3.8.yaml
+++ b/ci/environment-3.8.yaml
@@ -3,7 +3,7 @@ channels:
   - conda-forge
   - defaults
 dependencies:
-  - black==19.10b0
+  - black ==20.8b1
   - coverage
   - codecov
   # dask 2021.3.0 introduced a regression which causes tests to fail.
@@ -12,10 +12,10 @@ dependencies:
   # to allow CI to pass
   - dask !=2021.3.0
   - dask-glm >=0.2.0
-  - flake8
-  - isort==4.3.21
+  - flake8 ==3.8.3
+  - isort ==5.8.0
   - multipledispatch >=0.4.9
-  - mypy
+  - mypy ==0.812
   - numba
   - numpy >=1.16.3
   - numpydoc
diff --git a/ci/environment-docs.yaml b/ci/environment-docs.yaml
index 4efabafa6..f4e368191 100644
--- a/ci/environment-docs.yaml
+++ b/ci/environment-docs.yaml
@@ -3,16 +3,16 @@ channels:
   - conda-forge
   - defaults
 dependencies:
-  - black
+  - black==20.8b1
   - coverage
-  - flake8
+  - flake8==3.8.3
   - graphviz
   - heapdict
   - ipykernel
   - ipython
-  - isort==4.3.21
+  - isort==5.8.0
   - multipledispatch
-  - mypy
+  - mypy==0.812
   - nbsphinx
   - nomkl
   - nose
diff --git a/ci/windows.yaml b/ci/windows.yaml
index abd449453..7a842b13c 100644
--- a/ci/windows.yaml
+++ b/ci/windows.yaml
@@ -37,12 +37,13 @@ jobs:
       black --check .
 
       echo "[isort]"
-      isort --recursive --check-only .
+      isort --check-only .
 
       echo "[codecov]"
       codecov
 
       echo "[mypy]"
       mypy dask_ml/metrics
+      mypy dask_ml/preprocessing
 
     displayName: "Lint"
diff --git a/dask_ml/_compat.py b/dask_ml/_compat.py
index c99d625be..b983260dd 100644
--- a/dask_ml/_compat.py
+++ b/dask_ml/_compat.py
@@ -46,8 +46,8 @@ def dummy_context(*args: Any, **kwargs: Any):
 def _check_multimetric_scoring(estimator, scoring=None):
     # TODO: See if scikit-learn 0.24 solves the need for using
     # a private method
-    from sklearn.metrics._scorer import _check_multimetric_scoring
     from sklearn.metrics import check_scoring
+    from sklearn.metrics._scorer import _check_multimetric_scoring
 
     if SK_024:
         if callable(scoring) or isinstance(scoring, (type(None), str)):
diff --git a/dask_ml/model_selection/utils_test.py b/dask_ml/model_selection/utils_test.py
index 695203c0a..ab7b0ca63 100644
--- a/dask_ml/model_selection/utils_test.py
+++ b/dask_ml/model_selection/utils_test.py
@@ -175,9 +175,12 @@ def fit(self, X, y, **fit_params):
                 len(missing) == 0
             ), "Expected fit parameter(s) %s not " "seen." % list(missing)
             for key, value in fit_params.items():
-                assert len(value) == len(X), (
-                    "Fit parameter %s has length"
-                    "%d; expected %d." % (key, len(value), len(X),)
+                assert len(value) == len(
+                    X
+                ), "Fit parameter %s has length" "%d; expected %d." % (
+                    key,
+                    len(value),
+                    len(X),
                 )
         return self
 
diff --git a/docs/dimensions.py b/docs/dimensions.py
index 7ff76d932..216f19c16 100644
--- a/docs/dimensions.py
+++ b/docs/dimensions.py
@@ -1,6 +1,5 @@
-import numpy as np
-
 import matplotlib.pyplot as plt
+import numpy as np
 
 
 def draw_brace(ax, xspan, text):
diff --git a/docs/source/conf.py b/docs/source/conf.py
index e0bc15acf..da49dd28e 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -13,13 +13,14 @@
 # All configuration values have a default; values that are commented out
 # serve to show the default.
 
+import dask_sphinx_theme
+
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
 #
 import packaging.version
 
-import dask_sphinx_theme
 from dask_ml import __version__ as version
 
 # import sys
diff --git a/docs/source/contributing.rst b/docs/source/contributing.rst
index 121b8f5dc..e46abc617 100644
--- a/docs/source/contributing.rst
+++ b/docs/source/contributing.rst
@@ -67,7 +67,7 @@ installed.
 
     black .
     flake8
-    isort -rc dask_ml tests
+    isort .
 
 You may wish to setup a
 `pre-commit hook <https://black.readthedocs.io/en/stable/version_control_integration.html>`_
diff --git a/setup.cfg b/setup.cfg
index e55bcc711..473d7cba8 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -19,8 +19,7 @@ include_trailing_comma=True
 force_grid_wrap=0
 combine_as_imports=True
 line_length=88
-skip=
-    docs/source/conf.py
+profile=black
 
 [coverage:run]
 source=dask_ml
diff --git a/tests/ensemble/test_blockwise.py b/tests/ensemble/test_blockwise.py
index e6a8ece01..b24a0a0ea 100644
--- a/tests/ensemble/test_blockwise.py
+++ b/tests/ensemble/test_blockwise.py
@@ -13,7 +13,8 @@ class TestBlockwiseVotingClassifier:
     def test_hard_voting_array(self):
         X, y = dask_ml.datasets.make_classification(chunks=25)
         clf = dask_ml.ensemble.BlockwiseVotingClassifier(
-            sklearn.linear_model.LogisticRegression(solver="lbfgs"), classes=[0, 1],
+            sklearn.linear_model.LogisticRegression(solver="lbfgs"),
+            classes=[0, 1],
         )
         clf.fit(X, y)
         assert len(clf.estimators_) == 4
@@ -51,7 +52,8 @@ def test_bad_chunking_raises(self):
         X = da.ones((10, 5), chunks=3)
         y = da.ones(10, chunks=3)
         clf = dask_ml.ensemble.BlockwiseVotingClassifier(
-            sklearn.linear_model.LogisticRegression(solver="lbfgs"), classes=[0, 1],
+            sklearn.linear_model.LogisticRegression(solver="lbfgs"),
+            classes=[0, 1],
         )
 
         with pytest.raises(TypeError):
@@ -64,7 +66,8 @@ def test_hard_voting_frame(self):
         y = dd.from_dask_array(y)
 
         clf = dask_ml.ensemble.BlockwiseVotingClassifier(
-            sklearn.linear_model.LogisticRegression(solver="lbfgs"), classes=[0, 1],
+            sklearn.linear_model.LogisticRegression(solver="lbfgs"),
+            classes=[0, 1],
         )
         clf.fit(X, y)
         assert len(clf.estimators_) == 4
diff --git a/tests/linear_model/test_glm.py b/tests/linear_model/test_glm.py
index 029754d8f..1ee5be64b 100644
--- a/tests/linear_model/test_glm.py
+++ b/tests/linear_model/test_glm.py
@@ -63,9 +63,10 @@ def test_fit(fit_intercept, solver):
     "solver", ["admm", "newton", "lbfgs", "proximal_grad", "gradient_descent"]
 )
 def test_fit_solver(solver):
-    import dask_glm
     from distutils.version import LooseVersion
 
+    import dask_glm
+
     if LooseVersion(dask_glm.__version__) <= "0.2.0":
         pytest.skip("FutureWarning for dask config.")
 
diff --git a/tests/model_selection/dask_searchcv/test_model_selection.py b/tests/model_selection/dask_searchcv/test_model_selection.py
index e0a203932..bfa245a95 100644
--- a/tests/model_selection/dask_searchcv/test_model_selection.py
+++ b/tests/model_selection/dask_searchcv/test_model_selection.py
@@ -445,7 +445,12 @@ def test_pipeline_sub_estimators():
         },
     ]
 
-    gs = GridSearchCV(pipe, param_grid=param_grid, return_train_score=True, cv=3,)
+    gs = GridSearchCV(
+        pipe,
+        param_grid=param_grid,
+        return_train_score=True,
+        cv=3,
+    )
     gs.fit(X, y)
     dgs = dcv.GridSearchCV(
         pipe, param_grid=param_grid, scheduler="sync", return_train_score=True, cv=3
@@ -946,7 +951,11 @@ def test_gridsearch_with_arraylike_fit_param(cache_cv):
     param_grid = {"foo_param": [0.0001, 0.1]}
 
     a = dcv.GridSearchCV(
-        MockClassifierWithFitParam(), param_grid, cv=3, refit=False, cache_cv=cache_cv,
+        MockClassifierWithFitParam(),
+        param_grid,
+        cv=3,
+        refit=False,
+        cache_cv=cache_cv,
     )
     b = GridSearchCV(MockClassifierWithFitParam(), param_grid, cv=3, refit=False)
 
diff --git a/tests/model_selection/dask_searchcv/test_model_selection_sklearn.py b/tests/model_selection/dask_searchcv/test_model_selection_sklearn.py
index 59a6b6bfb..5212d858c 100644
--- a/tests/model_selection/dask_searchcv/test_model_selection_sklearn.py
+++ b/tests/model_selection/dask_searchcv/test_model_selection_sklearn.py
@@ -561,7 +561,7 @@ def test_pandas_input():
     # check cross_val_score doesn't destroy pandas dataframe
     types = [(MockDataFrame, MockDataFrame)]
     try:
-        from pandas import Series, DataFrame
+        from pandas import DataFrame, Series
 
         types.append((DataFrame, Series))
     except ImportError:
diff --git a/tests/model_selection/test_hyperband.py b/tests/model_selection/test_hyperband.py
index c23e3438d..6ade693a2 100644
--- a/tests/model_selection/test_hyperband.py
+++ b/tests/model_selection/test_hyperband.py
@@ -283,15 +283,18 @@ def test_correct_params(c, s, a, b):
     SHAs_params = [
         bracket["SuccessiveHalvingSearchCV params"] for bracket in meta["brackets"]
     ]
-    SHA_params = base.union(
-        {
-            "n_initial_parameters",
-            "n_initial_iter",
-            "aggressiveness",
-            "max_iter",
-            "prefix",
-        }
-    ) - {"estimator__sleep", "estimator__value", "estimator", "parameters"}
+    SHA_params = (
+        base.union(
+            {
+                "n_initial_parameters",
+                "n_initial_iter",
+                "aggressiveness",
+                "max_iter",
+                "prefix",
+            }
+        )
+        - {"estimator__sleep", "estimator__value", "estimator", "parameters"}
+    )
 
     assert all(set(SHA) == SHA_params for SHA in SHAs_params)
 
diff --git a/tests/model_selection/test_incremental.py b/tests/model_selection/test_incremental.py
index 207065cf5..17e3aa290 100644
--- a/tests/model_selection/test_incremental.py
+++ b/tests/model_selection/test_incremental.py
@@ -350,7 +350,12 @@ def score(self, *args, **kwargs):
     model = ConstantClassifier()
 
     search = IncrementalSearchCV(
-        model, params, n_initial_parameters=10, patience=5, tol=0, max_iter=10,
+        model,
+        params,
+        n_initial_parameters=10,
+        patience=5,
+        tol=0,
+        max_iter=10,
     )
     yield search.fit(X, y, classes=[0, 1])
 
@@ -770,7 +775,11 @@ def test_search_patience_infeasible_tol(c, s, a, b):
     max_iter = 10
     score_increase = -10
     search = IncrementalSearchCV(
-        model, params, max_iter=max_iter, patience=3, tol=score_increase,
+        model,
+        params,
+        max_iter=max_iter,
+        patience=3,
+        tol=score_increase,
     )
     yield search.fit(X, y, classes=[0, 1])
 
diff --git a/tests/model_selection/test_keras.py b/tests/model_selection/test_keras.py
index 7ea61b4ab..c81101223 100644
--- a/tests/model_selection/test_keras.py
+++ b/tests/model_selection/test_keras.py
@@ -51,7 +51,10 @@ def test_keras(c, s, a, b):
     assert y.dtype == np.dtype("int64")
 
     model = KerasClassifier(
-        model=_keras_build_fn, lr=0.01, verbose=False, loss="categorical_crossentropy",
+        model=_keras_build_fn,
+        lr=0.01,
+        verbose=False,
+        loss="categorical_crossentropy",
     )
     params = {"lr": loguniform(1e-3, 1e-1)}
 
diff --git a/tests/test_incremental_pca.py b/tests/test_incremental_pca.py
index 694a9bd1e..5890386ed 100644
--- a/tests/test_incremental_pca.py
+++ b/tests/test_incremental_pca.py
@@ -9,13 +9,17 @@
 from dask_ml.utils import flip_vector_signs
 
 try:
-    from sklearn.utils._testing import assert_almost_equal
-    from sklearn.utils._testing import assert_array_almost_equal
-    from sklearn.utils._testing import assert_allclose_dense_sparse
+    from sklearn.utils._testing import (
+        assert_allclose_dense_sparse,
+        assert_almost_equal,
+        assert_array_almost_equal,
+    )
 except ImportError:
-    from sklearn.utils.testing import assert_almost_equal
-    from sklearn.utils.testing import assert_array_almost_equal
-    from sklearn.utils.testing import assert_allclose_dense_sparse
+    from sklearn.utils.testing import (
+        assert_allclose_dense_sparse,
+        assert_almost_equal,
+        assert_array_almost_equal,
+    )
 
 
 iris = datasets.load_iris()
diff --git a/tests/test_spectral_clustering.py b/tests/test_spectral_clustering.py
index e07c5c38c..e60477e91 100644
--- a/tests/test_spectral_clustering.py
+++ b/tests/test_spectral_clustering.py
@@ -103,9 +103,10 @@ def test_spectral_clustering(Xl_blobs_easy):
 
 @pytest.mark.parametrize("keep", [[4, 7], [4, 5], [0, 3], [1, 9], [0, 1, 5, 8, 9]])
 def test_slice_mostly_sorted(keep):
-    import numpy as np
     import dask.array as da
+    import numpy as np
     from dask.array.utils import assert_eq
+
     from dask_ml.cluster.spectral import _slice_mostly_sorted
 
     X = np.arange(10).reshape(-1, 1)