DOC, TST: Wrapping of PyTorch models (#699)

stsievert · web-flow · commit 5c3179eb7eaa · 2020-07-28T20:58:20.000-05:00
diff --git a/ci/posix.yaml b/ci/posix.yaml
@@ -33,6 +33,13 @@ jobs:
   - bash: conda env create --quiet --file=$(envFile) --name=dask-ml-test && conda list -n dask-ml-test
     displayName: "install"
 
+  - bash: |
+      conda install -y -q pytorch cpuonly -c pytorch -n dask-ml-test
+      source activate dask-ml-test
+      pip install skorch
+    displayName: "install PyTorch"
+    condition: eq(variables['Build.SourceBranch'], 'refs/heads/master')
+
   - script: |
       source activate dask-ml-test
       conda uninstall -y --force scikit-learn
diff --git a/dask_ml/model_selection/_hyperband.py b/dask_ml/model_selection/_hyperband.py
@@ -388,7 +388,7 @@ def _get_SHAs(self, brackets):
         return SHAs
 
     async def _fit(self, X, y, **fit_params):
-        X, y, scorer = self._validate_parameters(X, y)
+        X, y, scorer = await self._validate_parameters(X, y)
 
         brackets = _get_hyperband_params(self.max_iter, eta=self.aggressiveness)
         SHAs = self._get_SHAs(brackets)
diff --git a/dask_ml/model_selection/_incremental.py b/dask_ml/model_selection/_incremental.py
@@ -515,21 +515,25 @@ def __init__(
         self.prefix = prefix
         super(BaseIncrementalSearchCV, self).__init__(estimator, scoring=scoring)
 
-    def _validate_parameters(self, X, y):
+    async def _validate_parameters(self, X, y):
         if (self.max_iter is not None) and self.max_iter < 1:
             raise ValueError(
                 "Received max_iter={}. max_iter < 1 is not supported".format(
                     self.max_iter
                 )
             )
 
-        # Make sure dask arrays are passed so error on unknown chunk size is raised
         kwargs = dict(accept_unknown_chunks=True, accept_dask_dataframe=True)
         if not isinstance(X, dd.DataFrame):
             X = self._check_array(X, **kwargs)
-        if not isinstance(y, dd.Series):
+        if not isinstance(y, (dd.DataFrame, dd.Series)):
             y = self._check_array(y, ensure_2d=False, **kwargs)
-        scorer = check_scoring(self.estimator, scoring=self.scoring)
+        estimator = self.estimator
+        if isinstance(estimator, Future):
+            client = default_client()
+            scorer = await client.submit(check_scoring, estimator, scoring=self.scoring)
+        else:
+            scorer = check_scoring(self.estimator, scoring=self.scoring)
         return X, y, scorer
 
     @property
@@ -640,7 +644,7 @@ async def _fit(self, X, y, **fit_params):
         else:
             context = dummy_context()
 
-        X, y, scorer = self._validate_parameters(X, y)
+        X, y, scorer = await self._validate_parameters(X, y)
 
         X_train, X_test, y_train, y_test = self._get_train_test_split(X, y)
 
diff --git a/dask_ml/wrappers.py b/dask_ml/wrappers.py
@@ -481,7 +481,7 @@ def _fit_for_estimator(self, estimator, X, y, **fit_kwargs):
                 random_state=self.random_state,
                 shuffle_blocks=self.shuffle_blocks,
                 assume_equal_chunks=self.assume_equal_chunks,
-                **fit_kwargs
+                **fit_kwargs,
             )
 
         copy_learned_attributes(result, self)
diff --git a/docs/source/hyper-parameter-search.rst b/docs/source/hyper-parameter-search.rst
@@ -403,7 +403,7 @@ generalized to any of the above estimators.
 
 .. note::
 
-   These estimators require that the model implement ``partial_fit``
+   These estimators require that the model implement ``partial_fit``.
 
 By default, these class will call ``partial_fit`` on each chunk of the data.
 These classes can stop training any models if their score stops increasing
diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -120,13 +120,20 @@ Scikit-Learn should feel at home with Dask-ML.
    hyper-parameter-search.rst
    compose.rst
    glm.rst
-   joblib.rst
    meta-estimators.rst
    incremental.rst
    clustering.rst
-   xgboost.rst
    modules/api.rst
 
+.. toctree::
+   :maxdepth: 2
+   :hidden:
+   :caption: Integration
+
+   joblib.rst
+   xgboost.rst
+   pytorch.rst
+
 .. toctree::
    :maxdepth: 2
    :hidden:
diff --git a/docs/source/joblib.rst b/docs/source/joblib.rst
@@ -1,7 +1,7 @@
 .. _joblib:
 
-Joblib
-======
+Scikit-Learn & Joblib
+=====================
 
 Many Scikit-Learn algorithms are written for parallel execution using
 `Joblib <http://joblib.readthedocs.io/en/latest/>`__, which natively provides
diff --git a/docs/source/pytorch.rst b/docs/source/pytorch.rst
@@ -0,0 +1,70 @@
+PyTorch
+=======
+
+Skorch_ brings a Scikit-learn API to PyTorch_. Skorch allows PyTorch models to
+be wrapped in Scikit-learn compatible estimators, which means that PyTorch
+models wrapped in Skorch can be used with the rest of the Dask-ML API.  For
+example, using Dask-ML's :class:`~dask_ml.model_selection.HyperbandSearchCV` or
+:class:`~dask_ml.wrappers.Incremental` with PyTorch is possible after
+wrapping with Skorch.
+
+We encourage looking at the Skorch documentation for complete details.
+
+Example usage
+-------------
+
+First, let's create a normal PyTorch model:
+
+.. code-block:: python
+
+
+   import torch.nn as nn
+   import torch.nn.functional as F
+
+   class ShallowNet(nn.Module):
+       def __init__(self, n_features=5):
+           super().__init__()
+           self.layer1 = nn.Linear(n_features, 1)
+
+       def forward(self, x):
+           return F.relu(self.layer1(x))
+
+With this, it's easy to use Skorch:
+
+.. code-block:: python
+
+   from skorch import NeuralNetRegressor
+   import torch.optim as optim
+
+   niceties = {
+       "callbacks": False,
+       "warm_start": False,
+       "train_split": None,
+       "max_epochs": 1,
+   }
+
+   model = NeuralNetRegressor(
+       module=ShallowNet,
+       module__n_features=5,
+       criterion=nn.MSELoss,
+       optimizer=optim.SGD,
+       optimizer__lr=0.1,
+       optimizer__momentum=0.9,
+       batch_size=64,
+       **niceties,
+   )
+
+Each parameter that the PyTorch ``nn.Module`` takes is prefixed with ``module__``,
+and likewise ``optimizer__`` for the optimizer (``optim.SGD`` takes ``lr`` and
+``momentum`` parameters). The ``niceties`` make sure Skorch uses all the data for
+training and doesn't print excessive amounts of logs.
+
+Now, this model can be used with Dask-ML. For example, it's possible to do the
+following:
+
+* Use PyTorch with Dask-ML's model selection, including
+  :class:`~dask_ml.model_selection.HyperbandSearchCV`.
+* Use PyTorch with Dask-ML's :class:`~dask_ml.wrappers.Incremental`.
+
+.. _Skorch: https://skorch.readthedocs.io/en/stable/
+.. _PyTorch: https://pytorch.org
diff --git a/docs/source/xgboost.rst b/docs/source/xgboost.rst
@@ -1,19 +1,29 @@
-XGBoost
-=======
+XGBoost & LightGBM
+==================
 
 .. currentmodule:: dask_ml.xgboost
 
+XGBoost_ is a powerful and popular library for gradient boosted trees.  For
+larger datasets or faster training XGBoost also provides a distributed
+computing solution. LightGBM_ is another library similar to XGBoost; it also
+natively supplies distributed training for decision trees.
+
+Dask-ML can set up distributed XGBoost or LightGBM for you and hand off data
+from distributed dask.dataframes.  This automates much of the hassle of
+preprocessing and setup while still letting XGBoost/LightGBM do what they do
+well.
+
+Below, we'll refer to an example with XGBoost. Here are the relevant XGBoost
+classes/functions:
+
 .. autosummary::
    train
    predict
    XGBClassifier
    XGBRegressor
 
-XGBoost_ is a powerful and popular library for gradient boosted trees.  For
-larger datasets or faster training XGBoost also provides a distributed
-computing solution.  Dask-ML can set up distributed XGBoost for you and hand
-off data from distributed dask.dataframes.  This automates much of the hassle
-of preprocessing and setup while still letting XGBoost do what it does well.
+The LightGBM implementation and documentation can be found at
+https://github.com/dask/dask-lightgbm.
 
 Example
 -------
@@ -63,3 +73,4 @@ relevant GitHub issue here: `dmlc/xgboost #2032 <https://github.com/dmlc/xgboost
 See the ":doc:`Dask-ML examples <examples>`" for an example usage.
 
 .. _XGBoost: https://xgboost.readthedocs.io/
+.. _LightGBM: https://lightgbm.readthedocs.io/
diff --git a/tests/model_selection/test_incremental.py b/tests/model_selection/test_incremental.py
@@ -853,3 +853,18 @@ def test_warns_scores_per_fit(c, s, a, b):
     search = IncrementalSearchCV(model, params, scores_per_fit=2)
     with pytest.warns(UserWarning, match="deprecated since Dask-ML v1.4.0"):
         yield search.fit(X, y)
+
+
+@gen_cluster(client=True)
+async def test_model_future(c, s, a, b):
+    X, y = make_classification(n_samples=100, n_features=5, chunks=10)
+
+    params = {"value": np.random.RandomState(42).rand(1000)}
+    model = ConstantFunction()
+    model_future = await c.scatter(model)
+
+    search = IncrementalSearchCV(model_future, params, max_iter=10)
+
+    await search.fit(X, y, classes=[0, 1])
+    assert search.history_
+    assert search.best_score_ > 0
diff --git a/tests/model_selection/test_pytorch.py b/tests/model_selection/test_pytorch.py
@@ -0,0 +1,60 @@
+import pytest
+from distributed.utils_test import gen_cluster
+from scipy.stats import loguniform
+from sklearn.base import clone
+from sklearn.datasets import make_regression
+
+from dask_ml.model_selection import IncrementalSearchCV
+
+try:
+    import torch.nn as nn
+    import torch.nn.functional as F
+    import torch.optim as optim
+    from skorch import NeuralNetRegressor
+except ImportError:
+    pytestmark = pytest.mark.skip(reason="Missing pytorch or skorch.")
+
+else:
+
+    class ShallowNet(nn.Module):
+        def __init__(self, n_features=5):
+            super().__init__()
+            self.layer1 = nn.Linear(n_features, 1)
+
+        def forward(self, x):
+            return F.relu(self.layer1(x))
+
+
+@gen_cluster(client=True)
+def test_pytorch(c, s, a, b):
+
+    n_features = 10
+    defaults = {
+        "callbacks": False,
+        "warm_start": False,
+        "train_split": None,
+        "max_epochs": 1,
+    }
+    model = NeuralNetRegressor(
+        module=ShallowNet,
+        module__n_features=n_features,
+        criterion=nn.MSELoss,
+        optimizer=optim.SGD,
+        optimizer__lr=0.1,
+        batch_size=64,
+        **defaults,
+    )
+
+    model2 = clone(model)
+    assert model.callbacks is False
+    assert model.warm_start is False
+    assert model.train_split is None
+    assert model.max_epochs == 1
+
+    params = {"optimizer__lr": loguniform(1e-3, 1e0)}
+    X, y = make_regression(n_samples=100, n_features=n_features)
+    X = X.astype("float32")
+    y = y.astype("float32").reshape(-1, 1)
+    search = IncrementalSearchCV(model2, params, max_iter=5, decay_rate=None)
+    yield search.fit(X, y)
+    assert search.best_score_ >= 0

Original file line number	Diff line number	Diff line change
`@@ -481,7 +481,7 @@ def _fit_for_estimator(self, estimator, X, y, **fit_kwargs):`
`481`	`481`	`random_state=self.random_state,`
`482`	`482`	`shuffle_blocks=self.shuffle_blocks,`
`483`	`483`	`assume_equal_chunks=self.assume_equal_chunks,`
`484`		`- **fit_kwargs`
	`484`	`+ **fit_kwargs,`
`485`	`485`	`)`
`486`	`486`
`487`	`487`	`copy_learned_attributes(result, self)`