43 commits
53aa563
Fixed broken link in PR template (#1675)
olineumann Apr 30, 2020
8d564b5
call on_load_checkpoint() when resuming from checkpoint (#1666)
yukw777 Apr 30, 2020
f9c9e39
Add log output for slurm (#1657)
cmpute Apr 30, 2020
2ec8d61
Update new-project.rst (#1655)
weipengOO98 Apr 30, 2020
d40425d
added warning to crash (#1625)
williamFalcon Apr 30, 2020
3eac6cf
Don't convert namedtuple to tuple (#1589)
nathanbreitsch Apr 30, 2020
142bc02
Learning rate log callback (#1498)
SkafteNicki Apr 30, 2020
97c7b6b
fixing LBFGS test (#1678)
Borda May 1, 2020
34bc149
move unnecessary dict trainer_options (#1469)
Borda May 1, 2020
2950f66
Fix Horovod distributed backend to set the root_gpu property (#1669)
tgaddair May 1, 2020
b4b73f9
Trigger automatic rebase on issue comment (#1695)
fkorotkov May 2, 2020
f380027
refactor default model (#1652)
Borda May 2, 2020
cf0d5dc
Docker release (#1613)
justusschock May 2, 2020
210cd65
fix LightningTemplateModel (#1577)
festeh May 2, 2020
4dc77b5
Change lightning module params to dict when loading (#1639)
cmpute May 2, 2020
152a2eb
wandb logger 'global_step' affects other logger (#1492)
May 2, 2020
d06d5e6
Fix typo in progress bar docs (#1680)
awaelchli May 2, 2020
fafe5d6
Transfer learning example (#1564)
jbschiratti May 2, 2020
e6b34ef
[WIP] Reduction when batch size < num gpus (#1609)
awaelchli May 2, 2020
595ec65
refactor trainer checks (#1651)
Borda May 4, 2020
281a73c
specify cache matrix (#1725)
Borda May 4, 2020
0cd5e64
Tests: refactor loggers (#1689)
Borda May 4, 2020
d28b145
Update type hints for multiple dataloaders in .fit() and .test() (#1723)
May 4, 2020
1077159
Tests: refactor models (#1691)
Borda May 4, 2020
e865b04
Bugfix/lr finder (#1676)
SkafteNicki May 4, 2020
1a9f1c8
Fix example argument parser in docs (#1692)
rhsimplex May 4, 2020
f90afa2
Fix disabling progress bar on non-zero ranks using Horovod backend (#…
tgaddair May 4, 2020
6d58fb1
Tests: refactor trainer (#1728)
Borda May 4, 2020
043ae69
Tests: refactor callbacks (#1688)
Borda May 4, 2020
48e808c
Move generated RST files to subfolder (#1555)
awaelchli May 4, 2020
a6de1b8
doctest for .rst files (#1511)
awaelchli May 5, 2020
2a2f303
Tests: refactor trainer dataloaders (#1690)
Borda May 5, 2020
d6a0375
Fixing logic (#1734)
twangnyc May 5, 2020
2b03d34
complete test (#1705)
May 5, 2020
fc7f591
improve pickle tests for callbacks (#1717)
jeremyjordan May 5, 2020
35bbe17
fix _reset_eval_dataloader() for IterableDataset (#1560)
ybrovman May 5, 2020
0cb58fb
Mock packages for RTD docs build (follow up to doctests) (#1739)
May 5, 2020
8518663
Attach version_ to checkpoint path only if version is int (#1748)
yukw777 May 6, 2020
b9364f9
lr_finder: Fix typo in docstring (#1746)
xerus May 6, 2020
f656882
Fix typo (#1750)
5n7-sk May 7, 2020
3a64260
added warning for None dataloader (#1745)
ybrovman May 7, 2020
25bbd05
Also update progress_bar in training_epoch_end (#1724)
awaelchli May 9, 2020
bba5061
Updated elgohr/Publish-Docker-Github-Action to a supported version (v5)
elgohr Mar 28, 2023
5 changes: 4 additions & 1 deletion .circleci/config.yml
@@ -64,10 +64,13 @@ references:
name: Make Documentation
command: |
# sudo apt-get install pandoc
sudo apt-get update && sudo apt-get install -y cmake
pip install -r requirements.txt --user
sudo pip install -r docs/requirements.txt
pip install -r requirements-extra.txt --user # for doctesting loggers etc.
# sphinx-apidoc -o ./docs/source ./pytorch_lightning **/test_* --force --follow-links
cd docs; make clean ; make html --debug --jobs 2 SPHINXOPTS="-W"
cd docs; make clean; make html --debug --jobs 2 SPHINXOPTS="-W"
make doctest; make coverage

jobs:

2 changes: 2 additions & 0 deletions .drone.yml
@@ -35,9 +35,11 @@ steps:
- apt-get update && apt-get install -y cmake
- pip install -r requirements.txt --user -q
- pip install -r ./tests/requirements-devel.txt --user -q
#- pip install -r ./docs/requirements.txt --user -q
- pip list
- python -c "import torch ; print(' & '.join([torch.cuda.get_device_name(i) for i in range(torch.cuda.device_count())]) if torch.cuda.is_available() else 'only CPU')"
- coverage run --source pytorch_lightning -m py.test pytorch_lightning tests benchmarks -v --doctest-modules # --flake8
#- cd docs; make doctest; make coverage
- coverage report
- codecov --token $CODECOV_TOKEN # --pr $DRONE_PULL_REQUEST --build $DRONE_BUILD_NUMBER --branch $DRONE_BRANCH --commit $DRONE_COMMIT --tag $DRONE_TAG
- python tests/collect_env_details.py
2 changes: 1 addition & 1 deletion .github/PULL_REQUEST_TEMPLATE.md
@@ -4,7 +4,7 @@
- [ ] Did you read the [contributor guideline](https://github.com/PyTorchLightning/pytorch-lightning/blob/master/.github/CONTRIBUTING.md), Pull Request section?
- [ ] Did you make sure to update the docs?
- [ ] Did you write any new necessary tests?
- [ ] If you made a notable change (that affects users), did you update the [CHANGELOG](https://github.com/PyTorchLightning/pytorch-lightning/blob/master/.github/CHANGELOG.md)?
- [ ] If you made a notable change (that affects users), did you update the [CHANGELOG](https://github.com/PyTorchLightning/pytorch-lightning/blob/master/CHANGELOG.md)?

<!-- For CHANGELOG separate each item in unreleased section by blank line to reduce collisions -->

21 changes: 16 additions & 5 deletions .github/workflows/ci-testing.yml
@@ -71,9 +71,9 @@ jobs:
uses: actions/cache@v1
with:
path: ${{ steps.pip-cache.outputs.dir }}
key: ${{ runner.os }}-${{ matrix.python-version }}-pip-${{ hashFiles('requirements.txt') }}-${{ hashFiles('requirements-extra.txt') }}
key: ${{ runner.os }}-${{ matrix.python-version }}-${{ matrix.requires }}-pip-${{ hashFiles('requirements.txt') }}-${{ hashFiles('requirements-extra.txt') }}
restore-keys: |
${{ runner.os }}-${{ matrix.python-version }}-pip-
${{ runner.os }}-${{ matrix.python-version }}-${{ matrix.requires }}-pip-

- name: Install dependencies
run: |
@@ -86,6 +86,17 @@
pip list
shell: bash

- name: Reinstall Horovod if necessary
if: runner.os != 'windows' && matrix.python-version != '3.8'
run: |
HOROVOD_BUILT=$(python -c "import horovod.torch; horovod.torch.nccl_built(); print('SUCCESS')")
if [[ $HOROVOD_BUILT != "SUCCESS" ]]; then
pip uninstall -y horovod
HOROVOD_BUILD_ARCH_FLAGS="-mfma" pip install --no-cache-dir $(grep "horovod" requirements-extra.txt)
fi
horovodrun --check-build
shell: bash

- name: Cache datasets
uses: actions/cache@v1
with:
@@ -99,14 +110,14 @@
run: |
# tox --sitepackages
# flake8 .
coverage run --source pytorch_lightning -m py.test pytorch_lightning tests -v --doctest-modules --junitxml=junit/test-results-${{ runner.os }}-${{ matrix.python-version }}.xml
coverage run --source pytorch_lightning -m py.test pytorch_lightning tests -v --doctest-modules --junitxml=junit/test-results-${{ runner.os }}-${{ matrix.python-version }}-${{ matrix.requires }}.xml
coverage report

- name: Upload pytest test results
uses: actions/upload-artifact@master
with:
name: pytest-results-${{ runner.os }}-${{ matrix.python-version }}
path: junit/test-results-${{ runner.os }}-${{ matrix.python-version }}.xml
name: pytest-results-${{ runner.os }}-${{ matrix.python-version }}-${{ matrix.requires }}
path: junit/test-results-${{ runner.os }}-${{ matrix.python-version }}-${{ matrix.requires }}.xml
# Use always() to always run this step to publish test results when there are test failures
if: always()

8 changes: 4 additions & 4 deletions .github/workflows/docker_builds.yml
@@ -22,23 +22,23 @@ jobs:
- uses: actions/checkout@v2
- name: Publish Releases to Docker
# only on releases
uses: elgohr/Publish-Docker-Github-Action@master
uses: elgohr/Publish-Docker-Github-Action@v5
if: contains(github.ref, 'refs/tags/') && !contains(${{ steps.get_version.outputs.VERSION }}, 'rc') %% !contains(${{ steps.get_version.outputs.VERSION }}, 'dev')
with:
name: pytorchlightning/pytorch_lightning
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
dockerfile: docker/Dockerfile
buildargs: PYTHON_VERSION=${{ matrix.python_version }},PYTORCH_VERSION=${{ matrix.pytorch_version }},LIGHTNING_VERSION=${{ steps.get_version.outputs.VERSION }}
tags: "${{ steps.get_version.outputs.VERSION }}_py${{ matrix.python_version }}_torch${{ matrix.pytorch_version }},stable_py${{ matrix.python_version }}_torch${{ matrix.pytorch_version }}"
tags: "${{ steps.get_version.outputs.VERSION }}-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }},stable-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}"
- name: Publish Master
# publish master
uses: elgohr/Publish-Docker-Github-Action@master
uses: elgohr/Publish-Docker-Github-Action@v5
if: github.event_name == 'push'
with:
name: pytorchlightning/pytorch_lightning
username: ${{ secrets.DOCKER_USERNAME }}
password: ${{ secrets.DOCKER_PASSWORD }}
dockerfile: docker/Dockerfile
buildargs: PYTHON_VERSION=${{ matrix.python_version }},PYTORCH_VERSION=${{ matrix.pytorch_version }},LIGHTNING_VERSION=${{ steps.get_version.outputs.VERSION }}
tags: "latest_py${{ matrix.python_version }}_torch${{ matrix.pytorch_version }}"
tags: "nightly-py${{ matrix.python_version }}-torch${{ matrix.pytorch_version }}"
2 changes: 1 addition & 1 deletion .github/workflows/docs-check.yml
@@ -11,7 +11,7 @@ jobs:
- uses: actions/checkout@v2
- uses: ammaraskar/sphinx-action@master
with:
# git is requried to clone the docs theme
# git is required to clone the docs theme
pre-build-command: "apt-get update -y && apt-get install -y git"
docs-folder: "docs/"
repo-token: "${{ secrets.GITHUB_TOKEN }}"
12 changes: 3 additions & 9 deletions .github/workflows/rebase.yml
@@ -1,8 +1,9 @@
name: Automatic Rebase
# https://github.com/marketplace/actions/automatic-rebase

on:
- pull_request
on:
issue_comment:
types: [created]

jobs:
rebase:
@@ -17,10 +18,3 @@ jobs:
uses: cirrus-actions/[email protected]
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# https://github.community/t5/GitHub-Actions/Workflow-is-failing-if-no-job-can-be-ran-due-to-condition/m-p/38186#M3250
always_job:
name: Always run job
runs-on: ubuntu-latest
steps:
- name: Always run
run: echo "This job is used to prevent the workflow to fail when all other jobs are skipped."
4 changes: 1 addition & 3 deletions .gitignore
@@ -13,9 +13,7 @@ test_tube_data/
test_tube_exp/

# Documentations
docs/source/pl_examples*.rst
docs/source/pytorch_lightning*.rst
docs/source/tests*.rst
docs/source/api
docs/source/*.md

# Byte-compiled / optimized / DLL files
23 changes: 23 additions & 0 deletions CHANGELOG.md
@@ -8,8 +8,20 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

### Added

- Added callback for logging learning rates ([#1498](https://github.com/PyTorchLightning/pytorch-lightning/pull/1498))

- Added transfer learning example (for a binary classification task in computer vision) ([#1564](https://github.com/PyTorchLightning/pytorch-lightning/pull/1564))

- Added type hints in `Trainer.fit()` and `Trainer.test()` to reflect that also a list of dataloaders can be passed in ([#1723](https://github.com/PyTorchLightning/pytorch-lightning/pull/1723)).

- The progress bar metrics now also get updated in `training_epoch_end` ([#1724](https://github.com/PyTorchLightning/pytorch-lightning/pull/1724)).

### Changed

- Reduction when `batch_size < num_gpus` ([#1609](https://github.com/PyTorchLightning/pytorch-lightning/pull/1609))

- Updated LightningTemplateModel to look more like Colab example ([#1577](https://github.com/PyTorchLightning/pytorch-lightning/pull/1577))

### Deprecated

### Removed
@@ -18,6 +30,17 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

- Fixed ModelCheckpoint not None checking filepath ([1654](https://github.com/PyTorchLightning/pytorch-lightning/pull/1654))

- Trainer now calls `on_load_checkpoint()` when resuming from a checkpoint ([1666](https://github.com/PyTorchLightning/pytorch-lightning/pull/1666))

- Fixed Horovod distributed backend to set the `root_gpu` property ([#1669](https://github.com/PyTorchLightning/pytorch-lightning/pull/1669))

- Fixed wandb logger `global_step` affects other loggers ([#1492](https://github.com/PyTorchLightning/pytorch-lightning/issues/1485))

- Fixed disabling progress bar on non-zero ranks using Horovod backend ([#1709](https://github.com/PyTorchLightning/pytorch-lightning/pull/1709))

- Fixed bugs that prevent lr finder to be used together with early stopping and validation dataloaders ([#1676](https://github.com/PyTorchLightning/pytorch-lightning/pull/1676))

- Fixed a bug in Trainer that prepended the checkpoint path with `version_` when it shouldn't ([#1748](https://github.com/PyTorchLightning/pytorch-lightning/pull/1748))

## [0.7.5] - 2020-04-27

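
The type-hint entry above (#1723) means `Trainer.fit()` and `Trainer.test()` accept either a single dataloader or a list of them for evaluation. A minimal sketch of what that looks like, assuming the 0.7.x keyword names `train_dataloader`, `val_dataloaders` and `test_dataloaders` (the argument names are assumptions, not shown in this diff, and `MyLightningModule` is a hypothetical model):

    import torch
    from torch.utils.data import DataLoader, TensorDataset
    from pytorch_lightning import Trainer

    from my_project import MyLightningModule  # hypothetical LightningModule

    # Toy loaders just to keep the sketch self-contained.
    def make_loader(n):
        return DataLoader(TensorDataset(torch.randn(n, 10), torch.randint(0, 2, (n,))), batch_size=8)

    model = MyLightningModule()
    trainer = Trainer(max_epochs=1)

    # Either a single dataloader or a list of dataloaders is accepted.
    trainer.fit(model,
                train_dataloader=make_loader(64),
                val_dataloaders=[make_loader(16), make_loader(16)])
    trainer.test(model, test_dataloaders=[make_loader(16), make_loader(16)])
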
9 changes: 7 additions & 2 deletions docs/source/apex.rst
@@ -1,3 +1,8 @@
.. testsetup:: *

from pytorch_lightning.trainer.trainer import Trainer


16-bit training
=================
Lightning offers 16-bit training for CPUs, GPUs and TPUs.
@@ -38,7 +43,7 @@ Install apex
Enable 16-bit
^^^^^^^^^^^^^

.. code-block:: python
.. testcode::

# turn on 16-bit
trainer = Trainer(amp_level='O1', precision=16)
@@ -50,7 +55,7 @@ TPU 16-bit
----------
16-bit on TPus is much simpler. To use 16-bit with TPUs set precision to 16 when using the tpu flag

.. code-block:: python
.. testcode::

# DEFAULT
trainer = Trainer(num_tpu_cores=8, precision=32)
45 changes: 30 additions & 15 deletions docs/source/callbacks.rst
@@ -1,3 +1,8 @@
.. testsetup:: *

from pytorch_lightning.trainer.trainer import Trainer
from pytorch_lightning.callbacks.base import Callback

.. role:: hidden
:class: hidden-section

@@ -18,21 +23,23 @@ An overall Lightning system should have:

Example:

.. doctest::

>>> import pytorch_lightning as pl
>>> class MyPrintingCallback(pl.Callback):
...
... def on_init_start(self, trainer):
... print('Starting to init trainer!')
...
... def on_init_end(self, trainer):
... print('trainer is init now')
...
... def on_train_end(self, trainer, pl_module):
... print('do something when training ends')
...
>>> trainer = pl.Trainer(callbacks=[MyPrintingCallback()])
.. testcode::

class MyPrintingCallback(Callback):

def on_init_start(self, trainer):
print('Starting to init trainer!')

def on_init_end(self, trainer):
print('trainer is init now')

def on_train_end(self, trainer, pl_module):
print('do something when training ends')

trainer = Trainer(callbacks=[MyPrintingCallback()])

.. testoutput::

Starting to init trainer!
trainer is init now

@@ -84,3 +91,11 @@ We successfully extended functionality without polluting our super clean
.. automodule:: pytorch_lightning.callbacks.progress
:noindex:
:exclude-members:

---------

.. automodule:: pytorch_lightning.callbacks.lr_logger
:noindex:
:exclude-members:
_extract_lr,
_find_names
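
For context, a rough usage sketch of the learning-rate logging callback (#1498) whose docs are added above. Only the module path `pytorch_lightning.callbacks.lr_logger` appears in this diff; the class name `LearningRateLogger` and the import below are assumptions based on the PR title, and `MyLightningModule` is a hypothetical model:

    from pytorch_lightning import Trainer
    from pytorch_lightning.callbacks.lr_logger import LearningRateLogger  # assumed class name

    from my_project import MyLightningModule  # hypothetical model whose configure_optimizers() returns a scheduler

    # The callback reads the current learning rate from the configured optimizers/schedulers
    # and forwards it to whatever logger the Trainer is using.
    model = MyLightningModule()
    trainer = Trainer(callbacks=[LearningRateLogger()], max_epochs=3)
    trainer.fit(model)

It is mainly useful when a learning-rate scheduler is configured; with a fixed rate there is nothing interesting to log.
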
35 changes: 29 additions & 6 deletions docs/source/child_modules.rst
@@ -1,3 +1,22 @@
.. testsetup:: *

import torch
from pytorch_lightning.trainer.trainer import Trainer
from pytorch_lightning.callbacks.base import Callback
from pytorch_lightning.core.lightning import LightningModule

class LitMNIST(LightningModule):

def __init__(self):
super().__init__()

def train_dataloader():
pass

def val_dataloader():
pass


Child Modules
-------------
Research projects tend to test different approaches to the same dataset.
@@ -7,13 +26,18 @@ For example, imagine we now want to train an Autoencoder to use as a feature ext
Recall that `LitMNIST` already defines all the dataloading etc... The only things
that change in the `Autoencoder` model are the init, forward, training, validation and test step.

.. code-block:: python
.. testcode::

class Encoder(torch.nn.Module):
...
pass

class Decoder(torch.nn.Module):
pass

class AutoEncoder(LitMNIST):

def __init__(self):
super().__init__()
self.encoder = Encoder()
self.decoder = Decoder()

@@ -30,10 +54,10 @@ that change in the `Autoencoder` model are the init, forward, training, validati
return loss

def validation_step(self, batch, batch_idx):
return self._shared_eval(batch, batch_idx, 'val'):
return self._shared_eval(batch, batch_idx, 'val')

def test_step(self, batch, batch_idx):
return self._shared_eval(batch, batch_idx, 'test'):
return self._shared_eval(batch, batch_idx, 'test')

def _shared_eval(self, batch, batch_idx, prefix):
x, y = batch
@@ -43,6 +67,7 @@ that change in the `Autoencoder` model are the init, forward, training, validati
loss = F.nll_loss(logits, y)
return {f'{prefix}_loss': loss}


and we can train this using the same trainer

.. code-block:: python
@@ -58,5 +83,3 @@ In this case, we want to use the `AutoEncoder` to extract image representations

some_images = torch.Tensor(32, 1, 28, 28)
representations = autoencoder(some_images)

..