Commit f89b181

rohitgr7 authored and carmocca committed

Fix epoch logging on train epoch end (#13025)

Co-authored-by: Carlos Mocholí <[email protected]>

1 parent 902774a · commit f89b181

File tree: 3 files changed, +39 −2 lines changed

CHANGELOG.md
Lines changed: 1 addition & 0 deletions

@@ -29,6 +29,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 - Fixed issue where the CLI could not pass a `Profiler` to the `Trainer` ([#13084](https://github.com/PyTorchLightning/pytorch-lightning/pull/13084))
 - Fixed torchelastic detection with non-distributed installations ([#13142](https://github.com/PyTorchLightning/pytorch-lightning/pull/13142))
 - Fixed logging's step values when multiple dataloaders are used during evaluation ([#12184](https://github.com/PyTorchLightning/pytorch-lightning/pull/12184))
+- Fixed epoch logging on train epoch end ([#13025](https://github.com/PyTorchLightning/pytorch-lightning/pull/13025))


 ## [1.6.3] - 2022-05-03

pytorch_lightning/loops/fit_loop.py
Lines changed: 2 additions & 2 deletions

@@ -305,15 +305,15 @@ def on_advance_end(self) -> None:
         if self.epoch_loop._num_ready_batches_reached():
             self.epoch_loop.update_lr_schedulers("epoch", update_plateau_schedulers=True)

-        self.epoch_progress.increment_completed()
-
         # we manually decrease here because loggers expect that the same step is used when logging epoch-end metrics
         # even when the batch loop has finished
         self.epoch_loop._batches_that_stepped -= 1
         # log epoch metrics
         self.trainer._logger_connector.update_train_epoch_metrics()
         self.epoch_loop._batches_that_stepped += 1

+        self.epoch_progress.increment_completed()
+
         # if fault tolerant is enabled and process has been notified, exit.
         self.trainer._exit_gracefully_on_signal()

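The hunk above moves `self.epoch_progress.increment_completed()` to after `update_train_epoch_metrics()`, so the epoch counter still refers to the epoch that just finished when epoch-end metrics are logged, while the temporary decrement of `_batches_that_stepped` keeps the logged step equal to the last batch's step. A minimal standalone sketch of that pattern (the `Progress` and `Logger` classes here are hypothetical stand-ins, not Lightning's actual implementation):

```python
class Progress:
    """Hypothetical stand-in for Lightning's epoch progress tracker."""

    def __init__(self):
        self.current = 0

    def increment_completed(self):
        self.current += 1


class Logger:
    """Hypothetical stand-in that records (metrics, step) pairs."""

    def __init__(self):
        self.records = []

    def log(self, metrics, step):
        self.records.append((metrics, step))


def end_of_epoch(progress, logger, batches_that_stepped):
    # Loggers expect epoch-end metrics to reuse the step of the last batch,
    # so the step counter is temporarily decremented, as in the diff above.
    batches_that_stepped -= 1
    logger.log({"epoch": progress.current}, step=batches_that_stepped)
    batches_that_stepped += 1
    # The fix: mark the epoch completed only *after* logging, so the logged
    # "epoch" value is the epoch that just ran, not the next one.
    progress.increment_completed()
    return batches_that_stepped


progress, logger = Progress(), Logger()
steps = 0
for _ in range(2):  # two epochs of two batches each
    steps += 2
    steps = end_of_epoch(progress, logger, steps)

# logger.records is [({"epoch": 0}, 1), ({"epoch": 1}, 3)], matching the
# epoch/step pairs asserted for "foo_epoch" in the test below.
```

Before the fix, `increment_completed()` ran first, so the epoch-end metrics would have been logged with an already-incremented epoch value.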
tests/trainer/logging_/test_train_loop_logging.py
Lines changed: 36 additions & 0 deletions

@@ -16,6 +16,8 @@
 import collections
 import itertools
 from re import escape
+from unittest import mock
+from unittest.mock import call

 import numpy as np
 import pytest
@@ -747,3 +749,37 @@ def validation_epoch_end(self, *_) -> None:
     train_data = DataLoader(RandomDataset(32, 64), batch_size=2)
     val_data = DataLoader(RandomDataset(32, 64), batch_size=2)
     trainer.fit(model, train_dataloaders=train_data, val_dataloaders=val_data)
+
+
+@mock.patch("pytorch_lightning.loggers.TensorBoardLogger.log_metrics")
+def test_log_metrics_epoch_step_values(mock_log_metrics, tmpdir):
+    """Tests the default epoch and step values logged."""
+
+    class MyModel(BoringModel):
+        def training_step(self, batch, batch_idx):
+            self.log("foo", 0.0, on_step=True, on_epoch=True)
+            return super().training_step(batch, batch_idx)
+
+    model = MyModel()
+    trainer = Trainer(
+        default_root_dir=tmpdir,
+        limit_train_batches=2,
+        limit_val_batches=0,
+        max_epochs=2,
+        log_every_n_steps=1,
+        enable_model_summary=False,
+        enable_checkpointing=False,
+        enable_progress_bar=False,
+    )
+    trainer.fit(model)
+
+    mock_log_metrics.assert_has_calls(
+        [
+            call(metrics={"foo_step": 0.0, "epoch": 0}, step=0),
+            call(metrics={"foo_step": 0.0, "epoch": 0}, step=1),
+            call(metrics={"foo_epoch": 0.0, "epoch": 0}, step=1),
+            call(metrics={"foo_step": 0.0, "epoch": 1}, step=2),
+            call(metrics={"foo_step": 0.0, "epoch": 1}, step=3),
+            call(metrics={"foo_epoch": 0.0, "epoch": 1}, step=3),
+        ]
+    )

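The new test patches `TensorBoardLogger.log_metrics` and verifies the recorded calls with `assert_has_calls`, which checks that the expected calls appear in order (extra calls in between are tolerated). A small self-contained sketch of that mocking pattern, using a hypothetical `Sink` class in place of the real logger:

```python
from unittest import mock
from unittest.mock import call


class Sink:
    """Hypothetical stand-in for a logger backend."""

    def log_metrics(self, metrics, step):
        raise RuntimeError("real implementation; should be patched out in the test")


# patch.object replaces the method on the class with a MagicMock for the
# duration of the block, so no real logging happens.
with mock.patch.object(Sink, "log_metrics") as log_metrics:
    sink = Sink()
    sink.log_metrics(metrics={"foo": 1.0}, step=0)
    sink.log_metrics(metrics={"foo": 2.0}, step=1)

    # assert_has_calls passes when the expected calls occur in this order;
    # unlike assert_called_once_with, it does not require them to be the
    # only calls made.
    log_metrics.assert_has_calls(
        [
            call(metrics={"foo": 1.0}, step=0),
            call(metrics={"foo": 2.0}, step=1),
        ]
    )
```

In the actual test this technique captures every `log_metrics` call across two epochs, letting the assertions pin down both the `step` value and the `epoch` key that the fix in `fit_loop.py` corrects.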