Fix rich progress bar metric render on epoch end (#11689)

rohitgr7 · carmocca · Borda · lexierule · commit 5f616ac1049c · 2022-02-09T15:22:34.000-05:00
Co-authored-by: Carlos Mocholi <carlossmocholi@gmail.com>
Co-authored-by: Jirka <jirka.borovec@seznam.cz>
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -10,6 +10,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 - Fixed the format of the configuration saved automatically by the CLI's `SaveConfigCallback` ([#11532](https://github.com/PyTorchLightning/pytorch-lightning/pull/11532))
 - Fixed an issue to avoid validation loop run on restart ([#11552](https://github.com/PyTorchLightning/pytorch-lightning/pull/11552))
+- The Rich progress bar now correctly shows the `on_epoch` logged values on train epoch end ([#11689](https://github.com/PyTorchLightning/pytorch-lightning/pull/11689))
 
 
 ## [1.5.9] - 2022-01-18
diff --git a/pytorch_lightning/callbacks/progress/rich_progress.py b/pytorch_lightning/callbacks/progress/rich_progress.py
@@ -16,6 +16,7 @@
 from datetime import timedelta
 from typing import Any, Optional, Union
 
+import pytorch_lightning as pl
 from pytorch_lightning.callbacks.progress.base import ProgressBarBase
 from pytorch_lightning.utilities.exceptions import MisconfigurationException
 from pytorch_lightning.utilities.imports import _RICH_AVAILABLE
@@ -379,6 +380,10 @@ def on_validation_epoch_end(self, trainer, pl_module):
         if self.val_progress_bar_id is not None:
             self._update(self.val_progress_bar_id, visible=False)
 
+    def on_validation_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
+        if trainer.state.fn == "fit":  # only for validation inside `fit`; a standalone `validate` run is skipped
+            self._update_metrics(trainer, pl_module)  # re-render so the values logged during validation show up
+
     def on_test_epoch_start(self, trainer, pl_module):
         super().on_train_epoch_start(trainer, pl_module)
         self.test_progress_bar_id = self._add_task(self.total_test_batches, self.test_description)
@@ -392,6 +397,9 @@ def on_train_batch_end(self, trainer, pl_module, outputs, batch, batch_idx):
         self._update(self.main_progress_bar_id)
         self._update_metrics(trainer, pl_module)
 
+    def on_train_epoch_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
+        self._update_metrics(trainer, pl_module)  # re-render so `on_epoch` logged values show on train epoch end
+
     def on_validation_batch_end(self, trainer, pl_module, outputs, batch, batch_idx, dataloader_idx):
         super().on_validation_batch_end(trainer, pl_module, outputs, batch, batch_idx, dataloader_idx)
         if trainer.sanity_checking:
diff --git a/tests/callbacks/test_rich_progress_bar.py b/tests/callbacks/test_rich_progress_bar.py
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from collections import defaultdict
 from unittest import mock
 from unittest.mock import DEFAULT, Mock
 
@@ -201,3 +202,68 @@ def test_rich_progress_bar_num_sanity_val_steps(tmpdir, limit_val_batches: int):
 
     trainer.fit(model)
     assert progress_bar.progress.tasks[0].completed == min(num_sanity_val_steps, limit_val_batches)
+
+
+@RunIf(rich=True)
+def test_rich_progress_bar_correct_value_epoch_end(tmpdir):
+    """Rich counterpart to test_tqdm_progress_bar::test_tqdm_progress_bar_correct_value_epoch_end."""
+
+    class MockedProgressBar(RichProgressBar):
+        calls = defaultdict(list)
+
+        def get_metrics(self, trainer, pl_module):
+            items = super().get_metrics(trainer, pl_module)
+            del items["v_num"]
+            del items["loss"]
+            # recording calls here is equivalent to mocking `set_postfix`, as this method gets called every time
+            self.calls[trainer.state.fn].append(
+                (trainer.state.stage, trainer.current_epoch, trainer.global_step, items)
+            )
+            return items
+
+    class MyModel(BoringModel):
+        def training_step(self, batch, batch_idx):
+            self.log("a", self.global_step, prog_bar=True, on_step=False, on_epoch=True, reduce_fx=max)
+            return super().training_step(batch, batch_idx)
+
+        def validation_step(self, batch, batch_idx):
+            self.log("b", self.global_step, prog_bar=True, on_step=False, on_epoch=True, reduce_fx=max)
+            return super().validation_step(batch, batch_idx)
+
+        def test_step(self, batch, batch_idx):
+            self.log("c", self.global_step, prog_bar=True, on_step=False, on_epoch=True, reduce_fx=max)
+            return super().test_step(batch, batch_idx)
+
+    model = MyModel()
+    pbar = MockedProgressBar()
+    trainer = Trainer(
+        default_root_dir=tmpdir,
+        limit_train_batches=2,
+        limit_val_batches=2,
+        limit_test_batches=2,
+        max_epochs=2,
+        enable_model_summary=False,
+        enable_checkpointing=False,
+        log_every_n_steps=1,
+        callbacks=pbar,
+    )
+
+    trainer.fit(model)
+    assert pbar.calls["fit"] == [
+        ("sanity_check", 0, 0, {"b": 0}),
+        ("train", 0, 0, {}),
+        ("train", 0, 1, {}),
+        ("validate", 0, 1, {"b": 1}),  # validation end
+        # epoch end over, `on_epoch=True` metrics are computed
+        ("train", 0, 2, {"a": 1, "b": 1}),  # training epoch end
+        ("train", 1, 2, {"a": 1, "b": 1}),
+        ("train", 1, 3, {"a": 1, "b": 1}),
+        ("validate", 1, 3, {"a": 1, "b": 3}),  # validation end
+        ("train", 1, 4, {"a": 3, "b": 3}),  # training epoch end
+    ]
+
+    trainer.validate(model, verbose=False)
+    assert pbar.calls["validate"] == []
+
+    trainer.test(model, verbose=False)
+    assert pbar.calls["test"] == []
diff --git a/tests/callbacks/test_tqdm_progress_bar.py b/tests/callbacks/test_tqdm_progress_bar.py
@@ -611,6 +611,8 @@ def test_tqdm_progress_bar_main_bar_resume():
 
 
 def test_tqdm_progress_bar_correct_value_epoch_end(tmpdir):
+    """TQDM counterpart to test_rich_progress_bar::test_rich_progress_bar_correct_value_epoch_end."""
+
     class MockedProgressBar(TQDMProgressBar):
         calls = defaultdict(list)