Skip to content

Commit a1e0681

Browse files
committed
Add support for saving and restoring best_model_metrics in ModelCheckpoint
1 parent: 53a1234 · commit: a1e0681

File tree

3 files changed: +55 additions, −5 deletions

src/lightning/pytorch/CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
1010

1111
### Added
1212

13-
-
13+
- Added `best_model_metrics` attribute to `ModelCheckpoint` callback to store all logged metrics associated with the best model checkpoint ([#21355](https://github.com/Lightning-AI/pytorch-lightning/pull/21355))
1414

1515
### Changed
1616

src/lightning/pytorch/callbacks/model_checkpoint.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -551,10 +551,13 @@ def state_dict(self) -> dict[str, Any]:
551551
"kth_best_model_path": self.kth_best_model_path,
552552
"kth_value": self.kth_value,
553553
"last_model_path": self.last_model_path,
554+
"best_model_metrics": self.best_model_metrics,
554555
}
555556

556557
@override
557558
def load_state_dict(self, state_dict: dict[str, Any]) -> None:
559+
self.best_model_metrics = state_dict.get("best_model_metrics", {})
560+
558561
dirpath_from_ckpt = state_dict.get("dirpath", self.dirpath)
559562

560563
if self.dirpath == dirpath_from_ckpt:

tests/tests_pytorch/checkpointing/test_model_checkpoint.py

Lines changed: 51 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1638,7 +1638,7 @@ def training_step(self, *args):
16381638

16391639

16401640
def test_best_model_metrics(tmp_path):
1641-
"""Ensure ModelCheckpoint correctly tracks best_model_metrics."""
1641+
"""Ensure ModelCheckpoint correctly tracks and restores best_model_metrics."""
16421642

16431643
class TestModel(BoringModel):
16441644
def training_step(self, batch, batch_idx):
@@ -1654,7 +1654,12 @@ def validation_step(self, batch, batch_idx):
16541654
self.log("val_metric", (self.current_epoch + 1) / 10)
16551655
return loss
16561656

1657-
checkpoint = ModelCheckpoint(dirpath=tmp_path, save_top_k=3, monitor="val_metric", mode="min")
1657+
checkpoint = ModelCheckpoint(
1658+
dirpath=tmp_path,
1659+
save_top_k=3,
1660+
monitor="val_metric",
1661+
mode="min",
1662+
)
16581663

16591664
trainer = Trainer(
16601665
default_root_dir=tmp_path,
@@ -1672,15 +1677,37 @@ def validation_step(self, batch, batch_idx):
16721677
assert hasattr(checkpoint, "best_model_metrics")
16731678
assert isinstance(checkpoint.best_model_metrics, dict)
16741679
assert "val_metric" in checkpoint.best_model_metrics
1675-
assert checkpoint.best_model_metrics["val_metric"] == 0.1 # best (lowest) value
1680+
assert checkpoint.best_model_metrics["val_metric"] == 0.1 # lowest value
16761681
assert "val_loss" in checkpoint.best_model_metrics
16771682
assert "train_loss" in checkpoint.best_model_metrics
16781683
assert "train_metric" in checkpoint.best_model_metrics
16791684

1685+
best_ckpt_path = checkpoint.best_model_path
1686+
assert best_ckpt_path
1687+
assert os.path.exists(best_ckpt_path)
1688+
1689+
loaded = torch.load(best_ckpt_path, weights_only=False)
1690+
1691+
callbacks_state = loaded.get("callbacks", {})
1692+
assert callbacks_state # ensure not empty
1693+
1694+
ckpt_key = next(
1695+
(k for k in callbacks_state if k.startswith("ModelCheckpoint")),
1696+
None,
1697+
)
1698+
1699+
assert ckpt_key is not None
1700+
1701+
loaded_metrics = callbacks_state[ckpt_key]["best_model_metrics"]
1702+
1703+
assert isinstance(loaded_metrics, dict)
1704+
assert loaded_metrics == checkpoint.best_model_metrics
1705+
assert loaded_metrics["val_metric"] == 0.1
1706+
16801707

16811708
@pytest.mark.parametrize("mode", ["min", "max"])
16821709
def test_best_model_metrics_mode(tmp_path, mode: str):
1683-
"""Ensure ModelCheckpoint.best_model_metrics respects the 'mode' parameter."""
1710+
"""Ensure ModelCheckpoint.best_model_metrics respects the 'mode' parameter and is restored correctly."""
16841711

16851712
class TestModel(BoringModel):
16861713
def validation_step(self, batch, batch_idx):
@@ -1710,6 +1737,26 @@ def validation_step(self, batch, batch_idx):
17101737
expected_value = 0.1 if mode == "min" else 0.3
17111738
assert checkpoint.best_model_metrics["val_metric"] == expected_value
17121739

1740+
# load the checkpoint and verify metrics are restored
1741+
best_ckpt_path = checkpoint.best_model_path
1742+
assert best_ckpt_path
1743+
assert os.path.exists(best_ckpt_path)
1744+
1745+
loaded = torch.load(best_ckpt_path, weights_only=False)
1746+
callbacks_state = loaded.get("callbacks", {})
1747+
assert callbacks_state
1748+
1749+
ckpt_key = next(
1750+
(k for k in callbacks_state if k.startswith("ModelCheckpoint")),
1751+
None,
1752+
)
1753+
assert ckpt_key is not None
1754+
1755+
loaded_metrics = callbacks_state[ckpt_key]["best_model_metrics"]
1756+
1757+
assert isinstance(loaded_metrics, dict)
1758+
assert loaded_metrics["val_metric"] == expected_value
1759+
17131760

17141761
@pytest.mark.parametrize("use_omegaconf", [False, pytest.param(True, marks=RunIf(omegaconf=True))])
17151762
def test_hparams_type(tmp_path, use_omegaconf):

0 commit comments

Comments (0)