Skip to content

Commit 0dc9f24

Browse files
vsey and Borda authored
Update model_checkpoint on_exception run condition to follow common convention
Co-authored-by: Jirka Borovec <[email protected]>
1 parent ac33670 commit 0dc9f24

File tree

1 file changed

+10
-9
lines changed

1 file changed

+10
-9
lines changed

src/lightning/pytorch/callbacks/model_checkpoint.py

Lines changed: 10 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -345,15 +345,16 @@ def on_validation_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModul
345345
@override
346346
def on_exception(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule", exception: Exception) -> None:
347347
"""Save a checkpoint when an exception is raised."""
348-
if self._should_save_on_exception(trainer):
349-
monitor_candidates = self._monitor_candidates(trainer)
350-
filepath = self.format_checkpoint_name(metrics=monitor_candidates, prefix=self.CHECKPOINT_EXCEPTION_PREFIX)
351-
self._save_checkpoint(trainer, filepath)
352-
self._save_last_checkpoint(trainer, monitor_candidates)
353-
rank_zero_info(
354-
f"An {type(exception).__name__} was raised with message: \
355-
{str(exception)}, saved checkpoint to {filepath}"
356-
)
348+
if not self._should_save_on_exception(trainer):
349+
return
350+
monitor_candidates = self._monitor_candidates(trainer)
351+
filepath = self.format_checkpoint_name(metrics=monitor_candidates, prefix=self.CHECKPOINT_EXCEPTION_PREFIX)
352+
self._save_checkpoint(trainer, filepath)
353+
self._save_last_checkpoint(trainer, monitor_candidates)
354+
rank_zero_info(
355+
f"An {type(exception).__name__} was raised with message: \
356+
{str(exception)}, saved checkpoint to {filepath}"
357+
)
357358

358359
@override
359360
def state_dict(self) -> dict[str, Any]:

0 commit comments

Comments
 (0)