Skip to content

Commit 7d750e6

Browse files
committed
add extra condition for checking if we should save on exception
1 parent 2113acc commit 7d750e6

File tree

1 file changed

+9
-2
lines changed

1 file changed

+9
-2
lines changed

src/lightning/pytorch/callbacks/model_checkpoint.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -343,10 +343,9 @@ def on_validation_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModul
343343

344344
@override
345345
def on_exception(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule", exception: Exception) -> None:
346-
if self.save_on_exception and not self._should_skip_saving_checkpoint(trainer):
346+
if self._should_save_on_exception(trainer):
347347
monitor_candidates = self._monitor_candidates(trainer)
348348
filepath = self.format_checkpoint_name(metrics=monitor_candidates)
349-
print(type(exception))
350349
self._save_checkpoint(trainer, filepath)
351350
self._save_last_checkpoint(trainer, monitor_candidates)
352351
rank_zero_info(f"An exception was raised saved checkpoint to {filepath}")
@@ -439,6 +438,14 @@ def _should_skip_saving_checkpoint(self, trainer: "pl.Trainer") -> bool:
439438
or trainer.sanity_checking # don't save anything during sanity check
440439
or self._last_global_step_saved == trainer.global_step # already saved at the last step
441440
)
441+
442+
def _should_save_on_exception(self, trainer: "pl.Trainer") -> bool:
443+
return (
444+
self.save_on_exception
445+
and not bool(trainer.fast_dev_run) # disable checkpointing with fast_dev_run
446+
and not trainer.sanity_checking # don't save anything during sanity check
447+
and not self._last_global_step_saved == trainer.global_step # already saved at the last step
448+
)
442449

443450
def _should_save_on_train_epoch_end(self, trainer: "pl.Trainer") -> bool:
444451
if self._save_on_train_epoch_end is not None:

0 commit comments

Comments
 (0)