11Help on class BackupAndRestore in module keras.src.callbacks.backup_and_restore:
22
33class BackupAndRestore(keras.src.callbacks.callback.Callback)
4- | BackupAndRestore(backup_dir, save_freq='epoch', delete_checkpoint=True)
4+ | BackupAndRestore(backup_dir, save_freq='epoch', double_checkpoint=False, delete_checkpoint=True)
55 |
66 | Callback to back up and restore the training state.
77 |
@@ -59,6 +59,12 @@ class BackupAndRestore(keras.src.callbacks.callback.Callback)
5959 | When set to an integer, the callback saves the checkpoint every
6060 | `save_freq` batches. Set `save_freq=False` only if using
6161 | preemption checkpointing (i.e. with `save_before_preemption=True`).
62+ | double_checkpoint: Boolean. If enabled, the `BackupAndRestore` callback
63+ | saves the two most recent training states (current and previous). After
64+ | an interruption, if the current state can't be loaded due to an IO error
65+ | (e.g. a corrupted file), it tries to restore the previous one. This
66+ | behaviour uses twice as much disk space but increases fault
67+ | tolerance. Defaults to `False`.
6268 | delete_checkpoint: Boolean. This `BackupAndRestore`
6369 | callback works by saving a checkpoint to back up the training state.
6470 | If `delete_checkpoint=True`, the checkpoint will be deleted after
@@ -76,6 +82,7 @@ class BackupAndRestore(keras.src.callbacks.callback.Callback)
7682 | self,
7783 | backup_dir,
7884 | save_freq='epoch',
85+ | double_checkpoint=False,
7986 | delete_checkpoint=True
8087 | )
8188 | Initialize self. See help(type(self)) for accurate signature.
@@ -116,7 +123,13 @@ class BackupAndRestore(keras.src.callbacks.callback.Callback)
116123 | logs: Dict. Aggregated metric results up until this batch.
117124 |
118125 | on_train_begin(self, logs=None)
119- | Get training state from temporary file and restore it.
126+ | Called at the beginning of training.
127+ |
128+ | Subclasses should override for any actions to run.
129+ |
130+ | Args:
131+ | logs: Dict. Currently no data is passed to this argument for this
132+ | method but that may change in the future.
120133 |
121134 | on_train_end(self, logs=None)
122135 | Called at the end of training.
0 commit comments