How to set checkpoints so they are saved in the automatically generated version_N directories?
#6821
If the TensorBoard logger is set up as shown,

```python
logger = TensorBoardLogger(name="MyModel")
checkpoint_callback = ModelCheckpoint(
    filename="{epoch}-{step}-{val_loss:.2f}",
    monitor="val_loss",
    save_top_k=5,
)
trainer = pl.Trainer(
    default_root_dir=ROOT_DIR,
    callbacks=[checkpoint_callback],
    logger=[logger],
)
```

how do we configure the checkpoints to be written to the automatically named version_N directories, as they are when the Trainer is built without an explicit logger?

```python
trainer = pl.Trainer(
    default_root_dir=ROOT_DIR,
    callbacks=[checkpoint_callback],
)
```

If we pass in a logger to the Trainer, the checkpoints end up in a different location from the tensorboard logs, and if we do not pass in a logger to the Trainer, the layout is different again. How can both checkpoints and tensorboard files be written to the same version_N directory?
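For reference, a minimal sketch of where each side resolves its output by default. This assumes PyTorch Lightning 1.x (current at the time of this thread) and an illustrative `ROOT_DIR`; the paths shown are the logger defaults, not something stated in the question.

```python
import pytorch_lightning as pl
from pytorch_lightning.loggers import TensorBoardLogger

ROOT_DIR = "here"  # illustrative path, not from the question

# An explicit logger resolves its own versioned directory under <save_dir>/<name>/
logger = TensorBoardLogger(save_dir=ROOT_DIR, name="MyModel")
print(logger.log_dir)  # e.g. here/MyModel/version_0

# With no explicit logger, the Trainer builds a default TensorBoardLogger
# under <default_root_dir>/lightning_logs/
trainer = pl.Trainer(default_root_dir=ROOT_DIR)
print(trainer.logger.log_dir)  # e.g. here/lightning_logs/version_0
```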
Hi
For this you need to set the `default_root_dir` in the Trainer and set the `save_dir` of the Logger to the same path. This works for me (latest PL version):

```python
from argparse import ArgumentParser

import torch
from torch.nn import functional as F

import pytorch_lightning as pl
from pl_examples.basic_examples.mnist_datamodule import MNISTDataModule
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger


class LitClassifier(pl.LightningModule):
    def __init__(self, hidden_dim=128, learning_rate=1e-3):
        super().__init__()
        self.save_hyperparameters()
        self.l1 = torch.nn.Linear(28 * 28, self.hparams.hidden_dim)
        self.l2 = torch.nn.Linear(self.hparams.hidden_dim, 10)

    def forward(self, x):
        x = x.view(x.size(0), -1)
        x = torch.relu(self.l1(x))
        x = torch.relu(self.l2(x))
        return x

    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = F.cross_entropy(y_hat, y)
        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = F.cross_entropy(y_hat, y)
        self.log('valid_loss', loss)

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=self.hparams.learning_rate)

    @staticmethod
    def add_model_specific_args(parent_parser):
        parser = parent_parser.add_argument_group("LitClassifier")
        parser.add_argument('--hidden_dim', type=int, default=128)
        parser.add_argument('--learning_rate', type=float, default=0.0001)
        return parent_parser


def cli_main():
    pl.seed_everything(1234)

    parser = ArgumentParser()
    parser = pl.Trainer.add_argparse_args(parser)
    parser = LitClassifier.add_model_specific_args(parser)
    parser = MNISTDataModule.add_argparse_args(parser)
    args = parser.parse_args()

    dm = MNISTDataModule.from_argparse_args(args, num_workers=2)
    model = LitClassifier(args.hidden_dim, args.learning_rate)

    # Logger and Trainer share the same root directory, so the checkpoint
    # callback (no dirpath given) saves into the logger's versioned folder,
    # i.e. here/MyModel/version_N/checkpoints/.
    ROOT_DIR = "here"
    mylogger = TensorBoardLogger(name="MyModel", save_dir=ROOT_DIR)
    ckpt_callback = ModelCheckpoint(monitor="valid_loss", filename="{epoch}-{step}-{valid_loss:.2f}")
    trainer = pl.Trainer.from_argparse_args(
        args,
        default_root_dir=ROOT_DIR,
        logger=mylogger,
        callbacks=[ckpt_callback],
        limit_train_batches=2,
        limit_val_batches=2,
    )
    trainer.fit(model, datamodule=dm)


if __name__ == '__main__':
    cli_main()
```
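As a small variant not taken from the thread: instead of relying on the shared root directory, you can point the checkpoint callback at the logger's versioned folder explicitly via `ModelCheckpoint(dirpath=...)` and the logger's `log_dir` property. The paths and names below are illustrative.

```python
import os

import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger

ROOT_DIR = "here"  # illustrative path
logger = TensorBoardLogger(save_dir=ROOT_DIR, name="MyModel")

# logger.log_dir resolves to <ROOT_DIR>/MyModel/version_N, so the checkpoints
# land in the same version folder as the tensorboard event files.
ckpt_callback = ModelCheckpoint(
    dirpath=os.path.join(logger.log_dir, "checkpoints"),
    monitor="valid_loss",
    filename="{epoch}-{step}-{valid_loss:.2f}",
)

trainer = pl.Trainer(default_root_dir=ROOT_DIR, logger=logger, callbacks=[ckpt_callback])
# then trainer.fit(model, datamodule=dm) as in the example above
```

Either way the idea is the same as in the reply above: keep the logger and the checkpoint callback agreeing on the version_N directory.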