-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain_ctr.py
More file actions
48 lines (40 loc) · 1.5 KB
/
main_ctr.py
File metadata and controls
48 lines (40 loc) · 1.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import os
import torch
import time
import ezkfg as ez
from loguru import logger
from lightning.pytorch import seed_everything
from model.module.callbacks import add_callbacks
import lightning.pytorch as pl
from model.mrg import MRGModel
from dataset.data_module import DataModule
def main(cfg_file=os.path.join(os.path.dirname(__file__), "config", "mrg_ctr_config.yaml")):
    """Train an MRG model with PyTorch Lightning using the given YAML config.

    Loads the configuration, sets up per-run file logging and a global random
    seed, builds the data module / callbacks / trainer from config values, and
    runs ``trainer.fit``.

    Args:
        cfg_file: Path to the YAML configuration file. Defaults to
            ``config/mrg_ctr_config.yaml`` next to this script. NOTE: the
            default is resolved once at import time, which is fine here since
            ``__file__`` does not change.

    Side effects:
        Creates ``cfg.save_dir`` if missing, writes a timestamped log file
        there, and launches training (GPU/CPU usage per config).
    """
    cfg = ez.load(cfg_file)

    # Per-run log file, timestamped so successive runs do not overwrite each other.
    log_dir = cfg.save_dir
    os.makedirs(log_dir, exist_ok=True)
    logger.add(os.path.join(log_dir, f"train-{time.strftime('%Y-%m-%d-%H-%M-%S')}.log"))

    # Reproducibility: seeds RNGs and (workers=True) dataloader worker processes.
    seed_everything(cfg.seed, workers=True)

    dm = DataModule(cfg, flag=True)  # test during training
    callbacks = add_callbacks(cfg)

    # All trainer knobs come from the config; callbacks/loggers are built above.
    trainer = pl.Trainer(
        devices=cfg.n_gpus,
        num_nodes=cfg.n_nodes,
        strategy=cfg.strategy,
        accelerator=cfg.accelerator,
        precision=cfg.precision,
        val_check_interval=cfg.train.val_check_interval,
        limit_val_batches=cfg.train.limit_val_batches,
        max_epochs=cfg.train.num_epochs,
        num_sanity_val_steps=cfg.train.num_sanity_val_steps,
        accumulate_grad_batches=cfg.train.accumulate_grad_batches,
        check_val_every_n_epoch=cfg.train.check_val_every_n_epoch,
        callbacks=callbacks["callbacks"],
        logger=callbacks["loggers"],
    )

    # build model architecture
    model = MRGModel(cfg)
    logger.info(f"Model: {model}")

    trainer.fit(model, datamodule=dm)
    # trainer.test(model, datamodule=dm)


if __name__ == "__main__":
    main()