import os

from flowvision.data import Mixup
from flowvision.loss.cross_entropy import SoftTargetCrossEntropy

from libai.config import LazyCall

from .common.models.vit.vit_base_patch16_224 import model
# For the larger variant, swap the line above for:
# from .common.models.vit.vit_large_patch16_224 import model
from .common.models.graph import graph
from .common.train import train
from .common.optim import optim
from .common.data.imagenet import dataloader

# Refine data paths to point at the extracted ImageNet-1k directory
dataloader.train.dataset[0].root = "/data/ImageNet/extract"
dataloader.test[0].dataset.root = "/data/ImageNet/extract"
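
# Optional sketch: read the data root from an environment variable so the same
# config works across machines (DATA_ROOT is a hypothetical variable name,
# mirroring the NUM_ITER_ENV pattern used further down):
# _root = os.getenv("DATA_ROOT", "/data/ImageNet/extract")
# dataloader.train.dataset[0].root = _root
# dataloader.test[0].dataset.root = _root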

# Refine model cfg for ViT training on ImageNet-1k
model.cfg.num_classes = 1000
# Mixup/CutMix below produce soft (non one-hot) targets, so a soft-target loss is needed
model.cfg.loss_func = SoftTargetCrossEntropy()
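
# For reference, the assumed semantics of SoftTargetCrossEntropy (matching the
# timm loss that flowvision mirrors):
#   loss = mean_over_batch( sum(-soft_target * log_softmax(logits), dim=-1) )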

# Add Mixup/CutMix augmentation for training
dataloader.train.mixup_func = LazyCall(Mixup)(
    mixup_alpha=0.8,
    cutmix_alpha=1.0,
    prob=1.0,
    switch_prob=0.5,
    mode="batch",
    num_classes=model.cfg.num_classes,
)
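
# A minimal usage sketch, assuming flowvision's Mixup keeps timm's callable
# interface (images and integer labels in, mixed images and soft targets out):
#   mixup = Mixup(mixup_alpha=0.8, cutmix_alpha=1.0, prob=1.0,
#                 switch_prob=0.5, mode="batch", num_classes=1000)
#   images, soft_targets = mixup(images, targets)  # soft_targets shape: (N, 1000)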

# Refine optimizer cfg for the ViT model
optim.lr = 1e-3  # linear scaling rule: 5e-4 * 1024 (global batch size) / 512
optim.eps = 1e-8
optim.weight_decay = 0.05
optim.params.clip_grad_max_norm = None
optim.params.clip_grad_norm_type = None
# Exempt positional embeddings and the class token from weight decay
optim.params.overrides = {"pos_embed": {"weight_decay": 0.0}, "cls_token": {"weight_decay": 0.0}}
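
# Worked arithmetic for the scaling rule in the lr comment above:
#   base_lr = 5e-4 at a reference batch size of 512
#   lr = 5e-4 * 1024 / 512 = 1e-3
# (the 1024 global batch size is taken from the original comment)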

# Refine train cfg for the ViT model
train.train_micro_batch_size = 128
train.test_micro_batch_size = 128
# train.train_epoch = 300
train.train_epoch = 0  # epoch-based scheduling disabled; iterations come from the env below
# Fail fast with a clear message instead of a bare TypeError when the variable is unset
num_iter_env = os.getenv("NUM_ITER_ENV")
assert num_iter_env is not None, "NUM_ITER_ENV must be set to the number of training iterations"
train.train_iter = int(num_iter_env)
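
# Example launch (a sketch; the entry-point script names are assumed from
# LiBai's usual layout and may differ in your checkout):
#   NUM_ITER_ENV=100 bash tools/train.sh tools/train_net.py \
#       configs/vit_imagenet.py 8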
train.warmup_ratio = 5 / 300  # i.e. 5 warmup epochs out of the original 300-epoch schedule
train.evaluation.enabled = False
# train.evaluation.eval_period = 100
train.log_period = 1

# Scheduler
train.scheduler.warmup_factor = 0.001
train.scheduler.alpha = 0.01
train.scheduler.warmup_method = "linear"
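
# How the warmup length is assumed to be derived from the settings above:
#   warmup_iter = train.warmup_ratio * train.train_iter
# with the LR ramping linearly from warmup_factor * lr up to the full lr.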

# Enable fp16 mixed-precision training (AMP)
train.amp.enabled = True

# ZeRO optimizer-state sharding (disabled here; the stage value is inert while disabled)
train.zero_optimization.enabled = False
train.zero_optimization.stage = 1
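
# For reference (as commonly defined for ZeRO): stage 1 shards optimizer states
# across data-parallel ranks; higher stages additionally shard gradients and parameters.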

# Distributed settings: 2-way data x 2-way tensor x 2-way pipeline parallelism
train.dist.pipeline_num_layers = model.cfg.depth
train.dist.data_parallel_size = 2
train.dist.tensor_parallel_size = 2
train.dist.pipeline_parallel_size = 2
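
# Sanity sketch of what this parallel layout implies (assuming no gradient
# accumulation is configured):
#   world_size   = 2 * 2 * 2 = 8 GPUs
#   global batch = train_micro_batch_size * data_parallel_size = 128 * 2 = 256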