
Commit 3b51f86

+scripts
1 parent 90837d0 commit 3b51f86

File tree

1 file changed: +65 −0 lines

1 file changed

+65
-0
lines changed

configs/vit_imagenet_a100.py

Lines changed: 65 additions & 0 deletions
@@ -0,0 +1,65 @@
import os

from libai.config import LazyCall
from .common.models.vit.vit_base_patch16_224 import model
# from .common.models.vit.vit_large_patch16_224 import model
from .common.models.graph import graph
from .common.train import train
from .common.optim import optim
from .common.data.imagenet import dataloader

from flowvision.data import Mixup
from flowvision.loss.cross_entropy import SoftTargetCrossEntropy

# Set the data paths to the extracted ImageNet dataset
# (train holds a list of datasets; test is a list of dataloaders)
dataloader.train.dataset[0].root = "/data/ImageNet/extract"
dataloader.test[0].dataset.root = "/data/ImageNet/extract"

# Refine the model config for ViT training on ImageNet
model.cfg.num_classes = 1000
model.cfg.loss_func = SoftTargetCrossEntropy()

# Add the Mixup function to the train dataloader
dataloader.train.mixup_func = LazyCall(Mixup)(
    mixup_alpha=0.8,
    cutmix_alpha=1.0,
    prob=1.0,
    switch_prob=0.5,
    mode="batch",
    num_classes=model.cfg.num_classes,
)

# Refine the optimizer config for the ViT model
optim.lr = 1e-3  # 5e-4 * 1024 (batch size) / 512
optim.eps = 1e-8
optim.weight_decay = 0.05
optim.params.clip_grad_max_norm = None
optim.params.clip_grad_norm_type = None
optim.params.overrides = {"pos_embed": {"weight_decay": 0.0}, "cls_token": {"weight_decay": 0.0}}

# Refine the training config for the ViT model
train.train_micro_batch_size = 128
train.test_micro_batch_size = 128
# train.train_epoch = 300
train.train_epoch = 0  # epoch-based training disabled; use a fixed iteration count instead
train.train_iter = int(os.getenv("NUM_ITER_ENV"))  # NUM_ITER_ENV must be set, e.g. NUM_ITER_ENV=300
train.warmup_ratio = 5 / 300
train.evaluation.enabled = False
# train.evaluation.eval_period = 100
train.log_period = 1

# Scheduler
train.scheduler.warmup_factor = 0.001
train.scheduler.alpha = 0.01
train.scheduler.warmup_method = "linear"

# Enable fp16 mixed precision (AMP)
train.amp.enabled = True

# ZeRO optimization (disabled here; stage 1 if enabled)
train.zero_optimization.enabled = False
train.zero_optimization.stage = 1

# Distributed settings
train.dist.pipeline_num_layers = model.cfg.depth
train.dist.data_parallel_size = 2
train.dist.tensor_parallel_size = 2
train.dist.pipeline_parallel_size = 2
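
With train.dist set to 2-way data, tensor, and pipeline parallelism, the sizes multiply to 8 devices, and the per-step global batch is the micro batch scaled by the data-parallel degree (assuming LiBai derives it that way and no gradient accumulation is configured). A quick arithmetic check in plain Python:

# Sanity-check the layout implied by the train.dist settings above.
data_parallel, tensor_parallel, pipeline_parallel = 2, 2, 2
world_size = data_parallel * tensor_parallel * pipeline_parallel
assert world_size == 8  # e.g. a single node with 8 A100s

micro_batch = 128  # train.train_micro_batch_size
global_batch = micro_batch * data_parallel  # 256 samples per optimizer step
print(f"GPUs needed: {world_size}, global batch: {global_batch}")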

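The mixup_func configured above is applied to each training batch before the forward pass, producing mixed images and soft labels that pair with SoftTargetCrossEntropy. A minimal sketch, assuming flowvision's Mixup and SoftTargetCrossEntropy mirror the timm API (dummy tensors stand in for real data and model output):

import oneflow as flow
from flowvision.data import Mixup
from flowvision.loss.cross_entropy import SoftTargetCrossEntropy

mixup_fn = Mixup(
    mixup_alpha=0.8, cutmix_alpha=1.0, prob=1.0,
    switch_prob=0.5, mode="batch", num_classes=1000,
)
images = flow.randn(8, 3, 224, 224)   # dummy batch (even size, as mixup mixes pairs)
labels = flow.randint(0, 1000, (8,))  # integer class labels
images, soft_labels = mixup_fn(images, labels)  # soft_labels: (8, 1000)

logits = flow.randn(8, 1000)          # stand-in for model(images)
loss = SoftTargetCrossEntropy()(logits, soft_labels)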
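
LazyCall (used for mixup_func above) defers construction: the config records the callable plus its keyword arguments, and the trainer instantiates the object later. A toy sketch of that pattern; LiBai's actual LazyCall stores a target in a config node rather than using a class like this:

# Toy illustration of deferred construction (hypothetical `Lazy`, not LiBai's class).
class Lazy:
    def __init__(self, fn):
        self.fn = fn

    def __call__(self, **kwargs):
        self.kwargs = kwargs  # record the arguments now ...
        return self

    def build(self):
        return self.fn(**self.kwargs)  # ... construct the object later

lazy_obj = Lazy(dict)(a=1, b=2)  # nothing is built yet
print(lazy_obj.build())          # {'a': 1, 'b': 2}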