File tree Expand file tree Collapse file tree 2 files changed +3
-173
lines changed
Expand file tree Collapse file tree 2 files changed +3
-173
lines changed Load Diff This file was deleted.
Original file line number Diff line number Diff line change @@ -28,7 +28,7 @@ Training a 340M model:
2828
2929NNODE=1 NGPU=8 LOG_RANK=0 bash train.sh \
3030 --job.config_file train.toml \
31- --job.dump_folder exp/nsa-340M-10B/batch32.seqlen2048.warmup1024.update1.steps20480.lr3e-4 \
31+ --job.dump_folder exp/nsa-340M-10B/batch8.seqlen8192.warmup1024.update1.steps20480.lr3e-4 \
3232 --model.config configs/nsa_340M.json \
3333 --model.tokenizer_path fla-hub/transformer-1.3B-100B \
3434 --optimizer.name AdamW \
@@ -37,8 +37,8 @@ NNODE=1 NGPU=8 LOG_RANK=0 bash train.sh \
3737 --lr_scheduler.warmup_steps 1024 \
3838 --lr_scheduler.lr_min 0.1 \
3939 --lr_scheduler.decay_type sqrt \
40- --training.batch_size 32 \
41- --training.seq_len 2048 \
40+ --training.batch_size 8 \
41+ --training.seq_len 8192 \
4242 --training.gradient_accumulation_steps 1 \
4343 --training.steps 20480 \
4444 --training.max_norm 1.0 \
You can’t perform that action at this time.
0 commit comments