Skip to content

Commit ebd38e9

Browse files
authored
Fix train-iters typo & format script (#74)
1 parent 462980b commit ebd38e9

File tree

1 file changed

+19
-19
lines changed

1 file changed

+19
-19
lines changed

examples/pretrain_gpt_1B_santacoder.sh

Lines changed: 19 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -21,32 +21,32 @@ GPT_ARGS="\
21 21
--tensor-model-parallel-size 1 \
22 22
--pipeline-model-parallel-size 1 \
23 23
--recompute-activations \
24-
--num-layers 24 \
25-
--hidden-size 2048 \
26-
--num-attention-heads 16 \
27-
--attention-head-type multiquery \
28-
--init-method-std 0.022 \
24+
--num-layers 24 \
25+
--hidden-size 2048 \
26+
--num-attention-heads 16 \
27+
--attention-head-type multiquery \
28+
--init-method-std 0.022 \
29 29
--seq-length 2048 \
30 30
--max-position-embeddings 2048 \
31-
--attention-dropout 0.1 \
32-
--hidden-dropout 0.1 \
31+
--attention-dropout 0.1 \
32+
--hidden-dropout 0.1 \
33 33
--micro-batch-size 2 \
34 34
--global-batch-size 192 \
35-
--lr 0.0002 \
36-
--train-iters 3000 \
37-
--lr-decay-iters 600000 \
38-
--lr-decay-style cosine \
39-
--lr-warmup-fraction 0.02 \
40-
--weight-decay .1 \
41-
--adam-beta2 .95 \
42-
--clip-grad 1.0 \
43-
--fp16 \
35+
--lr 0.0002 \
36+
--train-iters 300000 \
37+
--lr-decay-iters 600000 \
38+
--lr-decay-style cosine \
39+
--lr-warmup-fraction 0.02 \
40+
--weight-decay .1 \
41+
--adam-beta2 .95 \
42+
--clip-grad 1.0 \
43+
--fp16 \
44 44
--log-interval 10 \
45 45
--save-interval 4000 \
46 46
--eval-interval 200 \
47 47
--eval-iters 10 \
48-
--initial-loss-scale 65536 \
49-
--fim-rate 0.5 \
48+
--initial-loss-scale 65536 \
49+
--fim-rate 0.5 \
50 50
"
51 51

52 52
TENSORBOARD_ARGS="--tensorboard-dir ${CHECKPOINT_PATH}/tensorboard"
@@ -59,4 +59,4 @@ torchrun $DISTRIBUTED_ARGS \
59 59
--save $CHECKPOINT_PATH \
60 60
--load $CHECKPOINT_PATH \
61 61
--data-path $DATA_PATH \
62-
$TENSORBOARD_ARGS
62+
$TENSORBOARD_ARGS

0 commit comments

Comments
 (0)