Commit be7d123

[Bug fixes] update gpt testing issue (#4438)
1 parent 0b1d706 commit be7d123

3 files changed: +10 −13 lines

model_zoo/gpt/README.md

Lines changed: 2 additions & 2 deletions

@@ -96,7 +96,7 @@ CUDA_VISIBLE_DEVICES=0 python run_pretrain.py \
     --device gpu \
     --warmup_steps 320000 \
     --warmup_ratio 0.01 \
-    --per_device_train_batch_size 4 \
+    --micro_batch_size 4 \
     --eval_steps 100 \
     --do_train true \
     --do_predict true
@@ -129,7 +129,7 @@ python -m paddle.distributed.launch --gpus "0,1,2,3,4,5,6,7" run_pretrain.py \
     --device gpu \
     --warmup_steps 320000 \
     --warmup_ratio 0.01 \
-    --per_device_train_batch_size 4 \
+    --micro_batch_size 8 \
     --eval_steps 100 \
     --do_train true \
     --do_predict true

model_zoo/gpt/run_pretrain.py

Lines changed: 5 additions & 5 deletions

@@ -52,11 +52,7 @@
 @dataclass
 class TrainingArguments(TrainingArguments):
     min_lr: float = field(default=1e-5, metadata={"help": "The initial min learning rate for Adam."})
-
-    # per_device_train_batch_size
-    @property
-    def micro_batch_size(self):
-        return self.per_device_train_batch_size
+    micro_batch_size: int = field(default=4, metadata={"help": "The micro batch size for training."})

     @property
     def eval_freq(self):
@@ -349,6 +345,7 @@ def do_train():
     ).parse_args_into_dataclasses()
     training_args.eval_iters = 10
     training_args.test_iters = training_args.eval_iters * 10
+    # training_args.per_device_train_batch_size = 2

     paddle.set_device(training_args.device)
     if paddle.distributed.get_world_size() > 1:
@@ -455,6 +452,9 @@ def do_train():
     elif last_checkpoint is not None:
         checkpoint = last_checkpoint

+    print("==============================")
+    print(f"last checkpoint : {checkpoint}")
+
     # Training
     if training_args.do_train:
         train_result = trainer.train(resume_from_checkpoint=checkpoint)
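
The heart of the fix is the first hunk: `micro_batch_size` stops being a read-only `@property` aliasing `per_device_train_batch_size` and becomes a real dataclass field, so it can be set from the command line. A minimal standalone sketch (hypothetical `OldArgs`/`NewArgs` classes, not PaddleNLP's actual `TrainingArguments`) of why the property breaks and the field works:

from dataclasses import dataclass, field

# Hypothetical stand-ins for the change above. Dataclass-driven argument
# parsers expose only annotated fields as CLI flags, and a property with
# no setter cannot be assigned at all.

@dataclass
class OldArgs:
    per_device_train_batch_size: int = 4

    @property
    def micro_batch_size(self) -> int:
        return self.per_device_train_batch_size  # read-only alias

@dataclass
class NewArgs:
    micro_batch_size: int = field(default=4, metadata={"help": "The micro batch size for training."})

old = OldArgs()
try:
    old.micro_batch_size = 8  # what parsing --micro_batch_size 8 would need to do
except AttributeError as err:
    print(f"property cannot be set: {err}")

new = NewArgs(micro_batch_size=8)  # the field round-trips through a parser
print(new.micro_batch_size)  # 8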

scripts/regression/ci_case.sh

Lines changed: 3 additions & 6 deletions

@@ -286,11 +286,6 @@ if [ ! -f 'test.py' ];then
     sed -i "s/python3/python/g" Makefile
     sed -i "s/python-config/python3.7m-config/g" Makefile
     cd ${nlp_dir}/model_zoo/gpt/
-    mkdir pre_data
-    cd ./pre_data
-    wget -q https://bj.bcebos.com/paddlenlp/models/transformers/gpt/data/gpt_en_dataset_300m_ids.npy
-    wget -q https://bj.bcebos.com/paddlenlp/models/transformers/gpt/data/gpt_en_dataset_300m_idx.npz
-    cd ../
     # pretrain
     python -m paddle.distributed.launch run_pretrain.py \
         --model_name_or_path "__internal_testing__/gpt" \
@@ -302,8 +297,10 @@ if [ ! -f 'test.py' ];then
         --device gpu \
         --warmup_steps 320000 \
         --warmup_ratio 0.01 \
-        --per_device_train_batch_size 4 \
+        --micro_batch_size 8 \
         --eval_steps 100 \
+        --overwrite_output_dir true \
+        --dataloader_drop_last true \
         --do_train true \
         --do_predict true >${log_path}/gpt_pretrain >>${log_path}/gpt_pretrain 2>&1
     print_info $? gpt_pretrain
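
The CI run also gains `--dataloader_drop_last true`, which discards a trailing partial batch so every training step sees a full micro batch. A pure-Python sketch of that behavior (illustration only, not Paddle's `DataLoader`):

# Split 10 samples into micro batches of 4, with and without drop_last.
samples = list(range(10))
batch_size = 4

def make_batches(data, size, drop_last):
    out = [data[i:i + size] for i in range(0, len(data), size)]
    if drop_last and out and len(out[-1]) < size:
        out.pop()  # discard the short trailing batch
    return out

print(make_batches(samples, batch_size, drop_last=False))  # [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9]]
print(make_batches(samples, batch_size, drop_last=True))   # [[0, 1, 2, 3], [4, 5, 6, 7]]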
