Skip to content

Commit f9fab94

Browse files
FrostML and smallv0221 authored
Merge prepare.benchmark.sh and prepare.sh (#1938)
* merge prepare benchmark * update * delete export BENCHMARK_ROOT * rename ips to ips_total, ips_per_card to ips * pre-commit issue Co-authored-by: smallv0221 <[email protected]>
1 parent 7a5b002 commit f9fab94

File tree

8 files changed

+129
-144
lines changed

8 files changed

+129
-144
lines changed

examples/language_model/gpt-3/dygraph/run_pretrain.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -394,7 +394,7 @@ def do_train(args):
394394
avg_reader_cost = train_reader_cost / args.logging_freq
395395

396396
logger.info(
397-
"global step %d, epoch: %d, batch: %d, loss: %.9f, avg_reader_cost: %.5f sec, avg_batch_cost: %.5f sec, speed: %.2f step/s, ips: %.0f tokens/s, ips_per_card: %.0f tokens/s, learning rate: %.5e"
397+
"global step %d, epoch: %d, batch: %d, loss: %.9f, avg_reader_cost: %.5f sec, avg_batch_cost: %.5f sec, speed: %.2f step/s, ips_total: %.0f tokens/s, ips: %.0f tokens/s, learning rate: %.5e"
398398
% (global_step, epoch, step, avg_loss, avg_reader_cost,
399399
1. / speed, speed, speed * default_global_tokens_num,
400400
speed * default_global_tokens_num / nranks,

examples/language_model/gpt-3/static/run_pretrain_static.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -435,7 +435,7 @@ def do_train(args):
435435
train_reader_cost + train_run_cost)
436436
avg_reader_cost = train_reader_cost / args.logging_freq
437437
logger.info(
438-
"global step %d, epoch: %d, batch: %d, loss: %.9f, avg_reader_cost: %.5f sec, avg_batch_cost: %.5f sec, speed: %.2f steps/s, ips: %.0f tokens/s, ips_per_card: %.0f tokens/s, learning rate: %.5e"
438+
"global step %d, epoch: %d, batch: %d, loss: %.9f, avg_reader_cost: %.5f sec, avg_batch_cost: %.5f sec, speed: %.2f steps/s, ips_total: %.0f tokens/s, ips: %.0f tokens/s, learning rate: %.5e"
439439
% (global_step, epoch, step, loss_return[0],
440440
avg_reader_cost, 1. / speed, speed,
441441
speed * args.global_batch_size * args.max_seq_len,

examples/language_model/gpt/run_pretrain.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,7 @@ def do_train(args):
263263
train_reader_cost + train_run_cost)
264264
avg_reader_cost = train_reader_cost / args.logging_freq
265265
logger.info(
266-
"global step %d, epoch: %d, batch: %d, loss: %.9f, avg_reader_cost: %.5f sec, avg_batch_cost: %.5f sec, speed: %.2f step/s, ips: %.0f tokens/s, ips_per_card: %.0f tokens/s, learning rate: %.5e"
266+
"global step %d, epoch: %d, batch: %d, loss: %.9f, avg_reader_cost: %.5f sec, avg_batch_cost: %.5f sec, speed: %.2f step/s, ips_total: %.0f tokens/s, ips: %.0f tokens/s, learning rate: %.5e"
267267
%
268268
(global_step, epoch, step, loss_numpy, avg_reader_cost,
269269
1. / speed, speed, speed * default_global_tokens_num,

examples/language_model/gpt/run_pretrain_static.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -430,7 +430,7 @@ def do_train(args):
430430
avg_reader_cost = train_reader_cost / args.logging_freq
431431

432432
logger.info(
433-
"global step %d, epoch: %d, batch: %d, loss: %.9f, avg_reader_cost: %.5f sec, avg_batch_cost: %.5f sec, speed: %.2f steps/s, ips: %.0f tokens/s, ips_per_card: %.0f tokens/s, learning rate: %.5e"
433+
"global step %d, epoch: %d, batch: %d, loss: %.9f, avg_reader_cost: %.5f sec, avg_batch_cost: %.5f sec, speed: %.2f steps/s, ips_total: %.0f tokens/s, ips: %.0f tokens/s, learning rate: %.5e"
434434
% (global_step, epoch, step, loss_return[0],
435435
avg_reader_cost, 1. / speed, speed,
436436
speed * args.global_batch_size * args.max_seq_len,

tests/benchmark/run_benchmark.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ function _set_params(){
3030

3131
log_with_profiler=$log_file
3232
profiler_path=$log_profile
33-
keyword="ips_per_card:"
33+
keyword="ips:"
3434
keyword_loss="loss:"
3535
skip_steps=20
3636
model_mode=-1

tests/test_tipc/benchmark_train.sh

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,6 @@ lines=(${dataline})
9393
model_name=$(func_parser_value "${lines[1]}")
9494

9595
if [[ ${model_name} =~ gpt* ]]; then
96-
export BENCHMARK_ROOT=/workspace
9796
run_env=$BENCHMARK_ROOT/run_env
9897
export PATH=$run_env:${PATH}
9998
fi

tests/test_tipc/prepare.benchmark.sh

Lines changed: 0 additions & 106 deletions
This file was deleted.

tests/test_tipc/prepare.sh

Lines changed: 124 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -34,16 +34,16 @@ if [ ${MODE} = "lite_train_lite_infer" ];then
3434

3535
# The whole procedure of lite_train_infer should be less than 15min.
3636
# Hence, set maximum output length is 16.
37-
# sed -i "s/^max_out_len.*/max_out_len: 16/g" configs/transformer.base.yaml
38-
# sed -i "s/^max_out_len.*/max_out_len: 16/g" configs/transformer.big.yaml
37+
sed -i "s/^max_out_len.*/max_out_len: 16/g" configs/transformer.base.yaml
38+
sed -i "s/^max_out_len.*/max_out_len: 16/g" configs/transformer.big.yaml
3939

40-
# sed -i "s/^random_seed:.*/random_seed: 128/g" configs/transformer.base.yaml
41-
# sed -i "s/^shuffle_batch:.*/shuffle_batch: False/g" configs/transformer.base.yaml
42-
# sed -i "s/^shuffle:.*/shuffle: False/g" configs/transformer.base.yaml
40+
sed -i "s/^random_seed:.*/random_seed: 128/g" configs/transformer.base.yaml
41+
sed -i "s/^shuffle_batch:.*/shuffle_batch: False/g" configs/transformer.base.yaml
42+
sed -i "s/^shuffle:.*/shuffle: False/g" configs/transformer.base.yaml
4343

44-
# sed -i "s/^random_seed:.*/random_seed: 128/g" configs/transformer.big.yaml
45-
# sed -i "s/^shuffle_batch:.*/shuffle_batch: False/g" configs/transformer.big.yaml
46-
# sed -i "s/^shuffle:.*/shuffle: False/g" configs/transformer.big.yaml
44+
sed -i "s/^random_seed:.*/random_seed: 128/g" configs/transformer.big.yaml
45+
sed -i "s/^shuffle_batch:.*/shuffle_batch: False/g" configs/transformer.big.yaml
46+
sed -i "s/^shuffle:.*/shuffle: False/g" configs/transformer.big.yaml
4747

4848
# Data set prepared.
4949
if [ ! -f WMT14.en-de.partial.tar.gz ]; then
@@ -94,16 +94,16 @@ elif [ ${MODE} = "whole_train_whole_infer" ];then
9494

9595
if [[ ${model_name} =~ transformer* ]]; then
9696
cd ../examples/machine_translation/transformer/
97-
# sed -i "s/^max_out_len.*/max_out_len: 256/g" configs/transformer.base.yaml
98-
# sed -i "s/^max_out_len.*/max_out_len: 1024/g" configs/transformer.big.yaml
97+
sed -i "s/^max_out_len.*/max_out_len: 256/g" configs/transformer.base.yaml
98+
sed -i "s/^max_out_len.*/max_out_len: 1024/g" configs/transformer.big.yaml
9999

100-
# sed -i "s/^random_seed:.*/random_seed: None/g" configs/transformer.base.yaml
101-
# sed -i "s/^shuffle_batch:.*/shuffle_batch: True/g" configs/transformer.base.yaml
102-
# sed -i "s/^shuffle:.*/shuffle: True/g" configs/transformer.base.yaml
100+
sed -i "s/^random_seed:.*/random_seed: None/g" configs/transformer.base.yaml
101+
sed -i "s/^shuffle_batch:.*/shuffle_batch: True/g" configs/transformer.base.yaml
102+
sed -i "s/^shuffle:.*/shuffle: True/g" configs/transformer.base.yaml
103103

104-
# sed -i "s/^random_seed:.*/random_seed: None/g" configs/transformer.big.yaml
105-
# sed -i "s/^shuffle_batch:.*/shuffle_batch: True/g" configs/transformer.big.yaml
106-
# sed -i "s/^shuffle:.*/shuffle: True/g" configs/transformer.big.yaml
104+
sed -i "s/^random_seed:.*/random_seed: None/g" configs/transformer.big.yaml
105+
sed -i "s/^shuffle_batch:.*/shuffle_batch: True/g" configs/transformer.big.yaml
106+
sed -i "s/^shuffle:.*/shuffle: True/g" configs/transformer.big.yaml
107107

108108
# Whole data set prepared.
109109
if [ ! -f WMT14.en-de.tar.gz ]; then
@@ -154,16 +154,16 @@ elif [ ${MODE} = "lite_train_whole_infer" ];then
154154

155155
if [[ ${model_name} =~ transformer* ]]; then
156156
cd ../examples/machine_translation/transformer/
157-
# sed -i "s/^max_out_len.*/max_out_len: 256/g" configs/transformer.base.yaml
158-
# sed -i "s/^max_out_len.*/max_out_len: 1024/g" configs/transformer.big.yaml
157+
sed -i "s/^max_out_len.*/max_out_len: 256/g" configs/transformer.base.yaml
158+
sed -i "s/^max_out_len.*/max_out_len: 1024/g" configs/transformer.big.yaml
159159

160-
# sed -i "s/^random_seed:.*/random_seed: None/g" configs/transformer.base.yaml
161-
# sed -i "s/^shuffle_batch:.*/shuffle_batch: True/g" configs/transformer.base.yaml
162-
# sed -i "s/^shuffle:.*/shuffle: True/g" configs/transformer.base.yaml
160+
sed -i "s/^random_seed:.*/random_seed: None/g" configs/transformer.base.yaml
161+
sed -i "s/^shuffle_batch:.*/shuffle_batch: True/g" configs/transformer.base.yaml
162+
sed -i "s/^shuffle:.*/shuffle: True/g" configs/transformer.base.yaml
163163

164-
# sed -i "s/^random_seed:.*/random_seed: None/g" configs/transformer.big.yaml
165-
# sed -i "s/^shuffle_batch:.*/shuffle_batch: True/g" configs/transformer.big.yaml
166-
# sed -i "s/^shuffle:.*/shuffle: True/g" configs/transformer.big.yaml
164+
sed -i "s/^random_seed:.*/random_seed: None/g" configs/transformer.big.yaml
165+
sed -i "s/^shuffle_batch:.*/shuffle_batch: True/g" configs/transformer.big.yaml
166+
sed -i "s/^shuffle:.*/shuffle: True/g" configs/transformer.big.yaml
167167

168168
# Trained transformer base model checkpoint.
169169
# For infer.
@@ -230,16 +230,16 @@ elif [ ${MODE} = "whole_infer" ];then
230230

231231
if [[ ${model_name} =~ transformer* ]]; then
232232
cd ../examples/machine_translation/transformer/
233-
# sed -i "s/^max_out_len.*/max_out_len: 256/g" configs/transformer.base.yaml
234-
# sed -i "s/^max_out_len.*/max_out_len: 1024/g" configs/transformer.big.yaml
233+
sed -i "s/^max_out_len.*/max_out_len: 256/g" configs/transformer.base.yaml
234+
sed -i "s/^max_out_len.*/max_out_len: 1024/g" configs/transformer.big.yaml
235235

236-
# sed -i "s/^random_seed:.*/random_seed: None/g" configs/transformer.base.yaml
237-
# sed -i "s/^shuffle_batch:.*/shuffle_batch: True/g" configs/transformer.base.yaml
238-
# sed -i "s/^shuffle:.*/shuffle: True/g" configs/transformer.base.yaml
236+
sed -i "s/^random_seed:.*/random_seed: None/g" configs/transformer.base.yaml
237+
sed -i "s/^shuffle_batch:.*/shuffle_batch: True/g" configs/transformer.base.yaml
238+
sed -i "s/^shuffle:.*/shuffle: True/g" configs/transformer.base.yaml
239239

240-
# sed -i "s/^random_seed:.*/random_seed: None/g" configs/transformer.big.yaml
241-
# sed -i "s/^shuffle_batch:.*/shuffle_batch: True/g" configs/transformer.big.yaml
242-
# sed -i "s/^shuffle:.*/shuffle: True/g" configs/transformer.big.yaml
240+
sed -i "s/^random_seed:.*/random_seed: None/g" configs/transformer.big.yaml
241+
sed -i "s/^shuffle_batch:.*/shuffle_batch: True/g" configs/transformer.big.yaml
242+
sed -i "s/^shuffle:.*/shuffle: True/g" configs/transformer.big.yaml
243243

244244
# Trained transformer base model checkpoint.
245245
if [ ! -f transformer-base-wmt_ende_bpe.tar.gz ]; then
@@ -269,4 +269,96 @@ elif [ ${MODE} = "whole_infer" ];then
269269
ln -s WMT14.en-de/wmt14_ende_data_bpe/newstest2014.tok.bpe.33708.de test.de
270270
cd -
271271
fi
272+
elif [ ${MODE} = "benchmark_train" ];then
273+
if [ ${model_name} == "bigru_crf" ]; then
274+
rm -rf ./data/lexical_analysis_dataset_tiny ./data/lexical_analysis_dataset_tiny.tar.gz
275+
wget -nc -P ./data/ https://bj.bcebos.com/paddlenlp/datasets/lexical_analysis_dataset_tiny.tar.gz --no-check-certificate
276+
cd ./data/ && tar xfz lexical_analysis_dataset_tiny.tar.gz && cd ..
277+
fi
278+
279+
if [[ ${model_name} =~ bert* ]]; then
280+
rm -rf ./data/wikicorpus_en_seqlen128/ wikicorpus_en_seqlen128.tar wikicorpus_en_seqlen512 hdf5_lower_case_1_seq_len_512_max_pred_80_masked_lm_prob_0.15_random_seed_12345_dupe_factor_5/ hdf5_lower_case_1_seq_len_512_max_pred_80_masked_lm_prob_0.15_random_seed_12345_dupe_factor_5.tar
281+
wget -nc -P ./data/ https://bj.bcebos.com/paddlenlp/datasets/benchmark_wikicorpus_en_seqlen128.tar --no-check-certificate
282+
wget -nc -P ./data/ https://bj.bcebos.com/paddlenlp/datasets/benchmark_hdf5_lower_case_1_seq_len_512_max_pred_80_masked_lm_prob_0.15_random_seed_12345_dupe_factor_5.tar --no-check-certificate
283+
284+
cd ./data/
285+
tar -xf benchmark_wikicorpus_en_seqlen128.tar
286+
tar -xf benchmark_hdf5_lower_case_1_seq_len_512_max_pred_80_masked_lm_prob_0.15_random_seed_12345_dupe_factor_5.tar
287+
288+
ln -s hdf5_lower_case_1_seq_len_512_max_pred_80_masked_lm_prob_0.15_random_seed_12345_dupe_factor_5/wikicorpus_en_seqlen512/ wikicorpus_en_seqlen512
289+
290+
cd ..
291+
fi
292+
293+
if [[ ${model_name} =~ gpt* ]]; then
294+
run_env=$BENCHMARK_ROOT/run_env
295+
296+
rm -rf $run_env
297+
mkdir $run_env
298+
299+
echo `which python`
300+
ln -s $(which python)m-config $run_env/python3-config
301+
ln -s $(which python)m-config $run_env/python-config
302+
ln -s $(which python) $run_env/python3
303+
304+
export PATH=$run_env:${PATH}
305+
306+
mkdir -p data && cd data
307+
wget https://bj.bcebos.com/paddlenlp/models/transformers/gpt/data/gpt_en_dataset_300m_ids.npy -o .tmp
308+
wget https://bj.bcebos.com/paddlenlp/models/transformers/gpt/data/gpt_en_dataset_300m_idx.npz -o .tmp
309+
cd -
310+
fi
311+
312+
if [[ ${model_name} =~ transformer* ]]; then
313+
cd ../examples/machine_translation/transformer/
314+
315+
git checkout .
316+
317+
# Data set prepared.
318+
if [ ! -f WMT14.en-de.partial.tar.gz ]; then
319+
wget https://bj.bcebos.com/paddlenlp/datasets/WMT14.en-de.partial.tar.gz
320+
tar -zxf WMT14.en-de.partial.tar.gz
321+
fi
322+
# Set soft link.
323+
if [ -f train.en ]; then
324+
rm -f train.en
325+
fi
326+
if [ -f train.de ]; then
327+
rm -f train.de
328+
fi
329+
if [ -f dev.en ]; then
330+
rm -f dev.en
331+
fi
332+
if [ -f dev.de ]; then
333+
rm -f dev.de
334+
fi
335+
if [ -f test.en ]; then
336+
rm -f test.en
337+
fi
338+
if [ -f test.de ]; then
339+
rm -f test.de
340+
fi
341+
rm -f vocab_all.bpe.33712
342+
rm -f vocab_all.bpe.33708
343+
# Vocab
344+
cp -f WMT14.en-de.partial/wmt14_ende_data_bpe/vocab_all.bpe.33712 ./
345+
cp -f WMT14.en-de.partial/wmt14_ende_data_bpe/vocab_all.bpe.33708 ./
346+
# Train
347+
ln -s WMT14.en-de.partial/wmt14_ende_data_bpe/train.tok.clean.bpe.en train.en
348+
ln -s WMT14.en-de.partial/wmt14_ende_data_bpe/train.tok.clean.bpe.de train.de
349+
# Dev
350+
ln -s WMT14.en-de.partial/wmt14_ende_data_bpe/dev.tok.bpe.en dev.en
351+
ln -s WMT14.en-de.partial/wmt14_ende_data_bpe/dev.tok.bpe.de dev.de
352+
#Test
353+
ln -s WMT14.en-de.partial/wmt14_ende_data_bpe/test.tok.bpe.en test.en
354+
ln -s WMT14.en-de.partial/wmt14_ende_data_bpe/test.tok.bpe.de test.de
355+
cd -
356+
fi
357+
358+
export PYTHONPATH=$(dirname "$PWD"):$PYTHONPATH
359+
python -m pip install --upgrade pip
360+
python -m pip install -r ../requirements.txt -i https://mirror.baidu.com/pypi/simple
361+
python -m pip install pybind11 regex sentencepiece tqdm visualdl attrdict pyyaml -i https://mirror.baidu.com/pypi/simple
362+
python -m pip install -e ..
363+
272364
fi

0 commit comments

Comments (0)