@@ -34,16 +34,16 @@ if [ ${MODE} = "lite_train_lite_infer" ];then
34
34
35
35
# The whole procedure of lite_train_infer should be less than 15min.
36
36
# Hence, set maximum output length is 16.
37
- # sed -i "s/^max_out_len.*/max_out_len: 16/g" configs/transformer.base.yaml
38
- # sed -i "s/^max_out_len.*/max_out_len: 16/g" configs/transformer.big.yaml
37
+ sed -i "s/^max_out_len.*/max_out_len: 16/g" configs/transformer.base.yaml
38
+ sed -i "s/^max_out_len.*/max_out_len: 16/g" configs/transformer.big.yaml
39
39
40
- # sed -i "s/^random_seed:.*/random_seed: 128/g" configs/transformer.base.yaml
41
- # sed -i "s/^shuffle_batch:.*/shuffle_batch: False/g" configs/transformer.base.yaml
42
- # sed -i "s/^shuffle:.*/shuffle: False/g" configs/transformer.base.yaml
40
+ sed -i "s/^random_seed:.*/random_seed: 128/g" configs/transformer.base.yaml
41
+ sed -i "s/^shuffle_batch:.*/shuffle_batch: False/g" configs/transformer.base.yaml
42
+ sed -i "s/^shuffle:.*/shuffle: False/g" configs/transformer.base.yaml
43
43
44
- # sed -i "s/^random_seed:.*/random_seed: 128/g" configs/transformer.big.yaml
45
- # sed -i "s/^shuffle_batch:.*/shuffle_batch: False/g" configs/transformer.big.yaml
46
- # sed -i "s/^shuffle:.*/shuffle: False/g" configs/transformer.big.yaml
44
+ sed -i "s/^random_seed:.*/random_seed: 128/g" configs/transformer.big.yaml
45
+ sed -i "s/^shuffle_batch:.*/shuffle_batch: False/g" configs/transformer.big.yaml
46
+ sed -i "s/^shuffle:.*/shuffle: False/g" configs/transformer.big.yaml
47
47
48
48
# Data set prepared.
49
49
if [ ! -f WMT14.en-de.partial.tar.gz ]; then
@@ -94,16 +94,16 @@ elif [ ${MODE} = "whole_train_whole_infer" ];then
94
94
95
95
if [[ ${model_name} =~ transformer* ]]; then
96
96
cd ../examples/machine_translation/transformer/
97
- # sed -i "s/^max_out_len.*/max_out_len: 256/g" configs/transformer.base.yaml
98
- # sed -i "s/^max_out_len.*/max_out_len: 1024/g" configs/transformer.big.yaml
97
+ sed -i "s/^max_out_len.*/max_out_len: 256/g" configs/transformer.base.yaml
98
+ sed -i "s/^max_out_len.*/max_out_len: 1024/g" configs/transformer.big.yaml
99
99
100
- # sed -i "s/^random_seed:.*/random_seed: None/g" configs/transformer.base.yaml
101
- # sed -i "s/^shuffle_batch:.*/shuffle_batch: True/g" configs/transformer.base.yaml
102
- # sed -i "s/^shuffle:.*/shuffle: True/g" configs/transformer.base.yaml
100
+ sed -i "s/^random_seed:.*/random_seed: None/g" configs/transformer.base.yaml
101
+ sed -i "s/^shuffle_batch:.*/shuffle_batch: True/g" configs/transformer.base.yaml
102
+ sed -i "s/^shuffle:.*/shuffle: True/g" configs/transformer.base.yaml
103
103
104
- # sed -i "s/^random_seed:.*/random_seed: None/g" configs/transformer.big.yaml
105
- # sed -i "s/^shuffle_batch:.*/shuffle_batch: True/g" configs/transformer.big.yaml
106
- # sed -i "s/^shuffle:.*/shuffle: True/g" configs/transformer.big.yaml
104
+ sed -i "s/^random_seed:.*/random_seed: None/g" configs/transformer.big.yaml
105
+ sed -i "s/^shuffle_batch:.*/shuffle_batch: True/g" configs/transformer.big.yaml
106
+ sed -i "s/^shuffle:.*/shuffle: True/g" configs/transformer.big.yaml
107
107
108
108
# Whole data set prepared.
109
109
if [ ! -f WMT14.en-de.tar.gz ]; then
@@ -154,16 +154,16 @@ elif [ ${MODE} = "lite_train_whole_infer" ];then
154
154
155
155
if [[ ${model_name} =~ transformer* ]]; then
156
156
cd ../examples/machine_translation/transformer/
157
- # sed -i "s/^max_out_len.*/max_out_len: 256/g" configs/transformer.base.yaml
158
- # sed -i "s/^max_out_len.*/max_out_len: 1024/g" configs/transformer.big.yaml
157
+ sed -i "s/^max_out_len.*/max_out_len: 256/g" configs/transformer.base.yaml
158
+ sed -i "s/^max_out_len.*/max_out_len: 1024/g" configs/transformer.big.yaml
159
159
160
- # sed -i "s/^random_seed:.*/random_seed: None/g" configs/transformer.base.yaml
161
- # sed -i "s/^shuffle_batch:.*/shuffle_batch: True/g" configs/transformer.base.yaml
162
- # sed -i "s/^shuffle:.*/shuffle: True/g" configs/transformer.base.yaml
160
+ sed -i "s/^random_seed:.*/random_seed: None/g" configs/transformer.base.yaml
161
+ sed -i "s/^shuffle_batch:.*/shuffle_batch: True/g" configs/transformer.base.yaml
162
+ sed -i "s/^shuffle:.*/shuffle: True/g" configs/transformer.base.yaml
163
163
164
- # sed -i "s/^random_seed:.*/random_seed: None/g" configs/transformer.big.yaml
165
- # sed -i "s/^shuffle_batch:.*/shuffle_batch: True/g" configs/transformer.big.yaml
166
- # sed -i "s/^shuffle:.*/shuffle: True/g" configs/transformer.big.yaml
164
+ sed -i "s/^random_seed:.*/random_seed: None/g" configs/transformer.big.yaml
165
+ sed -i "s/^shuffle_batch:.*/shuffle_batch: True/g" configs/transformer.big.yaml
166
+ sed -i "s/^shuffle:.*/shuffle: True/g" configs/transformer.big.yaml
167
167
168
168
# Trained transformer base model checkpoint.
169
169
# For infer.
@@ -230,16 +230,16 @@ elif [ ${MODE} = "whole_infer" ];then
230
230
231
231
if [[ ${model_name} =~ transformer* ]]; then
232
232
cd ../examples/machine_translation/transformer/
233
- # sed -i "s/^max_out_len.*/max_out_len: 256/g" configs/transformer.base.yaml
234
- # sed -i "s/^max_out_len.*/max_out_len: 1024/g" configs/transformer.big.yaml
233
+ sed -i "s/^max_out_len.*/max_out_len: 256/g" configs/transformer.base.yaml
234
+ sed -i "s/^max_out_len.*/max_out_len: 1024/g" configs/transformer.big.yaml
235
235
236
- # sed -i "s/^random_seed:.*/random_seed: None/g" configs/transformer.base.yaml
237
- # sed -i "s/^shuffle_batch:.*/shuffle_batch: True/g" configs/transformer.base.yaml
238
- # sed -i "s/^shuffle:.*/shuffle: True/g" configs/transformer.base.yaml
236
+ sed -i "s/^random_seed:.*/random_seed: None/g" configs/transformer.base.yaml
237
+ sed -i "s/^shuffle_batch:.*/shuffle_batch: True/g" configs/transformer.base.yaml
238
+ sed -i "s/^shuffle:.*/shuffle: True/g" configs/transformer.base.yaml
239
239
240
- # sed -i "s/^random_seed:.*/random_seed: None/g" configs/transformer.big.yaml
241
- # sed -i "s/^shuffle_batch:.*/shuffle_batch: True/g" configs/transformer.big.yaml
242
- # sed -i "s/^shuffle:.*/shuffle: True/g" configs/transformer.big.yaml
240
+ sed -i "s/^random_seed:.*/random_seed: None/g" configs/transformer.big.yaml
241
+ sed -i "s/^shuffle_batch:.*/shuffle_batch: True/g" configs/transformer.big.yaml
242
+ sed -i "s/^shuffle:.*/shuffle: True/g" configs/transformer.big.yaml
243
243
244
244
# Trained transformer base model checkpoint.
245
245
if [ ! -f transformer-base-wmt_ende_bpe.tar.gz ]; then
@@ -269,4 +269,96 @@ elif [ ${MODE} = "whole_infer" ];then
269
269
ln -s WMT14.en-de/wmt14_ende_data_bpe/newstest2014.tok.bpe.33708.de test.de
270
270
cd -
271
271
fi
272
+ elif [ ${MODE} = "benchmark_train" ]; then
273
+ if [ ${model_name} == "bigru_crf" ]; then
274
+ rm -rf ./data/lexical_analysis_dataset_tiny ./data/lexical_analysis_dataset_tiny.tar.gz
275
+ wget -nc -P ./data/ https://bj.bcebos.com/paddlenlp/datasets/lexical_analysis_dataset_tiny.tar.gz --no-check-certificate
276
+ cd ./data/ && tar xfz lexical_analysis_dataset_tiny.tar.gz && cd ..
277
+ fi
278
+
279
+ if [[ ${model_name} =~ bert* ]]; then
280
+ rm -rf ./data/wikicorpus_en_seqlen128/ wikicorpus_en_seqlen128.tar wikicorpus_en_seqlen512 hdf5_lower_case_1_seq_len_512_max_pred_80_masked_lm_prob_0.15_random_seed_12345_dupe_factor_5/ hdf5_lower_case_1_seq_len_512_max_pred_80_masked_lm_prob_0.15_random_seed_12345_dupe_factor_5.tar
281
+ wget -nc -P ./data/ https://bj.bcebos.com/paddlenlp/datasets/benchmark_wikicorpus_en_seqlen128.tar --no-check-certificate
282
+ wget -nc -P ./data/ https://bj.bcebos.com/paddlenlp/datasets/benchmark_hdf5_lower_case_1_seq_len_512_max_pred_80_masked_lm_prob_0.15_random_seed_12345_dupe_factor_5.tar --no-check-certificate
283
+
284
+ cd ./data/
285
+ tar -xf benchmark_wikicorpus_en_seqlen128.tar
286
+ tar -xf benchmark_hdf5_lower_case_1_seq_len_512_max_pred_80_masked_lm_prob_0.15_random_seed_12345_dupe_factor_5.tar
287
+
288
+ ln -s hdf5_lower_case_1_seq_len_512_max_pred_80_masked_lm_prob_0.15_random_seed_12345_dupe_factor_5/wikicorpus_en_seqlen512/ wikicorpus_en_seqlen512
289
+
290
+ cd ..
291
+ fi
292
+
293
+ if [[ ${model_name} =~ gpt* ]]; then
294
+ run_env=$BENCHMARK_ROOT/run_env
295
+
296
+ rm -rf $run_env
297
+ mkdir $run_env
298
+
299
+ echo `which python`
300
+ ln -s $(which python)m-config $run_env/python3-config
301
+ ln -s $(which python)m-config $run_env/python-config
302
+ ln -s $(which python) $run_env/python3
303
+
304
+ export PATH=$run_env:${PATH}
305
+
306
+ mkdir -p data && cd data
307
+ wget https://bj.bcebos.com/paddlenlp/models/transformers/gpt/data/gpt_en_dataset_300m_ids.npy -o .tmp
308
+ wget https://bj.bcebos.com/paddlenlp/models/transformers/gpt/data/gpt_en_dataset_300m_idx.npz -o .tmp
309
+ cd -
310
+ fi
311
+
312
+ if [[ ${model_name} =~ transformer* ]]; then
313
+ cd ../examples/machine_translation/transformer/
314
+
315
+ git checkout .
316
+
317
+ # Data set prepared.
318
+ if [ ! -f WMT14.en-de.partial.tar.gz ]; then
319
+ wget https://bj.bcebos.com/paddlenlp/datasets/WMT14.en-de.partial.tar.gz
320
+ tar -zxf WMT14.en-de.partial.tar.gz
321
+ fi
322
+ # Set soft link.
323
+ if [ -f train.en ]; then
324
+ rm -f train.en
325
+ fi
326
+ if [ -f train.de ]; then
327
+ rm -f train.de
328
+ fi
329
+ if [ -f dev.en ]; then
330
+ rm -f dev.en
331
+ fi
332
+ if [ -f dev.de ]; then
333
+ rm -f dev.de
334
+ fi
335
+ if [ -f test.en ]; then
336
+ rm -f test.en
337
+ fi
338
+ if [ -f test.de ]; then
339
+ rm -f test.de
340
+ fi
341
+ rm -f vocab_all.bpe.33712
342
+ rm -f vocab_all.bpe.33708
343
+ # Vocab
344
+ cp -f WMT14.en-de.partial/wmt14_ende_data_bpe/vocab_all.bpe.33712 ./
345
+ cp -f WMT14.en-de.partial/wmt14_ende_data_bpe/vocab_all.bpe.33708 ./
346
+ # Train
347
+ ln -s WMT14.en-de.partial/wmt14_ende_data_bpe/train.tok.clean.bpe.en train.en
348
+ ln -s WMT14.en-de.partial/wmt14_ende_data_bpe/train.tok.clean.bpe.de train.de
349
+ # Dev
350
+ ln -s WMT14.en-de.partial/wmt14_ende_data_bpe/dev.tok.bpe.en dev.en
351
+ ln -s WMT14.en-de.partial/wmt14_ende_data_bpe/dev.tok.bpe.de dev.de
352
+ # Test
353
+ ln -s WMT14.en-de.partial/wmt14_ende_data_bpe/test.tok.bpe.en test.en
354
+ ln -s WMT14.en-de.partial/wmt14_ende_data_bpe/test.tok.bpe.de test.de
355
+ cd -
356
+ fi
357
+
358
+ export PYTHONPATH=$(dirname "$PWD"):$PYTHONPATH
359
+ python -m pip install --upgrade pip
360
+ python -m pip install -r ../requirements.txt -i https://mirror.baidu.com/pypi/simple
361
+ python -m pip install pybind11 regex sentencepiece tqdm visualdl attrdict pyyaml -i https://mirror.baidu.com/pypi/simple
362
+ python -m pip install -e ..
363
+
272
364
fi
0 commit comments