Skip to content

Commit 080aeb8

Browse files
authored
[CI] add ernie-layout & uie (#4868)
* [CI] remove stacl wk-3 * [CI] add ernie-layout & uie * update * update
1 parent 886834e commit 080aeb8

File tree

2 files changed

+56
-61
lines changed

2 files changed

+56
-61
lines changed

scripts/regression/ci_case.sh

Lines changed: 55 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,10 @@ export cudaid1=$2
1919
export cudaid2=$3
export PATH="${PATH}"
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}"
# Create the log directories up front. `-f` only matches regular files, so a
# `[ ! -f "model_logs/" ]` guard is always true for a directory and a plain
# `mkdir` then fails on every rerun. `mkdir -p` is idempotent and needs no guard.
mkdir -p model_logs unittest_logs
2828
print_info(){
@@ -707,21 +707,8 @@ sed -i "s/batch_size: 4096/batch_size: 500/g" config/transformer.yaml
707707
python -m paddle.distributed.launch train.py --config ./config/transformer.yaml >${log_path}/stacl_wk-1) >>${log_path}/stacl_wk-1 2>&1
708708
print_info $? stacl_wk-1
709709

710-
time (
711-
sed -i "s/waitk: -1/waitk: 3/g" config/transformer.yaml
712-
sed -i 's/save_model: "trained_models"/save_model: "trained_models_3"/g' config/transformer.yaml
713-
sed -i 's#init_from_checkpoint: ""#init_from_checkpoint: "./trained_models/step_1/"#g' config/transformer.yaml
714-
python -m paddle.distributed.launch train.py --config ./config/transformer.yaml >${log_path}/stacl_wk3) >>${log_path}/stacl_wk3 2>&1
715-
print_info $? stacl_wk3
716-
717-
time (sed -i "s/waitk: 3/waitk: 5/g" config/transformer.yaml
718-
sed -i 's/save_model: "trained_models_3"/save_model: "trained_models_5"/g' config/transformer.yaml
719-
sed -i 's#init_from_checkpoint: "./trained_models/step_1/"#init_from_checkpoint: "./trained_models_3/step_1/"#g' config/transformer.yaml
720-
python -m paddle.distributed.launch train.py --config ./config/transformer.yaml >${log_path}/stacl_wk5) >>${log_path}/stacl_wk5 2>&1
721-
print_info $? stacl_wk5
722-
723710
time (sed -i "s/batch_size: 500/batch_size: 100/g" config/transformer.yaml
724-
sed -i 's#init_from_params: "trained_models/step_final/"#init_from_params: "./trained_models_5/step_1/"#g' config/transformer.yaml
711+
sed -i 's#init_from_params: "trained_models/step_final/"#init_from_params: "./trained_models/step_1/"#g' config/transformer.yaml
725712
python predict.py --config ./config/transformer.yaml >${log_path}/stacl_predict) >>${log_path}/stacl_predict 2>&1
726713
print_info $? stacl_predict
727714
}
@@ -1135,60 +1122,68 @@ fast_transformer
11351122
}
11361123
#######################################
# Smoke-test the ernie-3.0 example scripts: train/eval/export, predict and
# compress for the seq_cls, token_cls and qa scripts.
# Each step writes stdout+stderr to ${log_path}/<step name> (truncating any
# previous log) and passes its exit status and step name to print_info.
# Globals: nlp_dir (read), log_path (read)
#######################################
ernie-3.0(){
    cd "${nlp_dir}/model_zoo/ernie-3.0/"

    # training
    python run_seq_cls.py --model_name_or_path ernie-3.0-medium-zh --dataset afqmc \
        --output_dir ./best_models --export_model_dir best_models/ \
        --do_train --do_eval --do_export --config=configs/default.yml \
        --max_steps=2 --save_step=2 >"${log_path}/ernie-3.0_train_seq_cls" 2>&1
    print_info $? ernie-3.0_train_seq_cls
    python run_token_cls.py --model_name_or_path ernie-3.0-medium-zh --dataset msra_ner \
        --output_dir ./best_models --export_model_dir best_models/ \
        --do_train --do_eval --do_export --config=configs/default.yml \
        --max_steps=2 --save_step=2 >"${log_path}/ernie-3.0_train_token_cls" 2>&1
    print_info $? ernie-3.0_train_token_cls
    python run_qa.py --model_name_or_path ernie-3.0-medium-zh --dataset cmrc2018 \
        --output_dir ./best_models --export_model_dir best_models/ \
        --do_train --do_eval --do_export --config=configs/default.yml \
        --max_steps=2 --save_step=2 >"${log_path}/ernie-3.0_train_qa" 2>&1
    print_info $? ernie-3.0_train_qa

    # prediction (loads the models written by the training steps above)
    python run_seq_cls.py --model_name_or_path best_models/afqmc/ --dataset afqmc \
        --output_dir ./best_models --do_predict --config=configs/default.yml \
        >"${log_path}/ernie-3.0_predict_seq_cls" 2>&1
    print_info $? ernie-3.0_predict_seq_cls
    python run_token_cls.py --model_name_or_path best_models/msra_ner/ --dataset msra_ner \
        --output_dir ./best_models --do_predict --config=configs/default.yml \
        >"${log_path}/ernie-3.0_predict_token_cls" 2>&1
    print_info $? ernie-3.0_predict_token_cls
    python run_qa.py --model_name_or_path best_models/cmrc2018/ --dataset cmrc2018 \
        --output_dir ./best_models --do_predict --config=configs/default.yml \
        >"${log_path}/ernie-3.0_predict_qa" 2>&1
    print_info $? ernie-3.0_predict_qa

    # compression
    # (--save_steps was passed twice for seq_cls; the duplicate is dropped.)
    python compress_seq_cls.py --model_name_or_path best_models/afqmc/ --dataset afqmc \
        --output_dir ./best_models/afqmc --config=configs/default.yml \
        --max_steps 10 --eval_steps 5 --save_steps 5 --algo_list mse --batch_size_list 4 \
        >"${log_path}/ernie-3.0_compress_seq_cls" 2>&1
    print_info $? ernie-3.0_compress_seq_cls
    python compress_token_cls.py --model_name_or_path best_models/msra_ner/ --dataset msra_ner \
        --output_dir ./best_models/msra_ner --config=configs/default.yml \
        --max_steps 10 --eval_steps 5 --save_steps 5 --algo_list mse --batch_size_list 4 \
        >"${log_path}/ernie-3.0_compress_token_cls" 2>&1
    print_info $? ernie-3.0_compress_token_cls
    python compress_qa.py --model_name_or_path best_models/cmrc2018/ --dataset cmrc2018 \
        --output_dir ./best_models/cmrc2018 --config=configs/default.yml \
        --max_steps 10 --eval_steps 5 --save_steps 5 --algo_list mse --batch_size_list 4 \
        >"${log_path}/ernie-3.0_compress_qa" 2>&1
    print_info $? ernie-3.0_compress_qa
}
11681147
#######################################
# Run the ernie-health unit test with pytest, logging stdout+stderr to
# ${log_path}/ernie-health_unittest and reporting the exit status through
# print_info. Prints a notice and does nothing else if the test file is absent.
# Globals: nlp_dir (read), log_path (read)
#######################################
ernie-health(){
    # Work from ${nlp_dir} so the existence check and the pytest argument name
    # the same file. Changing into tests/model_zoo/ and then passing
    # tests/model_zoo/test_ernie-health.py would resolve the path relative to
    # that sub-directory, where it does not exist.
    # NOTE(review): assumes pytest is meant to run from ${nlp_dir} - confirm.
    cd "${nlp_dir}"
    if [ ! -f 'tests/model_zoo/test_ernie-health.py' ];then
        # Message text: "model test file does not exist!"
        echo '模型测试文件不存在!'
    else
        python -m pytest tests/model_zoo/test_ernie-health.py >"${log_path}/ernie-health_unittest" 2>&1
        # Pass exactly <status> <log name>, like every other print_info call.
        print_info $? ernie-health_unittest
    fi
}
11761156
#######################################
# Smoke-test the UIE example: download the doccano sample export, convert it
# with doccano.py, fine-tune for 2 steps with finetune.py, then run
# evaluate.py on the exported checkpoint.
# Each step writes stdout+stderr to ${log_path}/<step name> and passes its
# exit status and step name to print_info.
# Globals: nlp_dir (read), log_path (read)
#######################################
uie(){
    cd "${nlp_dir}/model_zoo/uie/"

    # Fetch the sample into ./data without changing directory: the following
    # commands use paths relative to the uie directory (doccano.py, ./data/...).
    # `mkdir -p` and `wget -O` keep the step repeatable on a reused workspace.
    mkdir -p data
    wget -O ./data/doccano_ext.json https://bj.bcebos.com/paddlenlp/datasets/uie/doccano_ext.json

    python doccano.py --doccano_file ./data/doccano_ext.json --task_type ext --save_dir ./data \
        --splits 0.8 0.2 0 --schema_lang ch >"${log_path}/uie_doccano" 2>&1
    print_info $? uie_doccano

    # The space before `2>&1` matters: without it the `2` becomes part of the
    # log file name and stderr is not captured.
    python -u -m paddle.distributed.launch finetune.py --device gpu --logging_steps 2 --save_steps 2 --eval_steps 2 --seed 42 \
        --model_name_or_path uie-base --output_dir ./checkpoint/model_best --train_path data/train.txt --dev_path data/dev.txt \
        --max_seq_length 512 --per_device_eval_batch_size 16 --per_device_train_batch_size 16 --num_train_epochs 100 --learning_rate 1e-5 \
        --do_train --do_eval --do_export --export_model_dir ./checkpoint/model_best --label_names start_positions end_positions \
        --overwrite_output_dir --disable_tqdm True --metric_for_best_model eval_f1 --load_best_model_at_end True \
        --save_total_limit 1 --max_steps 2 >"${log_path}/uie_train" 2>&1
    print_info $? uie_train

    python evaluate.py --model_path ./checkpoint/model_best --test_path ./data/dev.txt \
        --batch_size 16 --max_seq_len 512 >"${log_path}/uie_eval" 2>&1
    print_info $? uie_eval
}
11851171
#######################################
# Smoke-test the ernie-layout NER example: train for 2 steps with run_ner.py,
# export the model with export_model.py, then run deploy/python/infer.py on
# the downloaded sample images.
# Each step writes stdout+stderr to ${log_path}/<step name> and passes its
# exit status and step name to print_info.
# Globals: nlp_dir (read), log_path (read)
#######################################
ernie-layout(){
    cd "${nlp_dir}/model_zoo/ernie-layout/"

    # train ner
    python -u run_ner.py --model_name_or_path ernie-layoutx-base-uncased --output_dir ./ernie-layoutx-base-uncased/models/funsd/ \
        --dataset_name funsd --do_train --do_eval --max_steps 2 --eval_steps 2 --save_steps 2 --save_total_limit 1 --seed 1000 --overwrite_output_dir \
        --load_best_model_at_end --pattern ner-bio --preprocessing_num_workers 4 --overwrite_cache false --doc_stride 128 --target_size 1000 \
        --per_device_train_batch_size 4 --per_device_eval_batch_size 4 --learning_rate 2e-5 --lr_scheduler_type constant --gradient_accumulation_steps 1 \
        --metric_for_best_model eval_f1 --greater_is_better true >"${log_path}/ernie-layout_train" 2>&1
    print_info $? ernie-layout_train

    # export ner
    # The space before `2>&1` matters: without it the `2` becomes part of the
    # log file name and stderr is not captured.
    python export_model.py --task_type ner --model_path ./ernie-layoutx-base-uncased/models/funsd/ \
        --output_path ./ner_export >"${log_path}/ernie-layout_export" 2>&1
    print_info $? ernie-layout_export

    # deploy ner
    cd "${nlp_dir}/model_zoo/ernie-layout/deploy/python"
    # -O / -o keep the download and extraction repeatable on a reused
    # workspace (no images.zip.1 copy, no interactive overwrite prompt).
    wget -O images.zip https://bj.bcebos.com/paddlenlp/datasets/document_intelligence/images.zip && unzip -o images.zip
    python infer.py --model_path_prefix ../../ner_export/inference --task_type ner --lang "en" \
        --batch_size 8 >"${log_path}/ernie-layout_deploy" 2>&1
    print_info $? ernie-layout_deploy
}
11941189
# Dispatch: expand the script's first positional argument and execute it as a
# command, so the caller picks which case function to run by name.
# Left unquoted on purpose: an empty/missing argument expands to nothing.
$1

scripts/regression/run_release.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ else
9595
P0case_EXCODE=0
9696
fi
9797
if [ $P0case_EXCODE -ne 0 ] ; then
98-
cd logs
98+
cd model_logs/
9999
FF=`ls *_FAIL*|wc -l`
100100
echo -e "\033[31m ---- P0case failed number: ${FF} \033[0m"
101101
ls *_FAIL*

0 commit comments

Comments
 (0)