Skip to content

Commit 97b22f3

Browse files
authored
[CI] add_llm_unittest (#6913)
* [CI] add_llm_unittest * fix lint * update * update * update * fix lint * update * fix finetune output * fix lint * fix predict * update
1 parent dcad8b0 commit 97b22f3

File tree

8 files changed

+336
-13
lines changed

8 files changed

+336
-13
lines changed

scripts/regression/ci_case.sh

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1076,17 +1076,10 @@ python -m pytest scripts/regression/test_taskflow.py >${log_path}/taskflow >>${l
10761076
print_info $? taskflow
10771077
}
10781078
transformers(){
1079-
echo ' RUN all transformers unittest'
1080-
cd ${nlp_dir}/tests/transformers/
1081-
for apicase in `ls`;do
1082-
if [[ ${apicase##*.} == "py" ]];then
1083-
continue
1084-
else
1085-
cd ${nlp_dir}
1086-
python -m pytest tests/transformers/${apicase}/test_*.py >${nlp_dir}/unittest_logs/${apicase}_unittest.log 2>&1
1087-
print_info $? tests ${apicase}_unittest
1088-
fi
1089-
done
1079+
echo ' RUN all LLMs unittest'
1080+
export RUN_SLOW_TEST=True
1081+
python -m pytest tests/llm/test_*.py >${nlp_dir}/unittest_logs/llm_unittest.log 2>&1
1082+
print_info $? llm_unittest
10901083
}
10911084
fast_generation(){
10921085
cd ${nlp_dir}/fast_generation/samples

scripts/regression/run_ci.sh

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ all_P0case_dic=(["waybill_ie"]=3 ["msra_ner"]=15 ["glue"]=2 ["bert"]=2 ["skep"]=
3232
["ofa"]=2 ["albert"]=2 ["SQuAD"]=20 ["lexical_analysis"]=5 ["seq2seq"]=5 ["word_embedding"]=5 \
3333
["ernie-ctm"]=5 ["distilbert"]=5 ["transformer"]=5 ["pet"]=5 ["efl"]=5 ["p-tuning"]=5 ["ernie-doc"]=20 ["transformer-xl"]=5 \
3434
["question_matching"]=5 ["ernie-csc"]=5 ["nptag"]=5 ["ernie-m"]=5 ["taskflow"]=5 ["clue"]=5 ["textcnn"]=5 \
35-
["fast_generation"]=10 ["ernie-3.0"]=5 ["ernie-layout"]=5 ["uie"]=5 ["ernie-health"]=5 \
35+
["fast_generation"]=10 ["ernie-3.0"]=5 ["ernie-layout"]=5 ["uie"]=5 ["ernie-health"]=5 ["transformers"]=5 \
3636
["ernie"]=2 ["ernie_m"]=5 ["ernie_layout"]=5 ["ernie_csc"]=5 ["ernie_ctm"]=5 ["ernie_doc"]=20 ["ernie_health"]=5 ["gpt-3"]=5)
3737
####################################
3838
# Insatll paddlepaddle-gpu
@@ -118,7 +118,7 @@ for file_name in `git diff --numstat upstream/${AGILE_COMPILE_BRANCH} |awk '{pri
118118
elif [[ ${!all_P0case_dic[*]} =~ ${dir2} ]];then
119119
P0case_list[${#P0case_list[*]}]=${dir2}
120120
elif [[ ${dir2} =~ "transformers" ]];then
121-
# P0case_list[${#P0case_list[*]}]=transformers
121+
P0case_list[${#P0case_list[*]}]=transformers
122122
if [[ ${!all_P0case_dic[*]} =~ ${dir3} ]];then
123123
P0case_list[${#P0case_list[*]}]=${dir3}
124124
fi
@@ -147,6 +147,8 @@ for file_name in `git diff --numstat upstream/${AGILE_COMPILE_BRANCH} |awk '{pri
147147
# P0case_list[${#P0case_list[*]}]=${dir2}
148148
# Normal_dic[${dir2}]="${dir1}/${dir2}/"
149149
fi
150+
elif [[ ${dir1} =~ "llm" ]];then # 模型升级
151+
P0case_list[${#P0case_list[*]}]=transformers
150152
elif [[ ${dir1} =~ "tests" ]];then # 新增单测
151153
if [[ ${dir2} =~ "transformers" ]] ;then
152154
if [[ ${dir3##*.} == "py" ]];then

tests/fixtures/llm/data/dev.json

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
{"src": "类型#上衣*材质#牛仔布*颜色#白色*风格#简约*图案#刺绣*衣样式#外套*衣款式#破洞", "tgt": "简约而不简单的牛仔外套,白色的衣身十分百搭。衣身多处有做旧破洞设计,打破单调乏味,增加一丝造型看点。衣身后背处有趣味刺绣装饰,丰富层次感,彰显别样时尚。"}
2+
{"src": "类型#裙*材质#针织*颜色#纯色*风格#复古*风格#文艺*风格#简约*图案#格子*图案#纯色*图案#复古*裙型#背带裙*裙长#连衣裙*裙领型#半高领", "tgt": "这款BRAND针织两件套连衣裙,简约的纯色半高领针织上衣,修饰着颈部线,尽显优雅气质。同时搭配叠穿起一条背带式的复古格纹裙,整体散发着一股怀旧的时髦魅力,很是文艺范。"}
3+
{"src": "类型#上衣*风格#嘻哈*图案#卡通*图案#印花*图案#撞色*衣样式#卫衣*衣款式#连帽", "tgt": "嘻哈玩转童年,随时<UNK>,没错,出街还是要靠卫衣来装酷哦!时尚个性的连帽设计,率性有范还防风保暖。还有胸前撞色的卡通印花设计,靓丽抢眼更富有趣味性,加上前幅大容量又时尚美观的袋鼠兜,简直就是孩子耍帅装酷必备的利器。"}
4+
{"src": "类型#裤*风格#英伦*风格#简约", "tgt": "裤子是简约大方的版型设计,带来一种极简主义风格而且不乏舒适优雅感,是衣橱必不可少的一件百搭单品。标志性的logo可以体现出一股子浓郁的英伦风情,轻而易举带来独一无二的<UNK>体验。"}
5+
{"src": "类型#裙*裙下摆#弧形*裙腰型#高腰*裙长#半身裙*裙款式#不规则*裙款式#收腰", "tgt": "这款来自梵凯的半身裙富有十足的设计感,采用了别致的不规则设计,凸显出时尚前卫的格调,再搭配俏皮的高腰设计,收腰提臀的同时还勾勒出优美迷人的身材曲线,而且还帮你拉长腿部比例,释放出优雅娇俏的小女人味。并且独特的弧形下摆还富有流畅的线条美,一颦一动间展现出灵动柔美的气质。"}

tests/fixtures/llm/data/train.json

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
{"src": "类型#裤*版型#宽松*风格#性感*图案#线条*裤型#阔腿裤", "tgt": "宽松的阔腿裤这两年真的吸粉不少,明星时尚达人的心头爱。毕竟好穿时尚,谁都能穿出腿长2米的效果宽松的裤腿,当然是遮肉小能手啊。上身随性自然不拘束,面料亲肤舒适贴身体验感棒棒哒。系带部分增加设计看点,还让单品的设计感更强。腿部线条若隐若现的,性感撩人。颜色敲温柔的,与裤子本身所呈现的风格有点反差萌。"}
2+
{"src": "类型#裙*风格#简约*图案#条纹*图案#线条*图案#撞色*裙型#鱼尾裙*裙袖长#无袖", "tgt": "圆形领口修饰脖颈线条,适合各种脸型,耐看有气质。无袖设计,尤显清凉,简约横条纹装饰,使得整身人鱼造型更为生动立体。加之撞色的鱼尾下摆,深邃富有诗意。收腰包臀,修饰女性身体曲线,结合别出心裁的鱼尾裙摆设计,勾勒出自然流畅的身体轮廓,展现了婀娜多姿的迷人姿态。"}
3+
{"src": "类型#上衣*版型#宽松*颜色#粉红色*图案#字母*图案#文字*图案#线条*衣样式#卫衣*衣款式#不规则", "tgt": "宽松的卫衣版型包裹着整个身材,宽大的衣身与身材形成鲜明的对比描绘出纤瘦的身形。下摆与袖口的不规则剪裁设计,彰显出时尚前卫的形态。被剪裁过的样式呈现出布条状自然地垂坠下来,别具有一番设计感。线条分明的字母样式有着花式的外观,棱角分明加上具有少女元气的枣红色十分有年轻活力感。粉红色的衣身把肌肤衬托得很白嫩又健康。"}
4+
{"src": "类型#裙*版型#宽松*材质#雪纺*风格#清新*裙型#a字*裙长#连衣裙", "tgt": "踩着轻盈的步伐享受在午后的和煦风中,让放松与惬意感为你免去一身的压力与束缚,仿佛要将灵魂也寄托在随风摇曳的雪纺连衣裙上,吐露出<UNK>微妙而又浪漫的清新之意。宽松的a字版型除了能够带来足够的空间,也能以上窄下宽的方式强化立体层次,携带出自然优雅的曼妙体验。"}
5+
{"src": "类型#上衣*材质#棉*颜色#蓝色*风格#潮*衣样式#polo*衣领型#polo领*衣袖长#短袖*衣款式#拼接", "tgt": "想要在人群中脱颖而出吗?那么最适合您的莫过于这款polo衫短袖,采用了经典的polo领口和柔软纯棉面料,让您紧跟时尚潮流。再配合上潮流的蓝色拼接设计,使您的风格更加出众。就算单从选料上来说,这款polo衫的颜色沉稳经典,是这个季度十分受大众喜爱的风格了,而且兼具舒适感和时尚感。"}
6+
{"src": "类型#上衣*版型#h*材质#蚕丝*风格#复古*图案#条纹*图案#复古*图案#撞色*衣样式#衬衫*衣领型#小立领", "tgt": "小女人十足的条纹衬衣,缎面一点点的复古,还有蓝绿色这种高级气质复古色,真丝材质,撞色竖条纹特别的现代感味道,直h型的裁剪和特别的衣长款式,更加独立性格。双层小立领,更显脸型。"}
7+
{"src": "类型#裙*材质#网纱*颜色#粉红色*图案#线条*图案#刺绣*裙腰型#高腰*裙长#连衣裙*裙袖长#短袖*裙领型#圆领", "tgt": "这款连衣裙,由上到下都透出一丝迷人诱惑的女性魅力,经典圆领型,开口度恰好,露出你的迷人修长的脖颈线条,很是优雅气质,短袖设计,在这款上竟是撩人美貌,高腰线,散开的裙摆,到小腿的长度,遮住了腿部粗的部分,对身材有很好的修饰作用,穿起来很女神;裙身粉红色花枝重工刺绣,让人一眼难忘!而且在这种网纱面料上做繁复图案的绣花,是很考验工艺的,对机器的要求会更高,更加凸显我们的高品质做工;"}
8+
{"src": "类型#上衣*颜色#纯色*图案#纯色*图案#文字*图案#印花*衣样式#卫衣", "tgt": "一款非常简洁大方的纯色卫衣,设计点在于胸前的“<UNK><UNK>”的中文字印花,新颖特别,让人眼前一亮。简单又吸睛的款式,而且不失时髦感,很适合个性年轻人。"}
9+
{"src": "类型#上衣*版型#宽松*颜色#黑色*颜色#灰色*颜色#姜黄色*风格#休闲*图案#线条*图案#撞色*衣样式#毛衣*衣袖型#落肩袖", "tgt": "看惯了灰色的冷淡和黑色的沉闷感,来一点醒目的彩色增添点活力吧。亮眼又吸睛的姜黄色色调,嫩肤显白非常的有设计感。趣味的撞色和宽松的版型相交辉映,修饰身形小缺点的同时,时尚又百搭。优雅的落肩袖,轻松修饰肩部线条,让毛衣上身凸显出一丝慵懒随性的休闲感,时尚魅力尽显。"}
10+
{"src": "类型#上衣*风格#休闲*风格#潮*图案#印花*图案#撞色*衣样式#衬衫*衣领型#圆领*衣长#中长款*衣长#常规*衣袖长#无袖", "tgt": "黑与白,两种最极端的颜色却轻松搭配成了经典,就像此款衬衣,无需过多装饰,仅色调就足够醒目个性,受潮<UNK>所喜欢。做了无袖中长款的样式,走路带风的感觉着实不错,圆领的设计,不是常规的衬衫领,少了点正式反而有种休闲感觉,适合孩子们穿着。后背大面积撞色印花装点,是时尚潮流的象征,也让衣衣不至于单调,轻松就能穿出彩。"}
11+
{"src": "类型#上衣*版型#宽松*风格#街头*风格#休闲*风格#朋克*图案#字母*图案#文字*图案#印花*衣样式#卫衣*衣款式#连帽*衣款式#对称", "tgt": "个性休闲风的连帽卫衣造型时髦大方,宽松的版型剪裁让肉肉的小宝贝也可以穿着,保暖的连帽设计时刻给予宝贝温柔的呵护,袖子和后背别致时髦的字母印花点缀,满满的街头元素融入,演绎休闲朋克风,对称的小口袋美观大方,方便放置更多的随身物品。"}
12+
{"src": "类型#裙*裙款式#链条", "tgt": "简单大气的设计,不费吹灰之力就能搭配的时髦范儿。时尚的配色一点都不觉得平淡了,有种浑然天成的大气感。强调了整体的装饰,和谐又不失个性,搭配裤装帅气十足,搭配裙子精致优雅。链条和肩带的搭配让使用感更加舒服,单肩手提都好看。"}
13+
{"src": "类型#裤*版型#显瘦*材质#牛仔布*颜色#深蓝色*风格#复古*图案#复古*图案#线条*裤腰型#高腰*裤口#微喇裤", "tgt": "深蓝色的高腰牛仔裤,修身的款式勾勒出纤细的美腿。牛仔裤的裤脚设计<UNK>张开的喇叭型,巧妙地修饰了小腿的线条,洋溢着复古的年代感。"}
14+
{"src": "类型#上衣*风格#清新*风格#潮*风格#性感*图案#条纹*图案#蝴蝶结*衣样式#衬衫*衣领型#一字领*衣门襟#系带*衣款式#不对称", "tgt": "这是一件显得特别清新的衬衣,采用了条纹的设计,给予人一种甜美可人的气质。并且融合了别致的斜肩一字领设计,高调的展示出性感的锁骨,将迷人的香肩展现在外,性感中不失去清纯的气息。袖口处的蝴蝶结系带装饰,增添了俏皮的韵味,简洁大方。且在下摆处采用了不对称的设计,增强了视觉效果,更显潮流。"}
15+
{"src": "类型#裤*材质#牛仔布*风格#复古*图案#复古*裤型#直筒裤*裤款式#纽扣*裤腰型#高腰", "tgt": "作为基础款单品,牛仔裤也<UNK><UNK>,想要呈现给大家的是——每次搭配都有新感觉。裤子经过复古做旧处理,风格鲜明,也很注重细节,连纽扣也做了统一的做旧处理,融入个性十足的磨破设计,高腰直筒basic裤型,修饰身材,穿出高挑长腿。"}
16+
{"src": "类型#上衣*版型#宽松*版型#显瘦*图案#线条*图案#刺绣*衣样式#针织衫*衣领型#v领", "tgt": "一款温暖柔软又富有弹性的针织衫,不仅可以抵御严寒侵袭,还能更好地进行搭配。v领的设计,能勾勒出迷人的天鹅颈以及衬托出娇小的脸型。宽松又别致的剪裁,能从视觉上显露纤长的下半身,起到显瘦的效果。直筒造型的袖子,修饰出优美的手臂线条,衣身上的方格刺绣,时尚又吸睛。"}
17+
{"src": "类型#上衣*颜色#绿色*风格#清新*图案#线条*衣样式#衬衫*衣领型#翻领", "tgt": "绿色的衣身上镶嵌着<UNK>,就是这款衬衫最大的迷人之处,“红花配绿叶”般的色调,将清新气息阐述的淋漓尽致。经典的翻领更是贴心,修饰颈部线条的同时,尽显精致干练的气质,出街轻松凹造型。"}
18+
{"src": "类型#上衣*图案#字母*图案#文字*图案#印花*图案#撞色*衣样式#外套*衣门襟#拉链*衣款式#拉链", "tgt": "这款外套采用了撞色拉链织带以及字母印花设计。这两种元素的融入使外套不会显得过于单调沉闷,吸睛而亮眼,充满年轻与朝气感,非常减龄。"}
19+
{"src": "类型#裙*版型#显瘦*版型#h*风格#复古*图案#复古*图案#刺绣*裙长#连衣裙*裙袖长#长袖*裙领型#翻领*裙衣门襟#单排扣", "tgt": "本款连衣裙整体采用h型的轮廓设计,藏肉显瘦,不挑身材,适合各种身形的人穿着。小翻领的领口设计,使得本款连衣裙穿在身上看起来十分的精神帅气,具有青春活力。单排扣的衣门襟设计,又给本款连衣裙带来了一丝的复古味道。裙身上的刺绣花朵装饰,使得本款连衣裙不显得单调,富有层次感,上身给人一种独特的时尚魅力。长袖的设计,更加的贴合手臂曲线,上身更加的舒适贴身。"}
20+
{"src": "类型#上衣*颜色#粉色*风格#清新*衣样式#外套*衣样式#西装*衣门襟#双排扣", "tgt": "这款外套设计成西装的版型,彰显经典优雅的气质,结合了粉色又添清新气息,甜美百搭时尚感满满。利落的版型简洁流畅,亮色双排扣更添精致感。"}

tests/fixtures/llm/llama.yaml

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
pretrain:
2+
default:
3+
model_type: llama
4+
model_name_or_path: __internal_testing__/micro-random-llama
5+
input_dir: ./llm/llama/data/
6+
output: ./output/pretrain
7+
weight_decay: 0.01
8+
max_steps: 2
9+
save_steps: 2
10+
eval_steps: 2
11+
logging_steps: 1
12+
warmup_steps: 2
13+
warmup_ratio: 0.01
14+
per_device_train_batch_size: 1
15+
per_device_eval_batch_size: 1
16+
device: gpu
17+
data_impl: mmap
18+
fp16: true
19+
fp16_opt_level: O2
20+
do_train: true
21+
do_predict: true
22+
use_flash_attention: 1
23+
use_fused_rms_norm: 0
24+
continue_training: 1
25+
26+
finetune:
27+
default:
28+
model_name_or_path: __internal_testing__/micro-random-llama
29+
dataset_name_or_path: ./tests/fixtures/llm/data
30+
output_dir: ./checkpoints/llama_sft_ckpts
31+
save_steps: 2
32+
max_steps: 2
33+
per_device_train_batch_size: 1
34+
per_device_eval_batch_size: 1
35+
tensor_parallel_degree: 1
36+
pipeline_parallel_degree: 1
37+
38+
quant:
39+
default:
40+
dataset_name_or_path: ./tests/fixtures/llm/data
41+
per_device_train_batch_size: 2
42+
per_device_eval_batch_size: 2
43+
model_name_or_path: ./checkpoints/llama_sft_ckpts/checkpoint-2
44+
output_dir: ./checkpoints/llama_ptq_ckpts
45+
46+
merge_tp_params:
47+
default:
48+
model_name_or_path: ./checkpoints/llama_sft_ckpts/checkpoint-2
49+
50+
merge_lora_params:
51+
default:
52+
model_name_or_path: __internal_testing__/micro-random-llama
53+
lora_path: ./checkpoints/llama_lora_ckpts/checkpoint-2
54+
55+
predict:
56+
default:
57+
model_name_or_path: __internal_testing__/micro-random-llama
58+
batch_size: 1
59+
data_file: ./tests/fixtures/llm/data/dev.json
60+
dtype: float16
61+
mode: dynamic

tests/llm/__init__.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.

0 commit comments

Comments
 (0)