@@ -288,7 +288,7 @@ function llama_dygraph_auto_bs4_bf16_SD2() {
--tensor_parallel_degree 1 \
--sharding "stage1" \
--data_parallel_config "enable_allreduce_avg_in_gradinent_scale gradient_sync_after_accumulate" \
- --sharding_parallel_config $sharding_config \
+ --sharding_parallel_config "$sharding_config" \
--to_static 0 \
--amp_custom_black_list "reduce_sum" "c_softmax_with_cross_entropy" \
--amp_custom_white_list "lookup_table" "lookup_table_v2" \
@@ -301,7 +301,7 @@ function llama_dygraph_auto_bs4_bf16_SD2() {
echo "case=$case_name sharding_config=$sharding_config acc_step=$acc_step"
if [ "$case_name" = "default" ]; then
    loss_base=9.23504105
- elif [ "$case_name" = "tensor_fusion_overlap" ]; then
+ elif [[ "$case_name" =~ "tensor_fusion_overlap" ]]; then
    if [ $acc_step -eq 1 ]; then
        loss_base=9.23504868
    else
@@ -1826,7 +1826,7 @@ function llama_align_dygraph_dy2st_pir_auto_grad_merge_bs2_fp32_DP1-MP1-PP1() {
rm -rf $case_log_dir
rm -rf ${log_path}/$FUNCNAME

- /usr/bin/python -u -m paddle.distributed.launch \
+ python -u -m paddle.distributed.launch \
    --gpus "0" \
    --log_dir $case_log_dir \
    run_pretrain_auto.py \
@@ -2011,14 +2011,19 @@ function llama_align_dy2st_fthenb_and_vpp_auto_bs2_fp32_DP1-MP1-PP4() {
fi
echo "result: $pp_mode loss=$loss"
done
+ loss_base_fthenb=10.24240494
+ loss_base_vpp=10.24149513 # Paddle PR#74530
ips=-1
mem=-1
ips_base=-1
mem_base=-1
for step in $(seq 1 $max_step); do
    echo "step=$step fthenb loss: ${loss1_array[$step-1]}, vpp loss: ${loss2_array[$step-1]}"
done
- check_result $FUNCNAME ${loss1} ${loss2} ${ips_base} ${ips} ${mem_base} ${mem}
+ echo "FThenB check"
+ check_result $FUNCNAME ${loss_base_fthenb} ${loss1} ${ips_base} ${ips} ${mem_base} ${mem}
+ echo "VPP check"
+ check_result $FUNCNAME ${loss_base_vpp} ${loss2} ${ips_base} ${ips} ${mem_base} ${mem}
echo "=========== $FUNCNAME run end ==========="
}
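For reference, a minimal standalone bash sketch (hypothetical case names and output, not taken from the CI script) of the matching rule the second hunk switches to: with `[[ ... =~ ... ]]`, a quoted right-hand side is matched literally anywhere in the string, so the branch now catches any case name containing `tensor_fusion_overlap` rather than only the exact name.

```bash
#!/usr/bin/env bash
# Hypothetical case names, used purely to illustrate the branch selection.
for case_name in "default" "tensor_fusion_overlap" "tensor_fusion_overlap_acc2"; do
    if [ "$case_name" = "default" ]; then
        echo "$case_name -> exact match on default"
    elif [[ "$case_name" =~ "tensor_fusion_overlap" ]]; then
        # =~ with a quoted pattern does a literal substring match, so both
        # overlap variants take this branch.
        echo "$case_name -> tensor_fusion_overlap branch"
    else
        echo "$case_name -> no baseline selected"
    fi
done
```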