Skip to content

Commit 8212b53

Browse files
authored
[AutoParallel] enable ci for dp amp clip (#9062)
* Update ci_case_auto.sh
* Update ci_case_auto.sh
* Update ci_case_auto.sh
* Update ci_case_auto.sh
* Update ci_case_auto.sh
* Update ci_case_auto.sh
1 parent c4f7acf commit 8212b53

File tree

1 file changed

+7 -7 lines changed

1 file changed

+7 -7 lines changed

scripts/distribute/ci_case_auto.sh

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1169,7 +1169,7 @@ function llama_align_dygraph_dy2st_auto_bs2_bf16_DP2-MP1-PP1() {
11691169
export FLAGS_call_stack_level=3
11701170
export NVIDIA_TF32_OVERRIDE=0
11711171
export FLAGS_enable_pir_api=1
1172-
export FLAGS_max_inplace_grad_add=3
1172+
export FLAGS_max_inplace_grad_add=4
11731173

11741174
task_name="llama_align_dygraph_dy2st_auto_bs2_bf16_dp2"
11751175
case_out_dir="output/$task_name"
@@ -1191,7 +1191,7 @@ function llama_align_dygraph_dy2st_auto_bs2_bf16_DP2-MP1-PP1() {
11911191
--weight_decay 0.01 \
11921192
--warmup_ratio 0.01 \
11931193
--warmup_steps 30 \
1194-
--max_grad_norm 0.0 \
1194+
--max_grad_norm 1.0 \
11951195
--learning_rate 3e-05 \
11961196
--min_learning_rate 3e-06 \
11971197
--max_steps 10 \
@@ -1217,17 +1217,17 @@ function llama_align_dygraph_dy2st_auto_bs2_bf16_DP2-MP1-PP1() {
12171217
--recompute_use_reentrant true \
12181218
--recompute_granularity full \
12191219
--pp_recompute_interval 0 \
1220-
--bf16 1\
1220+
--bf16 1 \
12211221
--fp16_opt_level "O2" \
12221222
--amp_custom_black_list "reduce_sum" "c_softmax_with_cross_entropy" \
12231223
--amp_custom_white_list "lookup_table" "lookup_table_v2" \
12241224
--amp_master_grad 1 \
12251225
--fuse_attention_ffn true \
1226-
--fuse_attention_qkv false \
1226+
--fuse_attention_qkv true \
12271227
--fuse_sequence_parallel_allreduce false \
12281228
--use_flash_attention 0 \
12291229
--use_fused_rope false \
1230-
--use_fused_rms_norm 0 \
1230+
--use_fused_rms_norm 1 \
12311231
--max_seq_length 4096 \
12321232
--sep_parallel_degree 1 \
12331233
--sequence_parallel false \
@@ -1244,9 +1244,9 @@ function llama_align_dygraph_dy2st_auto_bs2_bf16_DP2-MP1-PP1() {
12441244
ips=-1
12451245
mem=-1
12461246
echo "result: to_static=$to_static loss=$loss ips=$ips mem=$mem"
1247-
loss_base=10.06303482
1247+
loss_base=9.97198105
12481248
if [ $IS_A100 -ne 0 ];then
1249-
loss_base=10.24704742
1249+
loss_base=10.18783569
12501250
fi
12511251
ips_base=-1
12521252
mem_base=-1

0 commit comments

Comments (0)