File tree Expand file tree Collapse file tree 1 file changed +15
-3
lines changed
Expand file tree Collapse file tree 1 file changed +15
-3
lines changed Original file line number Diff line number Diff line change @@ -1037,9 +1037,7 @@ class TrainingArguments:
10371037 )
10381038 zcc_ema_loss_threshold : Optional [float ] = field (
10391039 default = None ,
1040- metadata = {
1041- "help" : "If set not None, only do EMA when the training loss is smaller than the threshold value"
1042- },
1040+ metadata = {"help" : "If set not None, only do EMA when the training loss is smaller than the threshold value" },
10431041 )
10441042 save_tokenizer : Optional [bool ] = field (
10451043 default = True ,
@@ -1354,6 +1352,8 @@ def __post_init__(self):
13541352 "enable_offload_queue" ,
13551353 "use_dualpipev" ,
13561354 "enable_dynamic_shape" ,
1355+ "sync_moment" ,
1356+ "sync_param" ,
13571357 ]:
13581358 raise ValueError (
13591359 f"Found unknown pipeline mode config { x } , accpet config is disable_p2p_cache_shape, disable_partial_send_recv."
@@ -1404,6 +1404,18 @@ def __post_init__(self):
14041404 "use_dualpipev" : "use_dualpipev" in pipeline_parallel_config ,
14051405 "enable_dynamic_shape" : "enable_dynamic_shape" in pipeline_parallel_config ,
14061406 }
1407+
1408+ pp_sync_param = "sync_param" in pipeline_parallel_config
1409+ pp_sync_moment = "sync_moment" in pipeline_parallel_config
1410+
1411+ if pp_sync_param :
1412+ logger .info ("setting pp sync_param" )
1413+ strategy .hybrid_configs ["pp_configs" ].sync_param = True
1414+
1415+ if pp_sync_moment :
1416+ logger .info ("setting pp sync_moment" )
1417+ strategy .hybrid_configs ["pp_configs" ].sync_moment = True
1418+
14071419 if dygraph_pp_configs ["dp_comm_overlap" ]:
14081420 raise ValueError ("overlap has accuracy issue" ) # TODO: fix `overlap` + `delay_scale` issue
14091421
You can’t perform that action at this time.
0 commit comments