
Commit 2273ee7

[Distributed] Support pp non batch comm (#8097) (#8222)
* add disable_non_batch_p2p_comm to pipeline_parallel_config
1 parent 7b493a8 commit 2273ee7
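
The change wires one new token into `pipeline_parallel_config`; despite the wording of the commit message, the token the code actually accepts is `disable_batch_p2p_comm` (see the hunks below). A minimal usage sketch, assuming the usual space-separated token format of `pipeline_parallel_config` and illustrative values for the other arguments:

```python
# Usage sketch with illustrative values: pipeline_parallel_config takes
# space-separated option tokens; adding disable_batch_p2p_comm turns off
# batched send/recv so each pipeline-parallel p2p transfer is issued
# individually.
from paddlenlp.trainer import TrainingArguments

args = TrainingArguments(
    output_dir="./checkpoints",      # illustrative output path
    pipeline_parallel_degree=4,      # illustrative number of pipeline stages
    pipeline_parallel_config="disable_batch_p2p_comm",
)
```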

File tree

1 file changed: +5 −0 lines changed

paddlenlp/trainer/training_args.py

Lines changed: 5 additions & 0 deletions
@@ -253,6 +253,7 @@ class TrainingArguments:
             enable_release_grads, reduce peak memory usage by releasing gradients after each iteration. The creation of gradients will be postponed until backward propagation of the next iteration.
             enable_overlap_p2p_comm, overlap p2p communication with computation.
             enable_clear_every_step_cache, clear every step cache for pipeline parallel.
+            disable_batch_p2p_comm, disable batched send/recv in pipeline parallel mode.
         sharding_parallel_config (`str`, *optional*):
             Some additional configs that highly affect the usage of sharding parallel; we provide some options to configure it.
             The following configs are supported:
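
For context on what the new docstring entry means: with batched p2p communication, a pipeline stage's sends and receives are grouped into a single batched call; disabling it issues each transfer individually. A rough contrast sketch, assuming `paddle.distributed` exposes `P2POp`, `batch_isend_irecv`, `isend`, and `irecv` as in recent Paddle releases (an illustration, not Paddle's internal pipeline code):

```python
# Illustrative contrast between batched and non-batched p2p transfers in a
# pipeline stage. This is a sketch, not the Paddle-internal implementation.
import paddle.distributed as dist

def exchange(send_tensor, recv_tensor, prev_rank, next_rank, use_batch_p2p_comm=True):
    if use_batch_p2p_comm:
        # Default path: group both transfers into one batched call.
        ops = [
            dist.P2POp(dist.isend, send_tensor, next_rank),
            dist.P2POp(dist.irecv, recv_tensor, prev_rank),
        ]
        tasks = dist.batch_isend_irecv(ops)
    else:
        # disable_batch_p2p_comm path: issue each transfer on its own.
        tasks = [
            dist.isend(send_tensor, next_rank),
            dist.irecv(recv_tensor, prev_rank),
        ]
    for task in tasks:
        task.wait()
```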
@@ -616,6 +617,7 @@ class TrainingArguments:
                 "enable_sharding_comm_overlap, fuse sharding stage 1 parallel gradient communication. \n"
                 "enable_overlap_p2p_comm, overlap p2p communication with computation. \n"
                 "enable_clear_every_step_cache, clear every step cache for pipeline parallel. \n"
+                "disable_batch_p2p_comm, disable batched send/recv in pipeline parallel mode. \n"
             )
         },
     )
@@ -993,6 +995,7 @@ def __post_init__(self):
                 "enable_dp_comm_overlap",
                 "enable_clear_every_step_cache",
                 "enable_overlap_p2p_comm",
+                "disable_batch_p2p_comm",
             ]:
                 raise ValueError(
                     f"Found unknown pipeline mode config {x}, accepted configs are disable_p2p_cache_shape, disable_partial_send_recv."
@@ -1025,6 +1028,7 @@ def __post_init__(self):
             "release_gradients": "enable_release_grads" in pipeline_parallel_config,
             "overlap_p2p_comm": "enable_overlap_p2p_comm" in pipeline_parallel_config,
             "clear_every_step_cache": "enable_clear_every_step_cache" in pipeline_parallel_config,
+            "use_batch_p2p_comm": "disable_batch_p2p_comm" not in pipeline_parallel_config,
         }
         if dygraph_pp_configs["dp_comm_overlap"]:
             raise ValueError("overlap has accuracy issue")  # TODO: fix `overlap` + `delay_scale` issue
@@ -1249,6 +1253,7 @@ def is_segment_parallel_supported():
                 # "enable_dp_comm_overlap",  # no implementation for auto_parallel
                 # "enable_sharding_comm_overlap",  # no implementation for auto_parallel
                 # "enable_timer",  # no implementation for auto_parallel
+                # "disable_batch_p2p_comm",  # no implementation for auto_parallel
             ]:
                 raise ValueError(
                     f"Found unknown pipeline mode config {x}, accepted config is enable_send_recv_overlap."
