Skip to content

Commit 2d92b6b

Browse files
committed
merge develop
test=develop
2 parents c20db63 + c64d959 commit 2d92b6b

File tree

175 files changed

+8243
-2019
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

175 files changed

+8243
-2019
lines changed

paddle/fluid/API.spec

Lines changed: 20 additions & 22 deletions
Large diffs are not rendered by default.

paddle/fluid/framework/details/CMakeLists.txt

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ cc_library(rpc_op_handle SRCS rpc_op_handle.cc DEPS framework_proto scope place
99
cc_library(multi_devices_helper SRCS multi_devices_helper.cc DEPS graph graph_helper)
1010
cc_library(multi_devices_graph_print_pass SRCS multi_devices_graph_print_pass.cc DEPS multi_devices_helper)
1111
cc_library(multi_devices_graph_check_pass SRCS multi_devices_graph_check_pass.cc DEPS multi_devices_helper)
12+
cc_library(alloc_continuous_space_for_grad_pass SRCS alloc_continuous_space_for_grad_pass.cc DEPS graph graph_helper)
1213

1314
cc_library(variable_visitor SRCS variable_visitor.cc DEPS lod_tensor selected_rows)
1415

@@ -22,6 +23,8 @@ endif()
2223
if(WITH_GPU)
2324
nv_library(all_reduce_op_handle SRCS all_reduce_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory
2425
dynload_cuda variable_visitor)
26+
nv_library(fused_all_reduce_op_handle SRCS fused_all_reduce_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory
27+
dynload_cuda variable_visitor)
2528
if(WITH_DISTRIBUTE)
2629
nv_library(reduce_op_handle SRCS reduce_op_handle.cc DEPS op_handle_base variable_visitor scope
2730
ddim dynload_cuda selected_rows_functor sendrecvop_rpc)
@@ -35,6 +38,8 @@ if(WITH_GPU)
3538
else()
3639
cc_library(all_reduce_op_handle SRCS all_reduce_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory
3740
variable_visitor)
41+
cc_library(fused_all_reduce_op_handle SRCS fused_all_reduce_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory
42+
variable_visitor)
3843
if(WITH_DISTRIBUTE)
3944
cc_library(reduce_op_handle SRCS reduce_op_handle.cc DEPS op_handle_base variable_visitor scope
4045
ddim selected_rows_functor sendrecvop_rpc)
@@ -46,9 +51,7 @@ else()
4651
cc_library(fused_broadcast_op_handle SRCS fused_broadcast_op_handle.cc DEPS broadcast_op_handle)
4752
endif()
4853

49-
cc_library(data_balance_op_handle SRCS data_balance_op_handle.cc DEPS op_handle_base scope lod_tensor)
5054
cc_library(gather_op_handle SRCS gather_op_handle.cc DEPS op_handle_base scope ddim memory variable_visitor)
51-
cc_library(fuse_vars_op_handle SRCS fuse_vars_op_handle.cc DEPS op_handle_base scope)
5255

5356
if(WITH_GPU)
5457
cc_library(memory_optimize_helper SRCS memory_optimize_helper.cc DEPS graph graph_helper gpu_info)
@@ -69,7 +72,9 @@ cc_library(sequential_execution_pass SRCS sequential_execution_pass.cc DEPS grap
6972
cc_library(all_reduce_deps_pass SRCS all_reduce_deps_pass.cc DEPS graph graph_helper pass)
7073

7174
cc_library(multi_devices_graph_pass SRCS multi_devices_graph_pass.cc DEPS multi_devices_helper computation_op_handle
72-
scale_loss_grad_op_handle rpc_op_handle all_reduce_op_handle reduce_op_handle broadcast_op_handle data_balance_op_handle fused_broadcast_op_handle)
75+
scale_loss_grad_op_handle rpc_op_handle all_reduce_op_handle reduce_op_handle broadcast_op_handle fused_broadcast_op_handle)
76+
77+
cc_library(fuse_all_reduce_op_pass SRCS fuse_all_reduce_op_pass.cc DEPS graph graph_helper fused_all_reduce_op_handle)
7378

7479
set(SSA_GRAPH_EXECUTOR_DEPS graph framework_proto sequential_execution_pass modify_op_lock_and_record_event_pass all_reduce_deps_pass reference_count_pass eager_deletion_pass memory_optimize_pass inplace_op_pass)
7580
if (WITH_GPU)
@@ -98,5 +103,5 @@ cc_library(build_strategy SRCS build_strategy.cc DEPS
98103
graph_viz_pass multi_devices_graph_pass
99104
multi_devices_graph_print_pass multi_devices_graph_check_pass
100105
fuse_elewise_add_act_pass multi_batch_merge_pass
101-
fuse_relu_depthwise_conv_pass
102-
memory_optimize_pass lock_free_optimize_pass)
106+
fuse_relu_depthwise_conv_pass
107+
memory_optimize_pass lock_free_optimize_pass alloc_continuous_space_for_grad_pass fuse_all_reduce_op_pass)

paddle/fluid/framework/details/all_reduce_op_handle.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,8 @@
1111
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
14-
#include <algorithm>
15-
1614
#include "paddle/fluid/framework/details/all_reduce_op_handle.h"
15+
#include <algorithm>
1716
#include "paddle/fluid/framework/details/container_cast.h"
1817
#include "paddle/fluid/framework/details/reduce_and_gather.h"
1918
#include "paddle/fluid/framework/details/variable_visitor.h"
@@ -56,6 +55,7 @@ void AllReduceOpHandle::RunImpl() {
5655
platform::RecordEvent record_event(Name());
5756

5857
WaitInputVarGenerated();
58+
5959
auto in_var_handles = DynamicCast<VarHandle>(this->Inputs());
6060
auto out_var_handles = DynamicCast<VarHandle>(this->Outputs());
6161
PADDLE_ENFORCE_EQ(

0 commit comments

Comments
 (0)