@@ -9,6 +9,7 @@ cc_library(rpc_op_handle SRCS rpc_op_handle.cc DEPS framework_proto scope place
9
9
# multi_devices_helper: built from one source file and depending on the
# graph and graph_helper targets. Several other targets in this file list
# it in their own DEPS.
cc_library(multi_devices_helper
  SRCS multi_devices_helper.cc
  DEPS graph graph_helper)
10
10
# multi_devices_graph_print_pass: one source file; its only declared
# dependency is multi_devices_helper.
cc_library(multi_devices_graph_print_pass
  SRCS multi_devices_graph_print_pass.cc
  DEPS multi_devices_helper)
11
11
# multi_devices_graph_check_pass: one source file; its only declared
# dependency is multi_devices_helper.
cc_library(multi_devices_graph_check_pass
  SRCS multi_devices_graph_check_pass.cc
  DEPS multi_devices_helper)
12
# alloc_continuous_space_for_grad_pass: one source file, depending on the
# graph and graph_helper targets.
# The leading "+" diff marker that preceded the command name has been
# removed; a bare "+" before a command is not valid CMake.
cc_library(alloc_continuous_space_for_grad_pass
  SRCS alloc_continuous_space_for_grad_pass.cc
  DEPS graph graph_helper)
12
13
13
14
# variable_visitor: one source file, depending on lod_tensor and
# selected_rows. The op-handle libraries in this file list it in DEPS.
cc_library(variable_visitor
  SRCS variable_visitor.cc
  DEPS lod_tensor selected_rows)
14
15
@@ -22,6 +23,8 @@ endif()
22
23
if (WITH_GPU )
23
24
# all_reduce_op_handle as declared in the WITH_GPU branch. Compared with the
# declaration in the else() branch, the DEPS list additionally contains
# dynload_cuda.
# The stray tokens "24" and "25" (line-number residue from a diff rendering)
# were removed from the argument list, where they would have been read as
# extra dependency names.
nv_library(all_reduce_op_handle
  SRCS all_reduce_op_handle.cc
  DEPS op_handle_base scope lod_tensor ddim memory
       dynload_cuda variable_visitor)
26
# fused_all_reduce_op_handle as declared in the WITH_GPU branch. Its DEPS
# list is the same as the one given for all_reduce_op_handle in this branch,
# including dynload_cuda.
# The "+" diff markers and the stray "27" token were removed so that the
# call is a single well-formed command invocation.
nv_library(fused_all_reduce_op_handle
  SRCS fused_all_reduce_op_handle.cc
  DEPS op_handle_base scope lod_tensor ddim memory
       dynload_cuda variable_visitor)
25
28
if (WITH_DISTRIBUTE )
26
29
# reduce_op_handle for the WITH_GPU + WITH_DISTRIBUTE combination. The DEPS
# list includes dynload_cuda and sendrecvop_rpc in this configuration.
# The stray tokens "27" and "30" (line-number residue from a diff rendering)
# were removed from the argument list, where they would have been read as
# extra dependency names.
nv_library(reduce_op_handle
  SRCS reduce_op_handle.cc
  DEPS op_handle_base variable_visitor scope
       ddim dynload_cuda selected_rows_functor sendrecvop_rpc)
@@ -35,6 +38,8 @@ if(WITH_GPU)
35
38
else ()
36
39
# all_reduce_op_handle as declared in the else() branch of the WITH_GPU
# check. The DEPS list has no dynload_cuda entry here.
# The stray tokens "37" and "40" (line-number residue from a diff rendering)
# were removed from the argument list, where they would have been read as
# extra dependency names.
cc_library(all_reduce_op_handle
  SRCS all_reduce_op_handle.cc
  DEPS op_handle_base scope lod_tensor ddim memory
       variable_visitor)
41
# fused_all_reduce_op_handle as declared in the else() branch of the
# WITH_GPU check. Its DEPS list is the same as the one given for
# all_reduce_op_handle in this branch.
# The "+" diff markers and the stray "42" token were removed so that the
# call is a single well-formed command invocation.
cc_library(fused_all_reduce_op_handle
  SRCS fused_all_reduce_op_handle.cc
  DEPS op_handle_base scope lod_tensor ddim memory
       variable_visitor)
38
43
if (WITH_DISTRIBUTE )
39
44
# reduce_op_handle for the non-GPU + WITH_DISTRIBUTE combination. The DEPS
# list includes sendrecvop_rpc and has no dynload_cuda entry.
# The stray tokens "40" and "45" (line-number residue from a diff rendering)
# were removed from the argument list, where they would have been read as
# extra dependency names.
cc_library(reduce_op_handle
  SRCS reduce_op_handle.cc
  DEPS op_handle_base variable_visitor scope
       ddim selected_rows_functor sendrecvop_rpc)
@@ -71,6 +76,8 @@ cc_library(all_reduce_deps_pass SRCS all_reduce_deps_pass.cc DEPS graph graph_he
71
76
# multi_devices_graph_pass: one source file, depending on
# multi_devices_helper plus the op-handle libraries listed below.
# The stray tokens "72" and "77" (line-number residue from a diff rendering)
# were removed from the argument list, where they would have been read as
# extra dependency names.
cc_library(multi_devices_graph_pass
  SRCS multi_devices_graph_pass.cc
  DEPS multi_devices_helper
       computation_op_handle
       scale_loss_grad_op_handle
       rpc_op_handle
       all_reduce_op_handle
       reduce_op_handle
       broadcast_op_handle
       data_balance_op_handle
       fused_broadcast_op_handle)
73
78
79
# fuse_all_reduce_op_pass: one source file, depending on graph, graph_helper
# and fused_all_reduce_op_handle.
# The leading "+" diff marker that preceded the command name has been
# removed; a bare "+" before a command is not valid CMake.
cc_library(fuse_all_reduce_op_pass
  SRCS fuse_all_reduce_op_pass.cc
  DEPS graph graph_helper fused_all_reduce_op_handle)
80
+
74
81
# Initial value of the SSA_GRAPH_EXECUTOR_DEPS list, one entry per line.
# Entry order is kept exactly as before.
set(SSA_GRAPH_EXECUTOR_DEPS
  graph
  framework_proto
  sequential_execution_pass
  modify_op_lock_and_record_event_pass
  all_reduce_deps_pass
  reference_count_pass
  eager_deletion_pass
  memory_optimize_pass
  inplace_op_pass)
75
82
if (WITH_GPU )
76
83
# Adds reference_count_pass to SSA_GRAPH_EXECUTOR_DEPS inside the WITH_GPU
# branch.
# NOTE(review): reference_count_pass is already part of the initial value
# given to SSA_GRAPH_EXECUTOR_DEPS by the set() call just above, so this
# append looks redundant — confirm before removing.
list (APPEND SSA_GRAPH_EXECUTOR_DEPS reference_count_pass )
@@ -98,5 +105,5 @@ cc_library(build_strategy SRCS build_strategy.cc DEPS
98
105
graph_viz_pass multi_devices_graph_pass
99
106
multi_devices_graph_print_pass multi_devices_graph_check_pass
100
107
fuse_elewise_add_act_pass multi_batch_merge_pass
101
- fuse_relu_depthwise_conv_pass
102
- memory_optimize_pass lock_free_optimize_pass )
108
+ fuse_relu_depthwise_conv_pass
109
+ memory_optimize_pass lock_free_optimize_pass alloc_continuous_space_for_grad_pass fuse_all_reduce_op_pass )
0 commit comments