@@ -9,6 +9,7 @@ cc_library(rpc_op_handle SRCS rpc_op_handle.cc DEPS framework_proto scope place
9
9
cc_library (multi_devices_helper SRCS multi_devices_helper.cc DEPS graph graph_helper )
10
10
cc_library (multi_devices_graph_print_pass SRCS multi_devices_graph_print_pass.cc DEPS multi_devices_helper )
11
11
cc_library (multi_devices_graph_check_pass SRCS multi_devices_graph_check_pass.cc DEPS multi_devices_helper )
12
+ cc_library (alloc_continuous_space_for_grad_pass SRCS alloc_continuous_space_for_grad_pass.cc DEPS graph graph_helper )
12
13
13
14
cc_library (variable_visitor SRCS variable_visitor.cc DEPS lod_tensor selected_rows )
14
15
@@ -22,6 +23,8 @@ endif()
22
23
if (WITH_GPU )
23
24
nv_library (all_reduce_op_handle SRCS all_reduce_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory
24
25
dynload_cuda variable_visitor )
26
+ nv_library (fused_all_reduce_op_handle SRCS fused_all_reduce_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory
27
+ dynload_cuda variable_visitor )
25
28
if (WITH_DISTRIBUTE )
26
29
nv_library (reduce_op_handle SRCS reduce_op_handle.cc DEPS op_handle_base variable_visitor scope
27
30
ddim dynload_cuda selected_rows_functor sendrecvop_rpc )
@@ -35,6 +38,8 @@ if(WITH_GPU)
35
38
else ()
36
39
cc_library (all_reduce_op_handle SRCS all_reduce_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory
37
40
variable_visitor )
41
+ cc_library (fused_all_reduce_op_handle SRCS fused_all_reduce_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory
42
+ variable_visitor )
38
43
if (WITH_DISTRIBUTE )
39
44
cc_library (reduce_op_handle SRCS reduce_op_handle.cc DEPS op_handle_base variable_visitor scope
40
45
ddim selected_rows_functor sendrecvop_rpc )
46
51
cc_library (fused_broadcast_op_handle SRCS fused_broadcast_op_handle.cc DEPS broadcast_op_handle )
47
52
endif ()
48
53
49
- cc_library (data_balance_op_handle SRCS data_balance_op_handle.cc DEPS op_handle_base scope lod_tensor )
50
54
cc_library (gather_op_handle SRCS gather_op_handle.cc DEPS op_handle_base scope ddim memory variable_visitor )
51
- cc_library (fuse_vars_op_handle SRCS fuse_vars_op_handle.cc DEPS op_handle_base scope )
52
55
53
56
if (WITH_GPU )
54
57
cc_library (memory_optimize_helper SRCS memory_optimize_helper.cc DEPS graph graph_helper gpu_info )
@@ -69,7 +72,9 @@ cc_library(sequential_execution_pass SRCS sequential_execution_pass.cc DEPS grap
69
72
cc_library (all_reduce_deps_pass SRCS all_reduce_deps_pass.cc DEPS graph graph_helper pass )
70
73
71
74
cc_library (multi_devices_graph_pass SRCS multi_devices_graph_pass.cc DEPS multi_devices_helper computation_op_handle
72
- scale_loss_grad_op_handle rpc_op_handle all_reduce_op_handle reduce_op_handle broadcast_op_handle data_balance_op_handle fused_broadcast_op_handle )
75
+ scale_loss_grad_op_handle rpc_op_handle all_reduce_op_handle reduce_op_handle broadcast_op_handle fused_broadcast_op_handle )
76
+
77
+ cc_library (fuse_all_reduce_op_pass SRCS fuse_all_reduce_op_pass.cc DEPS graph graph_helper fused_all_reduce_op_handle )
73
78
74
79
set (SSA_GRAPH_EXECUTOR_DEPS graph framework_proto sequential_execution_pass modify_op_lock_and_record_event_pass all_reduce_deps_pass reference_count_pass eager_deletion_pass memory_optimize_pass inplace_op_pass )
75
80
if (WITH_GPU )
@@ -98,5 +103,5 @@ cc_library(build_strategy SRCS build_strategy.cc DEPS
98
103
graph_viz_pass multi_devices_graph_pass
99
104
multi_devices_graph_print_pass multi_devices_graph_check_pass
100
105
fuse_elewise_add_act_pass multi_batch_merge_pass
101
- fuse_relu_depthwise_conv_pass
102
- memory_optimize_pass lock_free_optimize_pass )
106
+ fuse_relu_depthwise_conv_pass
107
+ memory_optimize_pass lock_free_optimize_pass alloc_continuous_space_for_grad_pass fuse_all_reduce_op_pass )
0 commit comments