Skip to content

Commit 76b49f0

Browse files
Merge pull request #16539 from guru4elephant/train_with_pipe_reader_merge_develop
Train with pipe reader merge develop
2 parents d8d73ff + d7963e1 commit 76b49f0

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

84 files changed

+10322
-431
lines changed

paddle/fluid/API.spec

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,9 @@ paddle.fluid.cpu_places (ArgSpec(args=['device_count'], varargs=None, keywords=N
1515
paddle.fluid.cuda_pinned_places (ArgSpec(args=['device_count'], varargs=None, keywords=None, defaults=(None,)), ('document', 'd0c3ebd813c39958c92b78e3eef7e912'))
1616
paddle.fluid.Executor.__init__ (ArgSpec(args=['self', 'place'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
1717
paddle.fluid.Executor.close (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'f5369953dd0c443961cf79f7a00e1a03'))
18+
paddle.fluid.Executor.infer_from_dataset (ArgSpec(args=['self', 'program', 'dataset', 'scope', 'thread', 'debug', 'fetch_list', 'fetch_info', 'print_period'], varargs=None, keywords=None, defaults=(None, None, None, 0, False, None, None, 100)), ('document', '9c7decb955b9c4f718114179c8985581'))
1819
paddle.fluid.Executor.run (ArgSpec(args=['self', 'program', 'feed', 'fetch_list', 'feed_var_name', 'fetch_var_name', 'scope', 'return_numpy', 'use_program_cache'], varargs=None, keywords=None, defaults=(None, None, None, 'feed', 'fetch', None, True, False)), ('document', 'f482e93b38b4018796969a2e1dde479d'))
20+
paddle.fluid.Executor.train_from_dataset (ArgSpec(args=['self', 'program', 'dataset', 'scope', 'thread', 'debug', 'fetch_list', 'fetch_info', 'print_period'], varargs=None, keywords=None, defaults=(None, None, None, 0, False, None, None, 100)), ('document', 'd521011d79e71080fe9b5bb179b43518'))
1921
paddle.fluid.global_scope (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', 'e148d3ab1ed8edf3e928212a375959c0'))
2022
paddle.fluid.scope_guard (ArgSpec(args=['scope'], varargs=None, keywords=None, defaults=None), ('document', 'b94d1f6bcc29c4fb58fc0058561250c2'))
2123
paddle.fluid.DistributeTranspiler.__init__ (ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
@@ -36,15 +38,15 @@ paddle.fluid.DataFeedDesc.desc (ArgSpec(args=['self'], varargs=None, keywords=No
3638
paddle.fluid.DataFeedDesc.set_batch_size (ArgSpec(args=['self', 'batch_size'], varargs=None, keywords=None, defaults=None), ('document', '8d9f44601e0a99dd431f14fd9250cd21'))
3739
paddle.fluid.DataFeedDesc.set_dense_slots (ArgSpec(args=['self', 'dense_slots_name'], varargs=None, keywords=None, defaults=None), ('document', 'eb894b464bbcd1b4bc8038398954f766'))
3840
paddle.fluid.DataFeedDesc.set_use_slots (ArgSpec(args=['self', 'use_slots_name'], varargs=None, keywords=None, defaults=None), ('document', '415c56600ce4e198c071cad01409a690'))
39-
paddle.fluid.AsyncExecutor.__init__ (ArgSpec(args=['self', 'place', 'run_mode'], varargs=None, keywords=None, defaults=(None, '')), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
40-
paddle.fluid.AsyncExecutor.config_distributed_nodes (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '4810dbe1870452f16b3c60b6c5fd1459'))
41-
paddle.fluid.AsyncExecutor.download_data (ArgSpec(args=['self', 'afs_path', 'local_path', 'fs_default_name', 'ugi', 'file_cnt', 'hadoop_home', 'process_num'], varargs=None, keywords=None, defaults=('$HADOOP_HOME', 12)), ('document', '799a2066cc26819f1ed31f47c15ad083'))
41+
paddle.fluid.AsyncExecutor.__init__ (ArgSpec(args=['self', 'place', 'run_mode'], varargs=None, keywords=None, defaults=(None, '')), ('document', '4e85874dddcd06c38f5717992d741589'))
42+
paddle.fluid.AsyncExecutor.config_distributed_nodes (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '762980fe0181eb41e3d1081b26ed76b1'))
43+
paddle.fluid.AsyncExecutor.download_data (ArgSpec(args=['self', 'afs_path', 'local_path', 'fs_default_name', 'ugi', 'file_cnt', 'hadoop_home', 'process_num'], varargs=None, keywords=None, defaults=('$HADOOP_HOME', 12)), ('document', '39e3ccddf8ea8db75ea85287c9147c3b'))
4244
paddle.fluid.AsyncExecutor.get_instance (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'f8688f76a2db1243c7097a60c507b182'))
4345
paddle.fluid.AsyncExecutor.init_model (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '504f39be2007404a17e5cabea1256c7d'))
44-
paddle.fluid.AsyncExecutor.init_server (ArgSpec(args=['self', 'dist_desc'], varargs=None, keywords=None, defaults=None), ('document', 'c403ab46c5d3ef25c0f7e94ae75dcb68'))
45-
paddle.fluid.AsyncExecutor.init_worker (ArgSpec(args=['self', 'dist_desc', 'startup_program'], varargs=None, keywords=None, defaults=None), ('document', 'dcf08f4bf2f3282acf11391f5d39c536'))
46+
paddle.fluid.AsyncExecutor.init_server (ArgSpec(args=['self', 'dist_desc'], varargs=None, keywords=None, defaults=None), ('document', '384fa5fbb99912db1baf7ef7784bd312'))
47+
paddle.fluid.AsyncExecutor.init_worker (ArgSpec(args=['self', 'dist_desc', 'startup_program'], varargs=None, keywords=None, defaults=None), ('document', 'f0a36d7c8561039f60a6f6555c7fee0b'))
4648
paddle.fluid.AsyncExecutor.run (ArgSpec(args=['self', 'program', 'data_feed', 'filelist', 'thread_num', 'fetch', 'mode', 'debug'], varargs=None, keywords=None, defaults=('', False)), ('document', '848fc53484e8326f6325feea87fe955c'))
47-
paddle.fluid.AsyncExecutor.save_model (ArgSpec(args=['self', 'save_path'], varargs=None, keywords=None, defaults=None), ('document', 'c8ac0dfcb3b187aba25d03af7fea56b2'))
49+
paddle.fluid.AsyncExecutor.save_model (ArgSpec(args=['self', 'save_path'], varargs=None, keywords=None, defaults=None), ('document', '145b5c0da01bfff397142e51361f4b75'))
4850
paddle.fluid.AsyncExecutor.stop (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '5f23d043607bb5d55e466ec3f578e093'))
4951
paddle.fluid.CompiledProgram.__init__ (ArgSpec(args=['self', 'program_or_graph'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
5052
paddle.fluid.CompiledProgram.with_data_parallel (ArgSpec(args=['self', 'loss_name', 'build_strategy', 'exec_strategy', 'share_vars_from', 'places'], varargs=None, keywords=None, defaults=(None, None, None, None, None)), ('document', 'a8c7793803cf976680d9478e378fa356'))

paddle/fluid/framework/CMakeLists.txt

Lines changed: 40 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
12
#Windows treats a symbolic file as a real file, which is different from Unix.
23
#We create a hidden file and compile it instead of the original source file.
34
function(windows_symbolic TARGET)
@@ -22,9 +23,13 @@ endfunction()
2223

2324
add_subdirectory(ir)
2425
add_subdirectory(details)
26+
add_subdirectory(fleet)
27+
add_subdirectory(io)
2528
#ddim lib
2629
proto_library(framework_proto SRCS framework.proto)
30+
proto_library(data_feed_proto SRCS data_feed.proto)
2731
proto_library(async_executor_proto SRCS data_feed.proto)
32+
proto_library(trainer_desc_proto SRCS trainer_desc.proto data_feed.proto)
2833

2934
cc_library(ddim SRCS ddim.cc DEPS eigen3 boost enforce)
3035
cc_test(ddim_test SRCS ddim_test.cc DEPS ddim)
@@ -129,9 +134,11 @@ cc_test(version_test SRCS version_test.cc DEPS version)
129134
cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS shape_inference op_info operator glog version)
130135

131136
cc_library(op_registry SRCS op_registry.cc DEPS op_proto_maker op_info operator glog proto_desc memory_optimize_helper)
137+
132138
nv_test(op_registry_test SRCS op_registry_test.cc DEPS op_registry)
133139

134140
py_proto_compile(framework_py_proto SRCS framework.proto data_feed.proto)
141+
py_proto_compile(trainer_py_proto SRCS trainer_desc.proto data_feed.proto)
135142
#Generate an empty \
136143
#__init__.py to make framework_py_proto a valid Python module.
137144
add_custom_target(framework_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
@@ -165,14 +172,24 @@ else()
165172
endif()
166173

167174
cc_library(executor_gc_helper SRCS executor_gc_helper.cc DEPS scope proto_desc operator garbage_collector)
168-
169175
if(WITH_DISTRIBUTE)
170-
cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog
171-
lod_rank_table feed_fetch_method sendrecvop_rpc ${GLOB_DISTRIBUTE_DEPS} graph_to_program_pass variable_helper ${NGRAPH_EXE_DEPS})
172-
set(DISTRIBUTE_COMPILE_FLAGS "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor")
173-
set_source_files_properties(executor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
176+
cc_library(executor SRCS executor.cc multi_trainer.cc dataset_factory.cc
177+
dist_multi_trainer.cc trainer_factory.cc trainer.cc data_feed_factory.cc
178+
data_feed.cc device_worker.cc hogwild_worker.cc downpour_worker.cc
179+
pull_dense_worker.cc device_worker_factory.cc data_set.cc DEPS op_registry
180+
device_context scope framework_proto trainer_desc_proto glog fs shell fleet_wrapper lodtensor_printer
181+
lod_rank_table feed_fetch_method sendrecvop_rpc ${GLOB_DISTRIBUTE_DEPS}
182+
graph_to_program_pass variable_helper data_feed_proto ${NGRAPH_EXE_DEPS} timer)
183+
set(DISTRIBUTE_COMPILE_FLAGS "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor")
184+
set_source_files_properties(executor.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
174185
else()
175-
cc_library(executor SRCS executor.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass variable_helper ${NGRAPH_EXE_DEPS})
186+
cc_library(executor SRCS executor.cc multi_trainer.cc dataset_factory.cc
187+
dist_multi_trainer.cc trainer_factory.cc trainer.cc data_feed_factory.cc
188+
data_feed.cc device_worker.cc hogwild_worker.cc downpour_worker.cc
189+
pull_dense_worker.cc device_worker_factory.cc data_set.cc DEPS op_registry
190+
device_context scope framework_proto data_feed_proto trainer_desc_proto glog
191+
lod_rank_table fs shell fleet_wrapper lodtensor_printer feed_fetch_method
192+
graph_to_program_pass variable_helper ${NGRAPH_EXE_DEPS} timer data_feed_proto)
176193
cc_test(test_naive_executor SRCS naive_executor_test.cc DEPS naive_executor elementwise_add_op)
177194
endif()
178195

@@ -183,11 +200,15 @@ cc_library(parallel_executor SRCS parallel_executor.cc DEPS
183200
graph build_strategy
184201
fast_threaded_ssa_graph_executor variable_helper)
185202

186-
if(WITH_PSLIB)
187-
cc_library(async_executor SRCS async_executor.cc data_feed.cc data_feed_factory.cc executor_thread_worker.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass async_executor_proto variable_helper pslib_brpc pslib timer)
188-
else()
189-
cc_library(async_executor SRCS async_executor.cc data_feed.cc data_feed_factory.cc executor_thread_worker.cc DEPS op_registry device_context scope framework_proto glog lod_rank_table feed_fetch_method graph_to_program_pass async_executor_proto variable_helper timer)
190-
endif(WITH_PSLIB)
203+
cc_library(async_executor SRCS async_executor.cc data_feed.cc data_feed_factory.cc
204+
executor_thread_worker.cc multi_trainer.cc dist_multi_trainer.cc
205+
trainer_factory.cc trainer.cc device_worker.cc hogwild_worker.cc
206+
downpour_worker.cc pull_dense_worker.cc device_worker_factory.cc
207+
data_set.cc dataset_factory.cc
208+
DEPS op_registry device_context scope framework_proto
209+
trainer_desc_proto glog lod_rank_table fleet_wrapper lodtensor_printer
210+
feed_fetch_method graph_to_program_pass data_feed_proto
211+
variable_helper timer fs shell)
191212

192213

193214
cc_test(data_feed_test SRCS data_feed_test.cc DEPS async_executor)
@@ -214,18 +235,18 @@ cc_test(dlpack_tensor_test SRCS dlpack_tensor_test.cc DEPS dlpack_tensor glog)
214235
# Get the current working branch
215236
execute_process(
216237
COMMAND git rev-parse --abbrev-ref HEAD
217-
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
218-
OUTPUT_VARIABLE PADDLE_BRANCH
219-
OUTPUT_STRIP_TRAILING_WHITESPACE
220-
)
238+
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
239+
OUTPUT_VARIABLE PADDLE_BRANCH
240+
OUTPUT_STRIP_TRAILING_WHITESPACE
241+
)
221242

222243
# Get the latest abbreviated commit hash of the working branch
223244
execute_process(
224245
COMMAND git log -1 --format=%h
225-
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
226-
OUTPUT_VARIABLE PADDLE_COMMIT
227-
OUTPUT_STRIP_TRAILING_WHITESPACE
228-
)
246+
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
247+
OUTPUT_VARIABLE PADDLE_COMMIT
248+
OUTPUT_STRIP_TRAILING_WHITESPACE
249+
)
229250

230251
message(STATUS "commit: ${PADDLE_COMMIT}")
231252
message(STATUS "branch: ${PADDLE_BRANCH}")

0 commit comments

Comments
 (0)