Skip to content

Commit 99302a7

Browse files
author
root
committed
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into fix_import_plot_py3
2 parents 5a220dc + 177720a commit 99302a7

File tree

141 files changed

+3507
-971
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

141 files changed

+3507
-971
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -28,3 +28,4 @@ third_party/
2828
build_*
2929
# clion workspace.
3030
cmake-build-*
31+
model_test

CMakeLists.txt

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -69,6 +69,7 @@ option(WITH_ANAKIN "Compile with Anakin library" OFF)
6969
option(WITH_GRPC "Use grpc as the default rpc framework" ${WITH_DISTRIBUTE})
7070
option(WITH_BRPC_RDMA "Use brpc rdma as the rpc protocal" OFF)
7171
option(WITH_INFERENCE "Compile fluid inference library" ON)
72+
option(ON_INFER "Turn on inference optimization." OFF)
7273
option(WITH_INFERENCE_API_TEST "Test fluid inference high-level api interface" OFF)
7374
option(WITH_SYSTEM_BLAS "Use system blas library" OFF)
7475
option(PY_VERSION "Compile PaddlePaddle with python3 support" ${PY_VERSION})
@@ -179,6 +180,7 @@ include(external/eigen) # download eigen3
179180
include(external/pybind11) # download pybind11
180181
include(external/cares)
181182
include(external/cub)
183+
include(external/xxhash) # download xxhash
182184

183185
if (NOT WIN32)
184186
# there is no official support of snappystream, warpctc, nccl, cupti in windows
@@ -301,3 +303,8 @@ if(WITH_DOC)
301303
find_python_module(recommonmark REQUIRED)
302304
add_subdirectory(doc)
303305
endif()
306+
307+
if (ON_INFER)
308+
message(WARNING "On inference mode, will take place some specific optimization.")
309+
add_definitions(-DPADDLE_ON_INFERENCE)
310+
endif()

Dockerfile

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -75,14 +75,14 @@ RUN pip3 install -U wheel && \
7575
pip3 install -U docopt PyYAML sphinx==1.5.6 && \
7676
pip3 install sphinx-rtd-theme==0.1.9 recommonmark && \
7777
easy_install -U pip && \
78-
pip install -U wheel && \
78+
pip install -U pip setuptools wheel && \
7979
pip install -U docopt PyYAML sphinx==1.5.6 && \
8080
pip install sphinx-rtd-theme==0.1.9 recommonmark
8181

82-
RUN pip3 install pre-commit 'ipython==5.3.0' && \
82+
RUN pip3 install 'pre-commit==1.10.4' 'ipython==5.3.0' && \
8383
pip3 install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
8484
pip3 install opencv-python && \
85-
pip install pre-commit 'ipython==5.3.0' && \
85+
pip install 'pre-commit==1.10.4' 'ipython==5.3.0' && \
8686
pip install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
8787
pip install opencv-python
8888

benchmark/fluid/args.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -142,5 +142,10 @@ def parse_args():
142142
choices=['reduce', 'all_reduce'],
143143
default='all_reduce',
144144
help='Specify the reduce strategy, can be reduce, all_reduce')
145+
parser.add_argument(
146+
'--fuse_broadcast_op',
147+
action='store_true',
148+
help='If set, would fuse multiple broadcast operators into one fused_broadcast operator.'
149+
)
145150
args = parser.parse_args()
146151
return args

benchmark/fluid/fluid_benchmark.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -177,6 +177,7 @@ def train_parallel(train_args, test_args, args, train_prog, test_prog,
177177
else:
178178
build_strategy.reduce_strategy = fluid.BuildStrategy(
179179
).ReduceStrategy.AllReduce
180+
build_strategy.fuse_broadcast_op = args.fuse_broadcast_op
180181

181182
avg_loss = train_args[0]
182183

@@ -240,7 +241,6 @@ def train_parallel(train_args, test_args, args, train_prog, test_prog,
240241

241242
if args.use_fake_data or args.use_reader_op:
242243
try:
243-
244244
fetch_ret = exe.run(fetch_list)
245245
except fluid.core.EOFException as eof:
246246
break

cmake/external/xxhash.cmake

Lines changed: 46 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,46 @@
1+
INCLUDE(ExternalProject)
2+
3+
set(XXHASH_SOURCE_DIR ${THIRD_PARTY_PATH}/xxhash)
4+
set(XXHASH_INSTALL_DIR ${THIRD_PARTY_PATH}/install/xxhash)
5+
set(XXHASH_INCLUDE_DIR "${XXHASH_INSTALL_DIR}/include")
6+
7+
IF(WITH_STATIC_LIB)
8+
SET(BUILD_CMD make lib)
9+
ELSE()
10+
SET(BUILD_CMD sed -i "s/-Wstrict-prototypes -Wundef/-Wstrict-prototypes -Wundef -fPIC/g" ${XXHASH_SOURCE_DIR}/src/extern_xxhash/Makefile && make lib)
11+
ENDIF()
12+
13+
ExternalProject_Add(
14+
extern_xxhash
15+
${EXTERNAL_PROJECT_LOG_ARGS}
16+
GIT_REPOSITORY "https://github.com/Cyan4973/xxHash"
17+
GIT_TAG "v0.6.5"
18+
PREFIX ${XXHASH_SOURCE_DIR}
19+
DOWNLOAD_NAME "xxhash"
20+
UPDATE_COMMAND ""
21+
CONFIGURE_COMMAND ""
22+
BUILD_IN_SOURCE 1
23+
PATCH_COMMAND
24+
BUILD_COMMAND ${BUILD_CMD}
25+
INSTALL_COMMAND export PREFIX=${XXHASH_INSTALL_DIR}/ && make install
26+
TEST_COMMAND ""
27+
)
28+
29+
set(XXHASH_LIBRARIES "${XXHASH_INSTALL_DIR}/lib/libxxhash.a")
30+
INCLUDE_DIRECTORIES(${XXHASH_INCLUDE_DIR})
31+
32+
add_library(xxhash STATIC IMPORTED GLOBAL)
33+
set_property(TARGET xxhash PROPERTY IMPORTED_LOCATION ${XXHASH_LIBRARIES})
34+
include_directories(${XXHASH_INCLUDE_DIR})
35+
add_dependencies(xxhash extern_xxhash)
36+
37+
LIST(APPEND external_project_dependencies xxhash)
38+
39+
IF(WITH_C_API)
40+
INSTALL(DIRECTORY ${XXHASH_INCLUDE_DIR} DESTINATION third_party/xxhash)
41+
IF(ANDROID)
42+
INSTALL(FILES ${XXHASH_LIBRARIES} DESTINATION third_party/xxhash/lib/${ANDROID_ABI})
43+
ELSE()
44+
INSTALL(FILES ${XXHASH_LIBRARIES} DESTINATION third_party/xxhash/lib)
45+
ENDIF()
46+
ENDIF()

cmake/inference_lib.cmake

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,9 @@
1414

1515
# make package for paddle fluid shared and static library
1616
function(copy TARGET)
17+
if (NOT ON_INFER)
18+
message(WARNING "Turn on the ON_INFER flag when building inference_lib only.")
19+
endif()
1720
set(options "")
1821
set(oneValueArgs "")
1922
set(multiValueArgs SRCS DSTS DEPS)
@@ -31,7 +34,7 @@ function(copy TARGET)
3134
foreach(index RANGE ${len})
3235
list(GET copy_lib_SRCS ${index} src)
3336
list(GET copy_lib_DSTS ${index} dst)
34-
add_custom_command(TARGET ${TARGET} PRE_BUILD
37+
add_custom_command(TARGET ${TARGET} PRE_BUILD
3538
COMMAND mkdir -p "${dst}"
3639
COMMAND cp -r "${src}" "${dst}"
3740
COMMENT "copying ${src} -> ${dst}")
@@ -67,6 +70,13 @@ copy(boost_lib
6770
DEPS boost
6871
)
6972

73+
set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/xxhash")
74+
copy(xxhash_lib
75+
SRCS ${XXHASH_INCLUDE_DIR} ${XXHASH_LIBRARIES}
76+
DSTS ${dst_dir} ${dst_dir}/lib
77+
DEPS xxhash
78+
)
79+
7080
if(NOT PROTOBUF_FOUND)
7181
set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/protobuf")
7282
copy(protobuf_lib
@@ -186,7 +196,7 @@ copy(cmake_cache
186196
DSTS ${FLUID_INSTALL_DIR})
187197

188198
# This command generates a complete fluid library for both train and inference
189-
add_custom_target(fluid_lib_dist DEPENDS ${fluid_lib_dist_dep})
199+
add_custom_target(fluid_lib_dist DEPENDS ${fluid_lib_dist_dep})
190200

191201
# Following commands generate a inference-only fluid library
192202
# third_party, version.txt and CMakeCache.txt are the same position with ${FLUID_INSTALL_DIR}

paddle/fluid/API.spec

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -86,7 +86,7 @@ paddle.fluid.layers.reduce_prod ArgSpec(args=['input', 'dim', 'keep_dim', 'name'
8686
paddle.fluid.layers.sequence_first_step ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None)
8787
paddle.fluid.layers.sequence_last_step ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None)
8888
paddle.fluid.layers.sequence_slice ArgSpec(args=['input', 'offset', 'length', 'name'], varargs=None, keywords=None, defaults=(None,))
89-
paddle.fluid.layers.dropout ArgSpec(args=['x', 'dropout_prob', 'is_test', 'seed', 'name'], varargs=None, keywords=None, defaults=(False, None, None))
89+
paddle.fluid.layers.dropout ArgSpec(args=['x', 'dropout_prob', 'is_test', 'seed', 'name', 'dropout_implementation'], varargs=None, keywords=None, defaults=(False, None, None, 'downgrade_in_infer'))
9090
paddle.fluid.layers.split ArgSpec(args=['input', 'num_or_sections', 'dim', 'name'], varargs=None, keywords=None, defaults=(-1, None))
9191
paddle.fluid.layers.ctc_greedy_decoder ArgSpec(args=['input', 'blank', 'name'], varargs=None, keywords=None, defaults=(None,))
9292
paddle.fluid.layers.edit_distance ArgSpec(args=['input', 'label', 'normalized', 'ignored_tokens'], varargs=None, keywords=None, defaults=(True, None))
@@ -107,7 +107,7 @@ paddle.fluid.layers.softmax_with_cross_entropy ArgSpec(args=['logits', 'label',
107107
paddle.fluid.layers.smooth_l1 ArgSpec(args=['x', 'y', 'inside_weight', 'outside_weight', 'sigma'], varargs=None, keywords=None, defaults=(None, None, None))
108108
paddle.fluid.layers.one_hot ArgSpec(args=['input', 'depth'], varargs=None, keywords=None, defaults=None)
109109
paddle.fluid.layers.autoincreased_step_counter ArgSpec(args=['counter_name', 'begin', 'step'], varargs=None, keywords=None, defaults=(None, 1, 1))
110-
paddle.fluid.layers.reshape ArgSpec(args=['x', 'shape', 'actual_shape', 'act', 'inplace', 'name'], varargs=None, keywords=None, defaults=(None, None, True, None))
110+
paddle.fluid.layers.reshape ArgSpec(args=['x', 'shape', 'actual_shape', 'act', 'inplace', 'name'], varargs=None, keywords=None, defaults=(None, None, False, None))
111111
paddle.fluid.layers.squeeze ArgSpec(args=['input', 'axes', 'name'], varargs=None, keywords=None, defaults=(None,))
112112
paddle.fluid.layers.unsqueeze ArgSpec(args=['input', 'axes', 'name'], varargs=None, keywords=None, defaults=(None,))
113113
paddle.fluid.layers.lod_reset ArgSpec(args=['x', 'y', 'target_lod'], varargs=None, keywords=None, defaults=(None, None))
@@ -174,7 +174,9 @@ paddle.fluid.layers.mean ArgSpec(args=['x', 'name'], varargs=None, keywords=None
174174
paddle.fluid.layers.mul ArgSpec(args=['x', 'y', 'x_num_col_dims', 'y_num_col_dims', 'name'], varargs=None, keywords=None, defaults=(1, 1, None))
175175
paddle.fluid.layers.sigmoid_cross_entropy_with_logits ArgSpec(args=['x', 'label', 'name'], varargs=None, keywords=None, defaults=(None,))
176176
paddle.fluid.layers.maxout ArgSpec(args=['x', 'groups', 'name'], varargs=None, keywords=None, defaults=(None,))
177+
paddle.fluid.layers.sequence_reverse ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
177178
paddle.fluid.layers.affine_channel ArgSpec(args=['x', 'scale', 'bias', 'data_layout', 'name'], varargs=None, keywords=None, defaults=(None, None, 'NCHW', None))
179+
paddle.fluid.layers.hash ArgSpec(args=['input', 'hash_size', 'num_hash', 'name'], varargs=None, keywords=None, defaults=(1, None))
178180
paddle.fluid.layers.data ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True))
179181
paddle.fluid.layers.open_files ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None))
180182
paddle.fluid.layers.read_file ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None)
@@ -353,6 +355,8 @@ paddle.fluid.optimizer.ModelAverage.__init__ ArgSpec(args=['self', 'average_wind
353355
paddle.fluid.optimizer.ModelAverage.apply ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None)
354356
paddle.fluid.optimizer.ModelAverage.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
355357
paddle.fluid.optimizer.ModelAverage.restore ArgSpec(args=['self', 'executor'], varargs=None, keywords=None, defaults=None)
358+
paddle.fluid.optimizer.LarsMomentumOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'momentum', 'lars_coeff', 'lars_weight_decay', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.001, 0.0005, None, None))
359+
paddle.fluid.optimizer.LarsMomentumOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
356360
paddle.fluid.backward.append_backward ArgSpec(args=['loss', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None))
357361
paddle.fluid.regularizer.L1DecayRegularizer.__init__ ArgSpec(args=['self', 'regularization_coeff'], varargs=None, keywords=None, defaults=(0.0,))
358362
paddle.fluid.regularizer.L2DecayRegularizer.__init__ ArgSpec(args=['self', 'regularization_coeff'], varargs=None, keywords=None, defaults=(0.0,))

paddle/fluid/framework/details/CMakeLists.txt

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -16,12 +16,14 @@ if(WITH_GPU)
1616
dynload_cuda variable_visitor)
1717
nv_library(reduce_op_handle SRCS reduce_op_handle.cc DEPS op_handle_base variable_visitor scope ddim dynload_cuda)
1818
nv_library(broadcast_op_handle SRCS broadcast_op_handle.cc DEPS op_handle_base scope ddim memory variable_visitor dynload_cuda)
19+
nv_library(fused_broadcast_op_handle SRCS fused_broadcast_op_handle.cc DEPS broadcast_op_handle)
1920

2021
else()
2122
cc_library(all_reduce_op_handle SRCS all_reduce_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory
2223
variable_visitor)
2324
cc_library(reduce_op_handle SRCS reduce_op_handle.cc DEPS op_handle_base variable_visitor scope ddim)
2425
cc_library(broadcast_op_handle SRCS broadcast_op_handle.cc DEPS op_handle_base scope ddim memory variable_visitor)
26+
cc_library(fused_broadcast_op_handle SRCS fused_broadcast_op_handle.cc DEPS broadcast_op_handle)
2527
endif()
2628

2729
cc_library(data_balance_op_handle SRCS data_balance_op_handle.cc DEPS op_handle_base scope lod_tensor)
@@ -34,7 +36,7 @@ if(WITH_GPU)
3436
endif()
3537

3638
cc_library(multi_devices_graph_pass SRCS multi_devices_graph_pass.cc DEPS multi_devices_helper computation_op_handle
37-
scale_loss_grad_op_handle rpc_op_handle all_reduce_op_handle reduce_op_handle broadcast_op_handle data_balance_op_handle)
39+
scale_loss_grad_op_handle rpc_op_handle all_reduce_op_handle reduce_op_handle broadcast_op_handle data_balance_op_handle fused_broadcast_op_handle)
3840

3941
if(WITH_GPU)
4042
cc_library(ssa_graph_executor SRCS ssa_graph_executor.cc DEPS graph framework_proto reference_count_pass)
@@ -58,4 +60,4 @@ cc_library(fast_threaded_ssa_graph_executor SRCS fast_threaded_ssa_graph_executo
5860
cc_library(build_strategy SRCS build_strategy.cc DEPS
5961
graph_viz_pass multi_devices_graph_pass
6062
multi_devices_graph_print_pass multi_devices_graph_check_pass
61-
fuse_elewise_add_act_pass)
63+
fuse_elewise_add_act_pass multi_batch_merge_pass)

paddle/fluid/framework/details/broadcast_op_handle.cc

Lines changed: 14 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -48,16 +48,23 @@ void BroadcastOpHandle::RunImpl() {
4848
var_scopes.emplace_back(s->FindVar(kLocalExecScopeName)->Get<Scope *>());
4949
}
5050

51+
BroadcastOneVar(*in_var_handle, out_var_handles, var_scopes);
52+
}
53+
54+
void BroadcastOpHandle::BroadcastOneVar(
55+
const VarHandle &in_var_handle,
56+
const std::vector<VarHandle *> &out_var_handles,
57+
const std::vector<const Scope *> &var_scopes) {
5158
auto *in_var =
52-
var_scopes.at(in_var_handle->scope_idx_)->FindVar(in_var_handle->name_);
59+
var_scopes.at(in_var_handle.scope_idx_)->FindVar(in_var_handle.name_);
5360
PADDLE_ENFORCE_NOT_NULL(in_var);
5461
Tensor &in_tensor = VariableVisitor::GetMutableTensor(in_var);
5562

56-
InitOutputValue(*in_var_handle, out_var_handles);
63+
InitOutputValue(in_var_handle, out_var_handles);
5764

5865
if (platform::is_cpu_place(in_tensor.place())) {
5966
for (auto *out_var_handle : out_var_handles) {
60-
if (out_var_handle->IsTheSameVar(*in_var_handle)) {
67+
if (out_var_handle->IsTheSameVar(in_var_handle)) {
6168
continue;
6269
}
6370
auto &out_p = out_var_handle->place_;
@@ -114,12 +121,12 @@ void BroadcastOpHandle::RunImpl() {
114121
}
115122
}
116123

117-
if (!out_handle->IsTheSameVar(*in_var_handle)) {
118-
auto out_var = var_scopes.at(in_var_handle->scope_idx_)
124+
if (!out_handle->IsTheSameVar(in_var_handle)) {
125+
auto out_var = var_scopes.at(in_var_handle.scope_idx_)
119126
->FindVar(out_var_handles[0]->name_);
120127
paddle::framework::TensorCopy(
121-
in_tensor, in_var_handle->place_,
122-
*(dev_ctxes_.at(in_var_handle->place_)),
128+
in_tensor, in_var_handle.place_,
129+
*(dev_ctxes_.at(in_var_handle.place_)),
123130
&VariableVisitor::GetMutableTensor(out_var));
124131
}
125132
});

0 commit comments

Comments
 (0)