Skip to content

Commit d41b623

Browse files
committed
Merge branch 'develop' of https://github.com/PaddlePaddle/paddle into quan_ck
test=develop
2 parents 6db7c2a + 1096746 commit d41b623

File tree

268 files changed

+4939
-1635
lines changed

Some content is hidden

Large commits have some of their content hidden by default. Use the search box below to find content that may be hidden.

268 files changed

+4939
-1635
lines changed

CMakeLists.txt

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,12 @@ if(WITH_GPU)
193193
include(tensorrt)
194194
include(anakin_subgraph)
195195
endif()
196+
197+
if(WITH_GPU AND NOT WIN32)
198+
message(STATUS "add dgc lib.")
199+
include(external/dgc)
200+
endif()
201+
196202
if(WITH_MKL OR WITH_MKLML)
197203
include(external/anakin)
198204
elseif()

cmake/external/dgc.cmake

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
INCLUDE(ExternalProject)
16+
17+
SET(DGC_SOURCES_DIR "${THIRD_PARTY_PATH}/dgc")
18+
SET(DGC_INSTALL_DIR "${THIRD_PARTY_PATH}/install/dgc")
19+
SET(DGC_INCLUDE_DIR "${DGC_INSTALL_DIR}/include" CACHE PATH "dgc include directory." FORCE)
20+
SET(DGC_LIBRARIES "${DGC_INSTALL_DIR}/lib/libdgc.a" CACHE FILEPATH "dgc library." FORCE)
21+
INCLUDE_DIRECTORIES(${DGC_INCLUDE_DIR})
22+
23+
ExternalProject_Add(
24+
extern_dgc
25+
${EXTERNAL_PROJECT_LOG_ARGS}
26+
GIT_REPOSITORY "https://github.com/PaddlePaddle/Fleet"
27+
GIT_TAG "2d04dc3800cdd0601f1b65d547dabcc60b0cf9dc"
28+
SOURCE_DIR "${DGC_SOURCES_DIR}"
29+
CONFIGURE_COMMAND ""
30+
BUILD_COMMAND cd collective && make -j
31+
INSTALL_COMMAND mkdir -p ${DGC_INSTALL_DIR}/lib/ ${DGC_INCLUDE_DIR}/dgc
32+
&& cp ${DGC_SOURCES_DIR}/collective/build/lib/libdgc.a ${DGC_LIBRARIES}
33+
&& cp ${DGC_SOURCES_DIR}/collective/build/include/dgc.h ${DGC_INCLUDE_DIR}/dgc/
34+
BUILD_IN_SOURCE 1
35+
)
36+
37+
ADD_LIBRARY(dgc SHARED IMPORTED GLOBAL)
38+
SET_PROPERTY(TARGET dgc PROPERTY IMPORTED_LOCATION ${DGC_LIBRARIES})
39+
ADD_DEPENDENCIES(dgc extern_dgc)
40+
41+
LIST(APPEND external_project_dependencies dgc)
42+

cmake/external/ngraph.cmake

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -57,20 +57,25 @@ SET(NGRAPH_TBB_LIB ${NGRAPH_LIB_DIR}/${NGRAPH_TBB_LIB_NAME})
5757
ExternalProject_Add(
5858
${NGRAPH_PROJECT}
5959
${EXTERNAL_PROJECT_LOG_ARGS}
60-
DEPENDS ${MKLDNN_PROJECT} ${MKLML_PROJECT}
61-
GIT_REPOSITORY ${NGRAPH_GIT_REPO}
62-
GIT_TAG ${NGRAPH_GIT_TAG}
63-
PREFIX ${NGRAPH_SOURCES_DIR}
64-
UPDATE_COMMAND ""
65-
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${NGRAPH_INSTALL_DIR}
66-
CMAKE_ARGS -DNGRAPH_UNIT_TEST_ENABLE=FALSE
67-
CMAKE_ARGS -DNGRAPH_TOOLS_ENABLE=FALSE
68-
CMAKE_ARGS -DNGRAPH_INTERPRETER_ENABLE=FALSE
69-
CMAKE_ARGS -DNGRAPH_DEX_ONLY=TRUE
70-
CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
71-
CMAKE_ARGS -DMKLDNN_INCLUDE_DIR=${MKLDNN_INC_DIR}
72-
CMAKE_ARGS -DMKLDNN_LIB_DIR=${MKLDNN_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}
73-
CMAKE_ARGS -DMKLML_LIB_DIR=${MKLML_INSTALL_DIR}/lib
60+
DEPENDS ${MKLDNN_PROJECT} ${MKLML_PROJECT}
61+
GIT_REPOSITORY ${NGRAPH_GIT_REPO}
62+
GIT_TAG ${NGRAPH_GIT_TAG}
63+
PREFIX ${NGRAPH_SOURCES_DIR}
64+
UPDATE_COMMAND ""
65+
CMAKE_GENERATOR ${CMAKE_GENERATOR}
66+
CMAKE_GENERATOR_PLATFORM ${CMAKE_GENERATOR_PLATFORM}
67+
CMAKE_GENERATOR_TOOLSET ${CMAKE_GENERATOR_TOOLSET}
68+
CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
69+
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
70+
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${NGRAPH_INSTALL_DIR}
71+
CMAKE_ARGS -DNGRAPH_UNIT_TEST_ENABLE=FALSE
72+
CMAKE_ARGS -DNGRAPH_TOOLS_ENABLE=FALSE
73+
CMAKE_ARGS -DNGRAPH_INTERPRETER_ENABLE=FALSE
74+
CMAKE_ARGS -DNGRAPH_DEX_ONLY=TRUE
75+
CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
76+
CMAKE_ARGS -DMKLDNN_INCLUDE_DIR=${MKLDNN_INC_DIR}
77+
CMAKE_ARGS -DMKLDNN_LIB_DIR=${MKLDNN_INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}
78+
CMAKE_ARGS -DMKLML_LIB_DIR=${MKLML_INSTALL_DIR}/lib
7479
)
7580

7681
add_dependencies(ngraph ${NGRAPH_PROJECT})

cmake/inference_lib.cmake

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,15 @@ elseif (NOT CBLAS_FOUND OR WIN32)
131131
)
132132
endif ()
133133

134+
if (WITH_GPU AND NOT WIN32)
135+
set(dgc_dir "${FLUID_INSTALL_DIR}/third_party/install/dgc")
136+
copy(dgc_lib
137+
SRCS ${DGC_INSTALL_DIR}/lib ${DGC_INSTALL_DIR}/include
138+
DSTS ${dgc_dir} ${dgc_dir}
139+
DEPS dgc)
140+
endif()
141+
142+
134143
if (WITH_MKLDNN)
135144
set(dst_dir "${FLUID_INSTALL_DIR}/third_party/install/mkldnn")
136145
copy(mkldnn_lib

cmake/operators.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ function(op_library TARGET)
110110
# Define operators that don't need pybind here.
111111
foreach(manual_pybind_op "compare_op" "logical_op" "nccl_op"
112112
"tensor_array_read_write_op" "tensorrt_engine_op" "conv_fusion_op"
113-
"fusion_transpose_flatten_concat_op" "fusion_conv_inception_op" "sync_batch_norm_op")
113+
"fusion_transpose_flatten_concat_op" "fusion_conv_inception_op" "sync_batch_norm_op" "dgc_op")
114114
if ("${TARGET}" STREQUAL "${manual_pybind_op}")
115115
set(pybind_flag 1)
116116
endif()

paddle/fluid/API.spec

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -483,6 +483,11 @@ paddle.fluid.optimizer.LarsMomentumOptimizer.apply_gradients (ArgSpec(args=['sel
483483
paddle.fluid.optimizer.LarsMomentumOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
484484
paddle.fluid.optimizer.LarsMomentumOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
485485
paddle.fluid.optimizer.LarsMomentumOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
486+
paddle.fluid.optimizer.DGCMomentumOptimizer.__init__ (ArgSpec(args=['self', 'learning_rate', 'momentum', 'rampup_begin_step', 'rampup_step', 'sparsity', 'use_nesterov', 'local_grad_clip_norm', 'num_trainers', 'regularization', 'name'], varargs=None, keywords=None, defaults=(1, [0.999], False, None, None, None, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
487+
paddle.fluid.optimizer.DGCMomentumOptimizer.apply_gradients (ArgSpec(args=['self', 'params_grads'], varargs=None, keywords=None, defaults=None), ('document', 'bfe7305918552aaecfdaa22411dbe871'))
488+
paddle.fluid.optimizer.DGCMomentumOptimizer.backward (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None, None)), ('document', 'ba3a113d0229ff7bc9d39bda0a6d947f'))
489+
paddle.fluid.optimizer.DGCMomentumOptimizer.get_opti_var_name_list (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
490+
paddle.fluid.optimizer.DGCMomentumOptimizer.minimize (ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '35fd5d3330c97903528c7e0dacc7f6ea'))
486491
paddle.fluid.backward.append_backward (ArgSpec(args=['loss', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None)), ('document', '1a79bd7d10ae54ca763ec81bca36ba24'))
487492
paddle.fluid.regularizer.L1DecayRegularizer.__init__ (ArgSpec(args=['self', 'regularization_coeff'], varargs=None, keywords=None, defaults=(0.0,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
488493
paddle.fluid.regularizer.L2DecayRegularizer.__init__ (ArgSpec(args=['self', 'regularization_coeff'], varargs=None, keywords=None, defaults=(0.0,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))

paddle/fluid/framework/data_layout_transform.cc

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,11 @@ void TransDataLayoutFromMKLDNN(const OpKernelType& kernel_type_for_var,
134134
out_layout =
135135
out_layout == DataLayout::kAnyLayout ? DataLayout::kNCHW : out_layout;
136136

137+
auto& pool = platform::DeviceContextPool::Instance();
138+
auto* dev_ctx = dynamic_cast<platform::MKLDNNDeviceContext*>(
139+
pool.Get(expected_kernel_type.place_));
140+
auto& cpu_engine = dev_ctx->GetEngine();
141+
137142
std::vector<int> in_tz = paddle::framework::vectorize2int(in.dims());
138143
std::vector<int> out_tz = in_tz;
139144

@@ -142,25 +147,29 @@ void TransDataLayoutFromMKLDNN(const OpKernelType& kernel_type_for_var,
142147
"Input tensor type is not supported: %s", in.type());
143148
memory::data_type out_type = in_type;
144149

150+
auto in_format = platform::MKLDNNFormatForSize(in_tz.size(), in.format());
151+
auto out_format =
152+
platform::MKLDNNFormatForSize(in_tz.size(), ToMKLDNNFormat(out_layout));
153+
145154
// output tensor has the same dims as input. Reorder don't change dims
146155
out->Resize(in.dims());
147156

148-
// tempory mem pd fr out , to make reorder
149-
auto out_mem_pd = paddle::platform::create_prim_desc_from_dims(
150-
paddle::framework::vectorize2int(out->dims()),
151-
mkldnn::memory::format::blocked, out_type);
152-
if (in.get_mkldnn_prim_desc() != out_mem_pd) {
157+
if (in_format != out_format) {
153158
void* in_data = GetDataFromTensor(in, in_type);
154159
auto out_data = out->mutable_data(expected_kernel_type.place_, in.type());
155160

156-
auto in_memory = memory(in.get_mkldnn_prim_desc(), in_data);
157-
auto out_memory = memory(out_mem_pd, out_data);
161+
auto in_memory =
162+
memory({{{in_tz}, in_type, in_format}, cpu_engine}, in_data);
163+
auto out_memory =
164+
memory({{{out_tz}, out_type, out_format}, cpu_engine}, out_data);
158165

159166
platform::Reorder(in_memory, out_memory);
160167
} else {
161168
out->ShareDataWith(in);
162169
}
163170
out->set_layout(out_layout);
171+
// reset format since the out tensor will be feed to non-MKLDNN OPkernel
172+
out->set_format(memory::format::format_undef);
164173
#endif
165174
}
166175

paddle/fluid/framework/data_transform.cc

Lines changed: 6 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -51,31 +51,13 @@ void TransformData(const OpKernelType &expected_kernel_type,
5151
#ifdef PADDLE_WITH_MKLDNN
5252
// Case1 - transform from Non-MKLDNN OPKernel to MKLDNN OPKernel
5353
// Just set layout/format. No real transform occur
54+
55+
auto out_format = platform::MKLDNNFormatForSize(in.dims().size(),
56+
ToMKLDNNFormat(lin));
57+
5458
out.ShareDataWith(input_tensor);
55-
// TODO(jczaja): Remove that once all mkldnn ops
56-
// are modified to work with mkldnn_blocked
57-
auto mkldnn_fmt = [&](int rank) {
58-
switch (rank) {
59-
case 5:
60-
return mkldnn::memory::format::ncdhw;
61-
case 4:
62-
return mkldnn::memory::format::nchw;
63-
case 3:
64-
return mkldnn::memory::format::ncw;
65-
case 2:
66-
return mkldnn::memory::format::nc;
67-
case 1:
68-
return mkldnn::memory::format::x;
69-
default:
70-
return mkldnn::memory::format::blocked;
71-
}
72-
};
73-
74-
auto out_mem_pd = paddle::platform::create_prim_desc_from_dims(
75-
paddle::framework::vectorize2int(out.dims()),
76-
mkldnn_fmt(out.dims().size()));
77-
78-
out.set_mkldnn_prim_desc(out_mem_pd);
59+
out.set_layout(DataLayout::kMKLDNN);
60+
out.set_format(out_format);
7961
#endif
8062
} else {
8163
// Case2 - transfrom from MKLDNN OPKernel to Non-MKLDNN OPKernel

paddle/fluid/framework/details/CMakeLists.txt

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,10 @@ cc_library(fetch_barrier_op_handle SRCS fetch_barrier_op_handle.cc DEPS framewor
1010
cc_library(multi_devices_helper SRCS multi_devices_helper.cc DEPS graph graph_helper)
1111
cc_library(multi_devices_graph_print_pass SRCS multi_devices_graph_print_pass.cc DEPS multi_devices_helper)
1212
cc_library(multi_devices_graph_check_pass SRCS multi_devices_graph_check_pass.cc DEPS multi_devices_helper)
13+
1314
cc_library(alloc_continuous_space_for_grad_pass SRCS alloc_continuous_space_for_grad_pass.cc DEPS graph graph_helper)
15+
cc_library(fuse_adam_op_pass SRCS fuse_adam_op_pass.cc fuse_optimizer_op_pass.cc DEPS graph graph_helper)
16+
cc_library(fuse_sgd_op_pass SRCS fuse_sgd_op_pass.cc fuse_optimizer_op_pass.cc DEPS graph graph_helper)
1417

1518
cc_library(variable_visitor SRCS variable_visitor.cc DEPS lod_tensor selected_rows)
1619

@@ -23,7 +26,7 @@ endif()
2326

2427
if(WITH_GPU)
2528
nv_library(all_reduce_op_handle SRCS all_reduce_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory
26-
dynload_cuda variable_visitor)
29+
dynload_cuda variable_visitor dgc)
2730
nv_library(fused_all_reduce_op_handle SRCS fused_all_reduce_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory
2831
dynload_cuda variable_visitor)
2932
if(WITH_DISTRIBUTE)
@@ -104,5 +107,7 @@ cc_library(build_strategy SRCS build_strategy.cc DEPS
104107
graph_viz_pass multi_devices_graph_pass
105108
multi_devices_graph_print_pass multi_devices_graph_check_pass
106109
fuse_elewise_add_act_pass multi_batch_merge_pass
107-
fuse_relu_depthwise_conv_pass
108-
memory_optimize_pass lock_free_optimize_pass alloc_continuous_space_for_grad_pass fuse_all_reduce_op_pass)
110+
fuse_relu_depthwise_conv_pass
111+
memory_optimize_pass lock_free_optimize_pass
112+
alloc_continuous_space_for_grad_pass fuse_all_reduce_op_pass
113+
fuse_adam_op_pass fuse_sgd_op_pass)

paddle/fluid/framework/details/all_reduce_deps_pass.cc

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,7 @@ VarHandle* GetValidInput(const OpHandleBase* a) {
4242
return nullptr;
4343
}
4444

45-
std::unique_ptr<ir::Graph> AllReduceDepsPass::ApplyImpl(
46-
std::unique_ptr<ir::Graph> graph) const {
45+
void AllReduceDepsPass::ApplyImpl(ir::Graph* graph) const {
4746
auto graph_ops = ir::FilterByNodeWrapper<OpHandleBase>(*graph);
4847

4948
// get vars order
@@ -86,7 +85,8 @@ std::unique_ptr<ir::Graph> AllReduceDepsPass::ApplyImpl(
8685
}
8786
}
8887

89-
VLOG(10) << "dist_ops size:" << dist_ops.size() << std::endl;
88+
VLOG(10) << "dist_ops size:" << dist_ops.size()
89+
<< ", outputs size:" << vars.size() << ", ops size:" << ops.size();
9090

9191
std::sort(dist_ops.begin(), dist_ops.end(), [&](OpHandleBase* op1,
9292
OpHandleBase* op2) {
@@ -99,6 +99,10 @@ std::unique_ptr<ir::Graph> AllReduceDepsPass::ApplyImpl(
9999
auto l_it = vars.find(i0->name());
100100
auto r_it = vars.find(i1->name());
101101

102+
PADDLE_ENFORCE(l_it != vars.end() && r_it != vars.end(),
103+
"can't find var's name %s and %s in opdesc", i0->name(),
104+
i1->name());
105+
102106
if (l_it->second < r_it->second) return true;
103107

104108
if (l_it->second == r_it->second) {
@@ -126,8 +130,6 @@ std::unique_ptr<ir::Graph> AllReduceDepsPass::ApplyImpl(
126130
VLOG(10) << "pre_op:" << pre_op->DebugString()
127131
<< ", op:" << op->DebugString();
128132
}
129-
130-
return graph;
131133
}
132134

133135
} // namespace details

0 commit comments

Comments
 (0)