Skip to content

Commit 97f1b98

Browse files
committed
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into seq_expand_op
2 parents 2961674 + 630644e commit 97f1b98

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

86 files changed

+4401
-784
lines changed

CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,7 @@ include(external/warpctc) # download, build, install warpctc
127127
include(external/any) # download libn::any
128128
include(external/eigen) # download eigen3
129129
include(external/pybind11) # download pybind11
130+
include(external/nccl)
130131

131132
include(cudnn) # set cudnn libraries, must before configure
132133
include(configure) # add paddle env configuration
@@ -159,7 +160,7 @@ set(EXTERNAL_LIBS
159160
if(WITH_GPU)
160161
list(APPEND EXTERNAL_LIBS ${CUDA_LIBRARIES} ${CUDA_rt_LIBRARY})
161162
if(NOT WITH_DSO)
162-
list(APPEND EXTERNAL_LIBS ${CUDNN_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY})
163+
list(APPEND EXTERNAL_LIBS ${CUDNN_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY} ${NCCL_LIBRARY})
163164
endif(NOT WITH_DSO)
164165
endif(WITH_GPU)
165166

cmake/configure.cmake

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,11 +62,11 @@ else()
6262
FIND_PACKAGE(CUDA REQUIRED)
6363

6464
if(${CUDA_VERSION_MAJOR} VERSION_LESS 7)
65-
message(FATAL_ERROR "Paddle need CUDA >= 7.0 to compile")
65+
message(FATAL_ERROR "Paddle needs CUDA >= 7.0 to compile")
6666
endif()
6767

6868
if(NOT CUDNN_FOUND)
69-
message(FATAL_ERROR "Paddle need cudnn to compile")
69+
message(FATAL_ERROR "Paddle needs cudnn to compile")
7070
endif()
7171

7272
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler ${SIMD_FLAG}")

cmake/external/nccl.cmake

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
# Download (and, when linking statically, build) NVIDIA NCCL as an external
# project.  Under WITH_DSO only the headers are needed -- NCCL is loaded at
# runtime -- so the build/install steps are skipped.
INCLUDE(ExternalProject)

SET(NCCL_SOURCE_DIR ${THIRD_PARTY_PATH}/nccl)

INCLUDE_DIRECTORIES(${NCCL_SOURCE_DIR}/src/extern_nccl/src)

if(WITH_DSO)
  # If we use DSO, we do not build nccl, just download the dependencies
  set(NCCL_BUILD_COMMAND "")
  set(NCCL_INSTALL_COMMAND "")
  set(NCCL_INSTALL_DIR "")
else()
  # otherwise, we build nccl and link it.
  set(NCCL_BUILD_COMMAND "make -j 8")
  set(NCCL_INSTALL_COMMAND "make install")
  SET(NCCL_INSTALL_DIR ${THIRD_PARTY_PATH}/install/nccl)
endif()

ExternalProject_Add(
  extern_nccl
  ${EXTERNAL_PROJECT_LOG_ARGS}
  GIT_REPOSITORY    "https://github.com/NVIDIA/nccl.git"
  GIT_TAG           "v1.3.4-1"
  PREFIX            "${NCCL_SOURCE_DIR}"
  UPDATE_COMMAND    ""
  CONFIGURE_COMMAND ""
  BUILD_COMMAND     "${NCCL_BUILD_COMMAND}"
  INSTALL_COMMAND   "${NCCL_INSTALL_COMMAND}"
  INSTALL_DIR       "${NCCL_INSTALL_DIR}"
  TEST_COMMAND      ""
)

if(WITH_DSO)
  if(${CMAKE_VERSION} VERSION_LESS "3.3.0")
    # Pre-3.3 CMake has no INTERFACE libraries, so compile a dummy source
    # just to give the "nccl" target something to build.  Use nccl-specific
    # file and symbol names: the original copy-pasted lib_any_dummy.c /
    # dummy_any from external/any.cmake, colliding with the identical trick
    # used for the "any" target.
    set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/lib_nccl_dummy.c)
    file(WRITE ${dummyfile} "const char * dummy_nccl = \"${dummyfile}\";")
    add_library(nccl STATIC ${dummyfile})
  else()
    add_library(nccl INTERFACE)
  endif()
else()
  # Static build: import the archive produced by the external project above.
  ADD_LIBRARY(nccl STATIC IMPORTED GLOBAL)
  SET_PROPERTY(TARGET nccl PROPERTY IMPORTED_LOCATION
               ${NCCL_INSTALL_DIR}/lib/libnccl.a)
endif()

add_dependencies(nccl extern_nccl)

LIST(APPEND external_project_dependencies nccl)

paddle/framework/op_info.h

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -87,11 +87,8 @@ class OpInfoMap {
8787
}
8888
}
8989

90-
template <typename Callback>
91-
void IterAllInfo(Callback callback) {
92-
for (auto& it : map_) {
93-
callback(it.first, it.second);
94-
}
90+
const std::unordered_map<std::string, const OpInfo>& map() const {
91+
return map_;
9592
}
9693

9794
private:

paddle/framework/var_desc.cc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,10 @@ limitations under the License. */
1818
namespace paddle {
1919
namespace framework {
2020

21+
// Returns the variable's type enum stored in the wrapped VarDesc proto.
VarDesc::VarType VarDescBind::GetType() const { return desc_.type(); }
22+
23+
// Overwrites the variable's type enum on the wrapped VarDesc proto.
void VarDescBind::SetType(VarDesc::VarType type) { desc_.set_type(type); }
24+
2125
void VarDescBind::SetShape(const std::vector<int64_t> &dims) {
2226
VectorToRepeated(dims, mutable_tensor_desc()->mutable_dims());
2327
}

paddle/framework/var_desc.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,9 +75,9 @@ class VarDescBind {
7575

7676
int32_t GetLodLevel() const;
7777

78-
VarDesc::VarType GetType() const { return desc_.type(); }
78+
VarDesc::VarType GetType() const;
7979

80-
void SetType(VarDesc::VarType type) { desc_.set_type(type); }
80+
void SetType(VarDesc::VarType type);
8181

8282
bool Persistable() const { return desc_.persistable(); }
8383

paddle/gserver/layers/MKLDNNLayer.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -339,9 +339,13 @@ class MKLDNNLayer : public Layer {
339339
* clear all grad
340340
*/
341341
void clearGrads() {
342-
output_.grad->zeroMem();
342+
if (output_.grad) {
343+
output_.grad->zeroMem();
344+
}
343345
for (size_t i = 0; i < outputOtherDevice_.size(); i++) {
344-
outputOtherDevice_[i].grad->zeroMem();
346+
if (outputOtherDevice_[i].grad) {
347+
outputOtherDevice_[i].grad->zeroMem();
348+
}
345349
}
346350
}
347351

paddle/operators/CMakeLists.txt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,8 @@ set(DEPS_OPS
115115
softmax_with_cross_entropy_op
116116
sum_op
117117
pool_op
118-
pool_with_index_op)
118+
pool_with_index_op
119+
lstm_op)
119120

120121

121122
op_library(recurrent_op SRCS recurrent_op.cc rnn/recurrent_op_utils.cc
@@ -126,6 +127,7 @@ op_library(softmax_with_cross_entropy_op DEPS cross_entropy softmax)
126127
op_library(sum_op DEPS net_op)
127128
op_library(pool_op DEPS pooling)
128129
op_library(pool_with_index_op DEPS pooling)
130+
op_library(lstm_op DEPS sequence2batch lstm_compute)
129131

130132
list(REMOVE_ITEM GENERAL_OPS ${DEPS_OPS})
131133
foreach(src ${GENERAL_OPS})

paddle/operators/conv2d_op.h

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ class GemmConv2DKernel : public framework::OpKernel<T> {
114114
// im2col
115115
Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step);
116116
im2col(context.device_context(), in_slice, col, strides[0], strides[1],
117-
paddings[0], paddings[1]);
117+
paddings[0], paddings[0], paddings[1], paddings[1]);
118118

119119
// gemm
120120
Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step);
@@ -213,7 +213,8 @@ class GemmConvGrad2DKernel : public framework::OpKernel<T> {
213213
Tensor in_grad_slice =
214214
in_grad_batch.Slice(g * in_step, (g + 1) * in_step);
215215
col2im(context.device_context(), in_grad_slice, col, strides[0],
216-
strides[1], paddings[0], paddings[1]);
216+
strides[1], paddings[0], paddings[0], paddings[1],
217+
paddings[1]);
217218
}
218219
}
219220
}
@@ -235,7 +236,8 @@ class GemmConvGrad2DKernel : public framework::OpKernel<T> {
235236
out_grad_batch.Slice(g * out_step, (g + 1) * out_step);
236237
Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step);
237238
im2col(context.device_context(), in_slice, col, strides[0],
238-
strides[1], paddings[0], paddings[1]);
239+
strides[1], paddings[0], paddings[0], paddings[1],
240+
paddings[1]);
239241

240242
// gemm
241243
Tensor filter_grad_slice =
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License. */
14+
15+
#include "paddle/operators/conv2dtranspose_op.h"
16+
17+
namespace paddle {
18+
namespace operators {
19+
20+
// Infers the NCHW output shape of the transposed convolution from the
// input/filter shapes and the "strides" attribute.  Padding is rejected
// (must be all zeros) in this operator for now.
void Conv2DTransposeOp::InferShape(framework::InferShapeContext* ctx) const {
  // All three tensors must be wired up before any shape can be read.
  PADDLE_ENFORCE(ctx->HasInput("Input"),
                 "Input(Input) of Conv2DTransposeOp should not be null.");
  PADDLE_ENFORCE(ctx->HasInput("Filter"),
                 "Input(Filter) of Conv2DTransposeOp should not be null.");
  PADDLE_ENFORCE(ctx->HasOutput("Output"),
                 "Output(Output) of Conv2DTransposeOp should not be null.");

  const auto input_dims = ctx->GetInputDim("Input");
  const auto kernel_dims = ctx->GetInputDim("Filter");
  const std::vector<int> strides =
      ctx->Attrs().Get<std::vector<int>>("strides");
  const std::vector<int> paddings =
      ctx->Attrs().Get<std::vector<int>>("paddings");

  // Transposed convolution currently supports zero padding only.
  for (const int pad : paddings) {
    PADDLE_ENFORCE_EQ(pad, 0, "No Padding allowed in conv transpose op.");
  }

  // Layouts: input is NCHW, filter is CMHW (see the op maker's doc).
  PADDLE_ENFORCE_EQ(input_dims.size(), 4,
                    "Conv2DTransposeOp input should be 4-D tensor.");
  PADDLE_ENFORCE_EQ(kernel_dims.size(), 4,
                    "Conv2DTransposeOp filter should be 4-D tensor.");
  PADDLE_ENFORCE_EQ(input_dims[1], kernel_dims[0],
                    "input and kernel input dimension should be equal.");

  // Inverse of the stride-only convolution size formula:
  //   out = (in - 1) * stride + kernel.
  const auto output_height = (input_dims[2] - 1) * strides[0] + kernel_dims[2];
  const auto output_width = (input_dims[3] - 1) * strides[1] + kernel_dims[3];
  ctx->SetOutputDim(
      "Output", {input_dims[0], kernel_dims[1], output_height, output_width});
}
50+
51+
// Declares Conv2DTransposeOp's interface for the framework: Input (NCHW)
// and Filter (CMHW) tensors in, Output (NCHW) tensor out, plus "strides"
// and "paddings" attributes and the operator's doc string.
Conv2DTransposeOpMaker::Conv2DTransposeOpMaker(
52+
framework::OpProto* proto, framework::OpAttrChecker* op_checker)
53+
: OpProtoAndCheckerMaker(proto, op_checker) {
54+
AddInput(
55+
"Input",
56+
"(Tensor) The input tensor of convolution transpose operator. "
57+
"The format of input tensor is NCHW. Where N is batch size, C is the "
58+
"number of input channels, H and W is the height and width of image.");
59+
AddInput("Filter",
60+
"(Tensor) The filter tensor of convolution transpose operator."
61+
"The format of the filter tensor is CMHW, where C is the number of "
62+
"output image channels, M is the number of input image channels, "
63+
"H and W is height and width of filter. "
64+
"We enforce groups number == 1 and padding == 0 in "
65+
"convolution transpose Scenario.");
66+
AddOutput("Output",
67+
"(Tensor) The output tensor of convolution transpose operator."
68+
"The format of output tensor is also NCHW.");
69+
// Strides default to 1x1; paddings must stay {0, 0} -- non-zero values
// are rejected by Conv2DTransposeOp's shape inference.
AddAttr<std::vector<int>>("strides",
70+
"strides of convolution transpose operator.")
71+
.SetDefault({1, 1});
72+
AddAttr<std::vector<int>>("paddings",
73+
"paddings of convolution transpose operator.")
74+
.SetDefault({0, 0});
75+
AddComment(R"DOC(
76+
The convolution transpose operation calculates the output based on the input, filter
77+
and strides, paddings, groups parameters. The size of each dimension of the
78+
parameters is checked in the infer-shape.
79+
)DOC");
80+
}
81+
82+
// Shape inference for the backward op: each requested gradient tensor
// simply takes the shape of its forward counterpart.
void Conv2DTransposeOpGrad::InferShape(
    framework::InferShapeContext* ctx) const {
  const auto input_dims = ctx->GetInputDim("Input");
  const auto kernel_dims = ctx->GetInputDim("Filter");
  // Either gradient output may be absent; set only the ones present.
  if (ctx->HasOutput(framework::GradVarName("Input"))) {
    ctx->SetOutputDim(framework::GradVarName("Input"), input_dims);
  }
  if (ctx->HasOutput(framework::GradVarName("Filter"))) {
    ctx->SetOutputDim(framework::GradVarName("Filter"), kernel_dims);
  }
}
93+
94+
} // namespace operators
95+
} // namespace paddle
96+
97+
namespace ops = paddle::operators;
98+
// Registers the forward op (with its proto maker) and the paired
// gradient op under the names conv2dtranspose / conv2dtranspose_grad.
REGISTER_OP(conv2dtranspose, ops::Conv2DTransposeOp,
99+
ops::Conv2DTransposeOpMaker, conv2dtranspose_grad,
100+
ops::Conv2DTransposeOpGrad);
101+
102+
// CPU float kernels for the forward and backward passes.
REGISTER_OP_CPU_KERNEL(
103+
conv2dtranspose,
104+
ops::GemmConv2DTransposeKernel<paddle::platform::CPUPlace, float>);
105+
REGISTER_OP_CPU_KERNEL(
106+
conv2dtranspose_grad,
107+
ops::GemmConv2DTransposeGradKernel<paddle::platform::CPUPlace, float>);

0 commit comments

Comments
 (0)