Skip to content

Commit 2f33f74

Browse files
committed
Merge remote-tracking branch 'origin/develop' into feature/evaluator
2 parents 12858ba + c3a6134 commit 2f33f74

File tree

213 files changed

+1643
-750
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

213 files changed

+1643
-750
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ third_party/
2121
cmake-build-*
2222

2323
# generated while compiling
24-
python/paddle/v2/framework/core.so
24+
python/paddle/v2/fluid/core.so
2525
paddle/pybind/pybind.h
2626
CMakeFiles
2727
cmake_install.cmake

paddle/capi/Matrix.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ paddle_error paddle_matrix_get_shape(paddle_matrix mat,
121121

122122
paddle_matrix paddle_matrix_create_sparse(
123123
uint64_t height, uint64_t width, uint64_t nnz, bool isBinary, bool useGpu) {
124+
#ifndef PADDLE_MOBILE_INFERENCE
124125
auto ptr = new paddle::capi::CMatrix();
125126
ptr->mat = paddle::Matrix::createSparseMatrix(
126127
height,
@@ -131,6 +132,9 @@ paddle_matrix paddle_matrix_create_sparse(
131132
false,
132133
useGpu);
133134
return ptr;
135+
#else
136+
return nullptr;
137+
#endif
134138
}
135139

136140
paddle_error paddle_matrix_sparse_copy_from(paddle_matrix mat,
@@ -140,6 +144,7 @@ paddle_error paddle_matrix_sparse_copy_from(paddle_matrix mat,
140144
uint64_t colSize,
141145
float* valueArray,
142146
uint64_t valueSize) {
147+
#ifndef PADDLE_MOBILE_INFERENCE
143148
if (mat == nullptr) return kPD_NULLPTR;
144149
auto ptr = cast(mat);
145150
if (rowArray == nullptr || colArray == nullptr ||
@@ -160,4 +165,7 @@ paddle_error paddle_matrix_sparse_copy_from(paddle_matrix mat,
160165
} else {
161166
return kPD_NOT_SUPPORTED;
162167
}
168+
#else
169+
return kPD_NOT_SUPPORTED;
170+
#endif
163171
}

paddle/capi/matrix.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ PD_API paddle_matrix paddle_matrix_create(uint64_t height,
4848
* @param isBinary is binary (either 1 or 0 in matrix) or not.
4949
* @param useGpu is using GPU or not.
5050
* @return paddle_matrix.
51+
* @note Mobile inference does not support this interface.
5152
*/
5253
PD_API paddle_matrix paddle_matrix_create_sparse(
5354
uint64_t height, uint64_t width, uint64_t nnz, bool isBinary, bool useGpu);
@@ -129,6 +130,7 @@ PD_API paddle_error paddle_matrix_get_shape(paddle_matrix mat,
129130
* NULL if the matrix is binary.
130131
* @param [in] valueSize length of value array. Zero if the matrix is binary.
131132
* @return paddle_error
133+
* @note Mobile inference does not support this interface.
132134
*/
133135
PD_API paddle_error paddle_matrix_sparse_copy_from(paddle_matrix mat,
134136
int* rowArray,

paddle/cuda/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,9 @@ if(WITH_GPU)
2727
set_source_files_properties(${CUDA_CXX_SOURCES}
2828
PROPERTIES COMPILE_FLAGS "-D__NVCC__")
2929
else()
30+
if (NOT MOBILE_INFERENCE)
3031
set(CUDA_CXX_SOURCES src/hl_warpctc_wrap.cc)
32+
endif()
3133
endif()
3234

3335
set(CUDA_CU_SOURCES

paddle/cuda/include/hl_cnn.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ limitations under the License. */
1818
#include "hl_base.h"
1919

2020
/**
21-
* @brief Maximum pool forward.
21+
* @brief Maximum pool forward with Mask output.
2222
*
2323
* @param[in] frameCnt batch size of input image.
2424
* @param[in] inputData input data.
@@ -35,7 +35,7 @@ limitations under the License. */
3535
* @param[in] paddingW padding width.
3636
* @param[out] tgtData output data.
3737
* @param[in] tgtStride stride between output data samples.
38-
*
38+
* @param[out] maskData the location indices of select max data.
3939
*/
4040
extern void hl_maxpool_forward(const int frameCnt,
4141
const real* inputData,
@@ -51,7 +51,8 @@ extern void hl_maxpool_forward(const int frameCnt,
5151
const int paddingH,
5252
const int paddingW,
5353
real* tgtData,
54-
const int tgtStride);
54+
const int tgtStride,
55+
real* maskData = NULL);
5556

5657
/**
5758
* @brief Maximum pool backward.

paddle/cuda/include/stub/hl_cnn_stub.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,8 @@ inline void hl_maxpool_forward(const int frameCnt,
3131
const int paddingH,
3232
const int paddingW,
3333
real* tgtData,
34-
const int tgtStride) {}
34+
const int tgtStride,
35+
real* MaskData) {}
3536

3637
inline void hl_maxpool_backward(const int frameCnt,
3738
const real* inputData,

paddle/cuda/src/hl_cuda_cnn.cu

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,8 @@ __global__ void KeMaxPoolForward(const int nthreads,
3131
const int offsetH,
3232
const int offsetW,
3333
real* tgtData,
34-
const int tgtStride) {
34+
const int tgtStride,
35+
real* maskData) {
3536
int index = blockIdx.x * blockDim.x + threadIdx.x;
3637
if (index < nthreads) {
3738
int pw = index % pooledW;
@@ -45,16 +46,22 @@ __global__ void KeMaxPoolForward(const int nthreads,
4546
hstart = max(hstart, 0);
4647
wstart = max(wstart, 0);
4748
real maxval = -FLT_MAX;
49+
int max_index = -1;
4850
inputData += (frameNum * channels + c) * height * width;
4951
for (int h = hstart; h < hend; ++h) {
5052
for (int w = wstart; w < wend; ++w) {
51-
if (maxval < inputData[h * width + w])
52-
maxval = inputData[h * width + w];
53+
if (maxval < inputData[h * width + w]) {
54+
max_index = h * width + w;
55+
maxval = inputData[max_index];
56+
}
5357
}
5458
}
5559
int tgtIndex =
5660
index % (pooledW * pooledH * channels) + frameNum * tgtStride;
5761
tgtData[tgtIndex] = maxval;
62+
if (maskData != NULL) {
63+
maskData[tgtIndex] = max_index;
64+
}
5865
}
5966
}
6067

@@ -72,7 +79,8 @@ void hl_maxpool_forward(const int frameCnt,
7279
const int paddingH,
7380
const int paddingW,
7481
real* tgtData,
75-
const int tgtStride) {
82+
const int tgtStride,
83+
real* maskData) {
7684
int num_kernels = pooledH * pooledW * channels * frameCnt;
7785
int blocks = (num_kernels + 1024 - 1) / 1024;
7886
dim3 threads(1024, 1);
@@ -92,7 +100,8 @@ void hl_maxpool_forward(const int frameCnt,
92100
paddingH,
93101
paddingW,
94102
tgtData,
95-
tgtStride);
103+
tgtStride,
104+
maskData);
96105
CHECK_SYNC("hl_maxpool_forward failed");
97106
}
98107

paddle/function/ConvOp.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ class ConvFunctionBase : public FunctionBase {
6161
// function arguments
6262
strides_ = config.get<std::vector<size_t>>("strides");
6363
paddings_ = config.get<std::vector<size_t>>("paddings");
64+
dilations_ = config.get<std::vector<size_t>>("dilations");
6465
groups_ = config.get<size_t>("groups");
6566

6667
// number of inputs and outputs
@@ -118,6 +119,7 @@ class ConvFunctionBase : public FunctionBase {
118119

119120
std::vector<size_t> strides_;
120121
std::vector<size_t> paddings_;
122+
std::vector<size_t> dilations_;
121123

122124
/// Group size, refer to grouped convolution in
123125
/// Alex Krizhevsky's paper: when group=2, the first half of the
@@ -133,6 +135,10 @@ class ConvFunctionBase : public FunctionBase {
133135

134136
inline int paddingW() const { return paddings_[1]; }
135137

138+
inline int dilationH() const { return dilations_[0]; }
139+
140+
inline int dilationW() const { return dilations_[1]; }
141+
136142
// A temporary memory in convolution calculation.
137143
MemoryHandlePtr memory_;
138144

paddle/function/ConvOpTest.h

Lines changed: 53 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -79,45 +79,59 @@ void Convolution(const std::string& conv1,
7979
if (outputChannels < inputChannels) continue;
8080
for (size_t stride : {1, 2}) {
8181
for (size_t padding : {0, 1}) {
82-
if (padding >= filterSize) break;
82+
for (size_t dilation : {1, 3}) {
83+
if (padding >= filterSize) break;
84+
size_t filterS = (filterSize - 1) * dilation + 1;
8385

84-
// NNPACK only supports stride = 1 if batchSize > 1
85-
if ((conv1 == "NNPACKConv-CPU" || conv2 == "NNPACKConv-CPU") &&
86-
batchSize > 1 && stride > 1)
87-
break;
86+
if (inputSize + 2 * padding < filterS) break;
8887

89-
size_t outputSize =
90-
(inputSize - filterSize + 2 * padding + stride) / stride;
91-
VLOG(3) << " batchSize=" << batchSize
92-
<< " inputChannels=" << inputChannels
93-
<< " inputHeight=" << inputSize
94-
<< " inputWidth=" << inputSize
95-
<< " outputChannels=" << outputChannels
96-
<< " filterHeight=" << filterSize
97-
<< " filterWidth=" << filterSize
98-
<< " outputHeight=" << outputSize
99-
<< " outputWidth=" << outputSize << " stride=" << stride
100-
<< " padding=" << padding;
88+
if ((conv1 == "NaiveConv-CPU" || conv2 == "NaiveConv-CPU" ||
89+
conv1 == "NNPACKConv-CPU" ||
90+
conv2 == "NNPACKConv-CPU") &&
91+
dilation > 1)
92+
break;
10193

102-
std::vector<size_t> paddings = {padding, padding};
103-
std::vector<size_t> strides = {stride, stride};
104-
Compare2Function<DType1, DType2> test(
105-
conv1,
106-
conv2,
107-
FuncConfig()
108-
.set("paddings", paddings)
109-
.set("strides", strides)
110-
.set("groups", (size_t)1)
111-
.set("algo", (std::string) "auto"));
94+
// NNPACK only supports stride = 1 if batchSize > 1
95+
if ((conv1 == "NNPACKConv-CPU" ||
96+
conv2 == "NNPACKConv-CPU") &&
97+
batchSize > 1 && stride > 1)
98+
break;
11299

113-
TensorShape input{
114-
batchSize, inputChannels, inputSize, inputSize};
115-
TensorShape filter{
116-
outputChannels, inputChannels, filterSize, filterSize};
117-
TensorShape output{
118-
batchSize, outputChannels, outputSize, outputSize};
100+
size_t outputSize =
101+
(inputSize - filterS + 2 * padding + stride) / stride;
102+
VLOG(3) << " batchSize=" << batchSize
103+
<< " inputChannels=" << inputChannels
104+
<< " inputHeight=" << inputSize
105+
<< " inputWidth=" << inputSize
106+
<< " outputChannels=" << outputChannels
107+
<< " filterHeight=" << filterSize
108+
<< " filterWidth=" << filterSize
109+
<< " outputHeight=" << outputSize
110+
<< " outputWidth=" << outputSize
111+
<< " stride=" << stride << " padding=" << padding;
119112

120-
function(test, input, filter, output);
113+
std::vector<size_t> paddings = {padding, padding};
114+
std::vector<size_t> strides = {stride, stride};
115+
std::vector<size_t> dilations = {dilation, dilation};
116+
Compare2Function<DType1, DType2> test(
117+
conv1,
118+
conv2,
119+
FuncConfig()
120+
.set("paddings", paddings)
121+
.set("strides", strides)
122+
.set("dilations", dilations)
123+
.set("groups", (size_t)1)
124+
.set("algo", (std::string) "auto"));
125+
126+
TensorShape input{
127+
batchSize, inputChannels, inputSize, inputSize};
128+
TensorShape filter{
129+
outputChannels, inputChannels, filterSize, filterSize};
130+
TensorShape output{
131+
batchSize, outputChannels, outputSize, outputSize};
132+
133+
function(test, input, filter, output);
134+
}
121135
}
122136
}
123137
}
@@ -144,6 +158,7 @@ void Convolution2(const std::string& conv1,
144158
for (size_t outputChannels : {7}) {
145159
size_t stride = 1;
146160
size_t padding = 0;
161+
size_t dilation = 1;
147162
size_t outputHeight =
148163
(inputHeight - filterHeight + 2 * padding + stride) /
149164
stride;
@@ -162,13 +177,15 @@ void Convolution2(const std::string& conv1,
162177

163178
std::vector<size_t> paddings = {padding, padding};
164179
std::vector<size_t> strides = {stride, stride};
180+
std::vector<size_t> dilations = {dilation, dilation};
165181
Compare2Function<DType1, DType2> test(
166182
conv1,
167183
conv2,
168184
FuncConfig()
169185
.set("paddings", paddings)
170186
.set("strides", strides)
171187
.set("groups", (size_t)1)
188+
.set("dilations", dilations)
172189
.set("algo", (std::string) "auto"));
173190

174191
TensorShape input{
@@ -223,6 +240,7 @@ void DepthwiseConvolution(const std::string& conv1,
223240

224241
std::vector<size_t> paddings = {padding, padding};
225242
std::vector<size_t> strides = {stride, stride};
243+
std::vector<size_t> dilations = {1, 1};
226244
size_t groups = inputChannels;
227245
Compare2Function<DType1, DType2> test(
228246
conv1,
@@ -231,6 +249,7 @@ void DepthwiseConvolution(const std::string& conv1,
231249
.set("paddings", paddings)
232250
.set("strides", strides)
233251
.set("groups", groups)
252+
.set("dilations", dilations)
234253
.set("algo", (std::string) "auto"));
235254

236255
TensorShape input{

paddle/function/GemmConvOp.cpp

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,9 @@ class GemmConvFunction : public ConvFunctionBase {
100100
strideH(),
101101
strideW(),
102102
paddingH(),
103-
paddingW());
103+
paddingW(),
104+
dilationH(),
105+
dilationW());
104106
} else {
105107
colData = inputData + g * inputOffset;
106108
}
@@ -223,7 +225,9 @@ class GemmConvGradInputFunction : public ConvFunctionBase {
223225
strideH(),
224226
strideW(),
225227
paddingH(),
226-
paddingW());
228+
paddingW(),
229+
dilationH(),
230+
dilationW());
227231
}
228232
}
229233
inputGrad += inputChannels * inputHeight * inputWidth;
@@ -310,7 +314,9 @@ class GemmConvGradFilterFunction : public ConvFunctionBase {
310314
strideH(),
311315
strideW(),
312316
paddingH(),
313-
paddingW());
317+
paddingW(),
318+
dilationH(),
319+
dilationW());
314320
} else {
315321
colData = inputData + g * inputOffset;
316322
}

0 commit comments

Comments
 (0)