Skip to content

Commit b8afb14

Browse files
author
wangyang59
committed
cudnn deconv implementation
1 parent 5a933b4 commit b8afb14

15 files changed

+789
-389
lines changed

paddle/gserver/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,16 @@ filter_test(GSERVER_HEADER)
2525
filter_test(GSERVER_SOURCES)
2626
if(NOT WITH_GPU)
2727
list(REMOVE_ITEM GSERVER_HEADER
28+
layers/CudnnConvBaseLayer.h
2829
layers/CudnnConvLayer.h
30+
layers/CudnnConvTransLayer.h
2931
layers/CudnnPoolLayer.h
3032
layers/CudnnBatchNormLayer.h)
3133

3234
list(REMOVE_ITEM GSERVER_SOURCES
35+
layers/CudnnConvBaseLayer.cpp
3336
layers/CudnnConvLayer.cpp
37+
layers/CudnnConvTransLayer.cpp
3438
layers/CudnnPoolLayer.cpp
3539
layers/CudnnBatchNormLayer.cpp)
3640
compile_cu_as_cpp(layers/LstmCompute.cu)
Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License. */
14+
15+
#include "ConvBaseProjection.h"
16+
#include "paddle/utils/Stat.h"
17+
18+
namespace paddle {
19+
20+
// Per-thread, per-device scratch buffers shared by all ConvBaseProjection
// instances; lazily allocated/grown in getSpaceBytes().
ThreadLocalD<std::vector<MemoryHandle *>> ConvBaseProjection::convMem_;
22+
ConvBaseProjection::ConvBaseProjection(const ProjectionConfig &config,
                                       ParameterPtr parameter,
                                       bool useGpu)
    : Projection(config, parameter, useGpu) {
  CHECK(useGpu);  // only support GPU
  getConvParams();
  initCudnn();

  // The weight matrix is (filterH * filterW * channels / groups) x numFilters;
  // each convolution group owns a contiguous weightOffset_-sized slice of it.
  size_t height = filterH_ * filterW_ * channels_ / groups_;
  size_t width = numFilters_;
  weight_.reset(new Weight(height, width, parameter));
  weightOffset_ = height * width / groups_;
}
35+
36+
void ConvBaseProjection::getConvParams() {
37+
const ConvConfig &conf = config_.conv_conf();
38+
paddingH_ = conf.padding_y();
39+
paddingW_ = conf.padding();
40+
41+
strideH_ = conf.stride_y();
42+
strideW_ = conf.stride();
43+
44+
filterH_ = conf.filter_size_y();
45+
filterW_ = conf.filter_size();
46+
47+
configImgH_ = conf.has_img_size_y() ? conf.img_size_y() : conf.img_size();
48+
configImgW_ = conf.img_size();
49+
50+
configOutH_ = conf.has_output_y() ? conf.output_y() : conf.output_x();
51+
configOutW_ = conf.output_x();
52+
53+
configChannels_ = conf.channels();
54+
configNumFilters_ = config_.num_filters();
55+
56+
isDeconv_ = (config_.type() == "conv") ? false : true;
57+
58+
channels_ = (isDeconv_) ? configNumFilters_ : configChannels_;
59+
numFilters_ = (isDeconv_) ? configChannels_ : configNumFilters_;
60+
61+
groups_ = conf.groups();
62+
CHECK_EQ(channels_ % groups_, 0);
63+
CHECK_EQ(numFilters_ % groups_, 0);
64+
}
65+
66+
void ConvBaseProjection::initCudnn() {
  // Create the cuDNN descriptors once; the tensor shapes are (re)set later in
  // reshapeTensorDesc() when the actual batch size is known.
  hl_create_filter_descriptor(&filterDesc_,
                              channels_ / groups_,
                              numFilters_ / groups_,
                              filterH_,
                              filterW_);
  hl_create_tensor_descriptor(&imageDesc_);
  hl_create_tensor_descriptor(&outputDesc_);
  hl_create_convolution_descriptor(&convDesc_,
                                   imageDesc_,
                                   filterDesc_,
                                   paddingH_,
                                   paddingW_,
                                   strideH_,
                                   strideW_);

  // initialize all to default algorithms
  fwdAlgo_ = 0;
  bwdFilterAlgo_ = 0;
  bwdDataAlgo_ = 0;
  fwdLimitBytes_ = 0;
  bwdDataLimitBytes_ = 0;
  bwdFilterLimitBytes_ = 0;
  workSpaceInBytes_ = 0;

  // batchNum_ == 0 guarantees the first reshape() call re-selects algorithms.
  batchNum_ = 0;
  isSelectAlgo_ = false;
}
94+
95+
void ConvBaseProjection::reshapeTensorDesc(int batchSize) {
  // Input tensor is NCHW. The image stride is channels_ * H * W (the full
  // image), while the descriptor only covers channels_ / groups_ channels:
  // the per-group slice is addressed via an offset at call time, not here.
  hl_tensor_reshape(imageDesc_,
                    batchSize,
                    channels_ / groups_,
                    imageH_,
                    imageW_,
                    channels_ * imageH_ * imageW_,
                    imageH_ * imageW_,
                    imageW_,
                    1);
  hl_reset_convolution_descriptor(convDesc_,
                                  imageDesc_,
                                  filterDesc_,
                                  paddingH_,
                                  paddingW_,
                                  strideH_,
                                  strideW_);

  // The stride between two consecutive images in ConvProjection may not be 1,
  // for example, in the case of layer ConcatenateLayer2 with two
  // ConvProjection, the stride is the output_size of layer ConcatenateLayer2.
  // So the calculation of nStride is different from CudnnConvLayer.
  // In fact, only "nStride = out_->value->getStride()" is ok.
  // size_t nStride = numFilters_ * outputH_ * outputW_;
  // if (out_->value->isContiguous()) {
  //   CHECK_EQ(nStride, out_->value->getWidth());
  // } else {
  //   nStride = out_->value->getStride();
  // }
  size_t nStride = out_->value->getStride();

  hl_tensor_reshape(outputDesc_,
                    batchSize,
                    numFilters_ / groups_,
                    outputH_,
                    outputW_,
                    nStride,
                    outputH_ * outputW_,
                    outputW_,
                    1);
}
136+
137+
// Validates input/output sizes for the current batch, and (only when the
// batch size changed since the last call) refreshes the tensor descriptors
// and re-queries cuDNN for the best conv algorithms and workspace sizes.
void ConvBaseProjection::reshape(int batchSize) {
  size_t width = calOutputSize();
  CHECK_EQ(width, out_->value->getWidth());
  if (isDeconv_) {
    // For deconv the projection *input* has the conv-output geometry.
    CHECK_EQ(static_cast<size_t>(configChannels_ * outputH_ * outputW_),
             in_->value->getWidth())
        << "Wrong input size for convolution transpose"
        << " channels=" << configChannels_ << " outputH=" << outputH_
        << " outputW=" << outputW_ << " inputSize=" << in_->value->getWidth();
  } else {
    CHECK_EQ(static_cast<size_t>(configChannels_ * imageH_ * imageW_),
             in_->value->getWidth())
        << "Wrong input size for convolution"
        << " channels=" << configChannels_ << " imageH=" << imageH_
        << " imageW=" << imageW_ << " inputSize=" << in_->value->getWidth();
  }

  // Algorithms already selected iff the batch size is unchanged.
  isSelectAlgo_ = (batchSize == batchNum_);
  batchNum_ = batchSize;

  if (!isSelectAlgo_) {
    reshapeTensorDesc(batchSize);
    // Ask cuDNN for forward / backward-data / backward-filter algorithms and
    // their respective workspace requirements.
    hl_conv_workspace(imageDesc_,
                      outputDesc_,
                      filterDesc_,
                      convDesc_,
                      &fwdAlgo_,
                      &fwdLimitBytes_,
                      &bwdDataAlgo_,
                      &bwdDataLimitBytes_,
                      &bwdFilterAlgo_,
                      &bwdFilterLimitBytes_);

    // A single workspace big enough for all three phases.
    size_t maxWorkSpace = 0;
    maxWorkSpace = std::max(fwdLimitBytes_, bwdDataLimitBytes_);
    maxWorkSpace = std::max(maxWorkSpace, bwdFilterLimitBytes_);
    workSpaceInBytes_ = maxWorkSpace;

    VLOG(3) << getName() << " Fwd / BwdData / BwdFilter algo: " << fwdAlgo_
            << " / " << bwdDataAlgo_ << " / " << bwdFilterAlgo_;
  }

  isSelectAlgo_ = true;
}
181+
182+
// Returns a device-local scratch buffer of at least `size` bytes, growing the
// cached per-device allocation on demand. The cache is thread-local and
// shared by all projections on the same thread/device.
void *ConvBaseProjection::getSpaceBytes(size_t size) {
  std::vector<MemoryHandle *> &convMem = *convMem_;
  if (convMem.empty()) {
    // Lazily size the cache to one slot per GPU; pointers value-init to NULL.
    int numDevices = hl_get_device_count();
    convMem.resize(numDevices);
  }

  int devId = hl_get_device();
  MemoryHandle **localMem = &(convMem[devId]);
  if (NULL == *localMem || size > (*localMem)->getAllocSize()) {
    // NOTE(review): the previous handle is overwritten without being freed
    // when the workspace grows — looks like a leak of the old GPU buffer;
    // confirm against MemoryHandle's ownership/destructor semantics.
    *localMem = new GpuMemoryHandle(size);
  }
  return (*localMem)->getBuf();
}
196+
197+
// Releases the cuDNN descriptors created in initCudnn(). The shared
// workspace in convMem_ is intentionally not freed here (it is cached
// across projections).
ConvBaseProjection::~ConvBaseProjection() {
  hl_destroy_tensor_descriptor(imageDesc_);
  hl_destroy_tensor_descriptor(outputDesc_);
  hl_destroy_filter_descriptor(filterDesc_);
  hl_destroy_convolution_descriptor(convDesc_);
}
203+
204+
} // namespace paddle
Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License. */
14+
15+
#pragma once
16+
17+
#include "Projection.h"
18+
#include "paddle/math/MathUtils.h"
19+
20+
namespace paddle {
21+
22+
/**
 * @brief Base class for ConvProjection and ConvTransProjection.
 */
class ConvBaseProjection : public Projection {
public:
  /**
   * Constructor.
   */
  ConvBaseProjection(const ProjectionConfig& config,
                     ParameterPtr parameter,
                     bool useGpu);

  ~ConvBaseProjection();

protected:
  /// Reads geometry/channel parameters from the projection config.
  void getConvParams();
  /// Creates the cuDNN descriptors and resets algorithm state.
  void initCudnn();

  /// Re-sets tensor/convolution descriptor shapes for the given batch size.
  void reshapeTensorDesc(int batchSize);
  /// Validates sizes and (re)selects cuDNN algorithms when batch size changes.
  void reshape(int batchSize);

  /// Derives image/output geometry from the input argument (falling back to
  /// the config when the frame size is unset), records it on the output
  /// argument, sets the per-group input/output offsets, and returns the
  /// expected output width.
  size_t calOutputSize() {
    if (isDeconv_) {
      // Deconv: the projection input carries the conv-output geometry and
      // the projection output carries the image geometry.
      outputH_ = in_->getFrameHeight();
      outputW_ = in_->getFrameWidth();
      if (outputH_ == 0) outputH_ = configOutH_;
      if (outputW_ == 0) outputW_ = configOutW_;
      imageH_ = imageSize(outputH_,
                          filterH_,
                          paddingH_,
                          strideH_,
                          /* caffeMode */ true);

      imageW_ = imageSize(outputW_,
                          filterW_,
                          paddingW_,
                          strideW_,
                          /* caffeMode */ true);

      const_cast<Argument*>(out_)->setFrameHeight(imageH_);
      const_cast<Argument*>(out_)->setFrameWidth(imageW_);

      inputOffset_ = (configChannels_ / groups_) * outputH_ * outputW_;
      outputOffset_ = (configNumFilters_ / groups_) * imageH_ * imageW_;
      return imageH_ * imageW_ * configNumFilters_;
    } else {
      imageH_ = in_->getFrameHeight();
      imageW_ = in_->getFrameWidth();
      if (imageH_ == 0) imageH_ = configImgH_;
      if (imageW_ == 0) imageW_ = configImgW_;
      outputH_ = outputSize(imageH_,
                            filterH_,
                            paddingH_,
                            strideH_,
                            /* caffeMode */ true);
      outputW_ = outputSize(imageW_,
                            filterW_,
                            paddingW_,
                            strideW_,
                            /* caffeMode */ true);

      const_cast<Argument*>(out_)->setFrameHeight(outputH_);
      const_cast<Argument*>(out_)->setFrameWidth(outputW_);

      inputOffset_ = (configChannels_ / groups_) * imageH_ * imageW_;
      outputOffset_ = (configNumFilters_ / groups_) * outputH_ * outputW_;
      return outputH_ * outputW_ * configNumFilters_;
    }
  }

  /// Returns a cached per-device scratch buffer of at least `size` bytes.
  static void* getSpaceBytes(size_t size);

  /// True if it's deconv projection layer, false if it's ConvProjection layer
  bool isDeconv_;
  /// imageH_ and imageW_ / outputH_ and outputW_
  /// is calculated from the input layer.
  int imageH_, imageW_;
  int outputH_, outputW_;
  /// configImgH_ and configImgW_ / configOutH_ and configOutW_
  /// is obtained from config.
  int configImgH_, configImgW_;
  int configOutH_, configOutW_;
  /// channels_ and numFilters_ are defined in terms of convolution semantics
  int channels_, numFilters_;
  /// configChannels_ and configNumFilters_ are obtained from config.
  /// For Conv they are the same as channels_ and numFilters_;
  /// for ConvTrans they are opposite to channels_ and numFilters_.
  int configChannels_, configNumFilters_;
  int paddingH_, paddingW_;
  int strideH_, strideW_;
  int filterH_, filterW_;
  /// One group offset of input data.
  int inputOffset_;
  /// One group offset of output data.
  int outputOffset_;
  /// One group offset of weight.
  int weightOffset_;
  int groups_;

  /// Cudnn tensor descriptor for input.
  hl_tensor_descriptor imageDesc_;
  /// Cudnn tensor descriptor for output.
  hl_tensor_descriptor outputDesc_;
  /// Cudnn tensor descriptor for filter.
  hl_filter_descriptor filterDesc_;
  /// Cudnn tensor descriptor for a convolution operation.
  hl_convolution_descriptor convDesc_;

  /// Record the algorithm for forward convolution, which is obtained by cudnn
  /// api to search the best suited algorithm.
  int fwdAlgo_;
  /// Record the algorithm for computing convolution gradient with respect to
  /// filter coefficients.
  int bwdFilterAlgo_;
  /// Record the algorithm for computing convolution gradient with respect to
  /// the output.
  int bwdDataAlgo_;
  /// Amount of GPU memory needed as workspace to be able to execute a
  /// forward convolution with the specified algo.
  size_t fwdLimitBytes_;
  /// Amount of GPU memory needed as workspace to be able to execute a
  /// backwardData with the specified algo.
  size_t bwdDataLimitBytes_;
  /// Amount of GPU memory needed as workspace to be able to execute a
  /// backwardFilter with the specified algo.
  size_t bwdFilterLimitBytes_;
  /// Size of total work space.
  size_t workSpaceInBytes_;

  /// Whether to call cuDNN api to choose conv algorithm.
  bool isSelectAlgo_;
  /// batchNum is used to record batch size. If the batch size is changed,
  /// the selection algorithm will be called.
  int batchNum_;
  bool bias_;

  std::unique_ptr<Weight> weight_;
  /// Thread-local per-device scratch buffers shared by all instances.
  static ThreadLocalD<std::vector<MemoryHandle*>> convMem_;
};
161+
162+
} // namespace paddle

0 commit comments

Comments
 (0)