/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "ConvBaseProjection.h"
#include "paddle/utils/Stat.h"
namespace paddle {

// Per-thread, per-device cache of GPU workspace buffers shared by every
// ConvBaseProjection instance; populated lazily in getSpaceBytes().
ThreadLocalD<std::vector<MemoryHandle *>> ConvBaseProjection::convMem_;
| 21 | + |
| 22 | +ConvBaseProjection::ConvBaseProjection(const ProjectionConfig &config, |
| 23 | + ParameterPtr parameter, |
| 24 | + bool useGpu) |
| 25 | + : Projection(config, parameter, useGpu) { |
| 26 | + CHECK(useGpu); // only support GPU |
| 27 | + getConvParams(); |
| 28 | + initCudnn(); |
| 29 | + |
| 30 | + size_t height = filterH_ * filterW_ * channels_ / groups_; |
| 31 | + size_t width = numFilters_; |
| 32 | + weight_.reset(new Weight(height, width, parameter)); |
| 33 | + weightOffset_ = height * width / groups_; |
| 34 | +} |
| 35 | + |
| 36 | +void ConvBaseProjection::getConvParams() { |
| 37 | + const ConvConfig &conf = config_.conv_conf(); |
| 38 | + paddingH_ = conf.padding_y(); |
| 39 | + paddingW_ = conf.padding(); |
| 40 | + |
| 41 | + strideH_ = conf.stride_y(); |
| 42 | + strideW_ = conf.stride(); |
| 43 | + |
| 44 | + filterH_ = conf.filter_size_y(); |
| 45 | + filterW_ = conf.filter_size(); |
| 46 | + |
| 47 | + configImgH_ = conf.has_img_size_y() ? conf.img_size_y() : conf.img_size(); |
| 48 | + configImgW_ = conf.img_size(); |
| 49 | + |
| 50 | + configOutH_ = conf.has_output_y() ? conf.output_y() : conf.output_x(); |
| 51 | + configOutW_ = conf.output_x(); |
| 52 | + |
| 53 | + configChannels_ = conf.channels(); |
| 54 | + configNumFilters_ = config_.num_filters(); |
| 55 | + |
| 56 | + isDeconv_ = (config_.type() == "conv") ? false : true; |
| 57 | + |
| 58 | + channels_ = (isDeconv_) ? configNumFilters_ : configChannels_; |
| 59 | + numFilters_ = (isDeconv_) ? configChannels_ : configNumFilters_; |
| 60 | + |
| 61 | + groups_ = conf.groups(); |
| 62 | + CHECK_EQ(channels_ % groups_, 0); |
| 63 | + CHECK_EQ(numFilters_ % groups_, 0); |
| 64 | +} |
| 65 | + |
// Creates the cuDNN filter/tensor/convolution descriptors and resets the
// cached algorithm-selection state.  The tensor descriptors are created
// without shapes here; reshapeTensorDesc() fills them in once the batch
// size is known.
void ConvBaseProjection::initCudnn() {
  // Filter descriptor is per-group: (channels_/groups_) input channels by
  // (numFilters_/groups_) output filters.
  hl_create_filter_descriptor(&filterDesc_,
                              channels_ / groups_,
                              numFilters_ / groups_,
                              filterH_,
                              filterW_);
  hl_create_tensor_descriptor(&imageDesc_);
  hl_create_tensor_descriptor(&outputDesc_);
  hl_create_convolution_descriptor(&convDesc_,
                                   imageDesc_,
                                   filterDesc_,
                                   paddingH_,
                                   paddingW_,
                                   strideH_,
                                   strideW_);

  // initialize all to default algorithms
  fwdAlgo_ = 0;
  bwdFilterAlgo_ = 0;
  bwdDataAlgo_ = 0;
  fwdLimitBytes_ = 0;
  bwdDataLimitBytes_ = 0;
  bwdFilterLimitBytes_ = 0;
  workSpaceInBytes_ = 0;

  // batchNum_ == 0 ensures the first reshape() call performs algorithm
  // selection (see the batchSize == batchNum_ test there).
  batchNum_ = 0;
  isSelectAlgo_ = false;
}
| 94 | + |
// Gives the cuDNN tensor and convolution descriptors concrete shapes for
// the given batch size.  Called from reshape() only when the batch size
// changes.
void ConvBaseProjection::reshapeTensorDesc(int batchSize) {
  // The image descriptor describes a single group's channel slice
  // (channels_/groups_ channels), but its batch stride spans the whole
  // image (channels_ * imageH_ * imageW_), so group offsets can be applied
  // on top of it.
  hl_tensor_reshape(imageDesc_,
                    batchSize,
                    channels_ / groups_,
                    imageH_,
                    imageW_,
                    channels_ * imageH_ * imageW_,
                    imageH_ * imageW_,
                    imageW_,
                    1);
  hl_reset_convolution_descriptor(convDesc_,
                                  imageDesc_,
                                  filterDesc_,
                                  paddingH_,
                                  paddingW_,
                                  strideH_,
                                  strideW_);

  // The stride between two consecutive images in ConvProjection may not be 1,
  // for example, in the case of layer ConcatenateLayer2 with two
  // ConvProjection, the stride is the output_size of layer ConcatenateLayer2.
  // So the calculation of nStride is different from CudnnConvLayer.
  // In fact, only "nStride = out_->value->getStride()" is ok.
  // size_t nStride = numFilters_ * outputH_ * outputW_;
  // if (out_->value->isContiguous()) {
  //   CHECK_EQ(nStride, out_->value->getWidth());
  // } else {
  //   nStride = out_->value->getStride();
  // }
  size_t nStride = out_->value->getStride();

  // Output descriptor is likewise per-group in channels, with the batch
  // stride taken from the (possibly non-contiguous) output matrix.
  hl_tensor_reshape(outputDesc_,
                    batchSize,
                    numFilters_ / groups_,
                    outputH_,
                    outputW_,
                    nStride,
                    outputH_ * outputW_,
                    outputW_,
                    1);
}
| 136 | + |
// Validates input/output sizes for the current batch and (re-)selects cuDNN
// algorithms plus the required workspace size whenever the batch size
// differs from the previous call.
void ConvBaseProjection::reshape(int batchSize) {
  // calOutputSize() is defined elsewhere (presumably also refreshes
  // imageH_/imageW_/outputH_/outputW_ — confirm against the subclass).
  size_t width = calOutputSize();
  CHECK_EQ(width, out_->value->getWidth());
  if (isDeconv_) {
    // For transposed convolution the *input* has the output-grid geometry.
    CHECK_EQ(static_cast<size_t>(configChannels_ * outputH_ * outputW_),
             in_->value->getWidth())
        << "Wrong input size for convolution transpose"
        << " channels=" << configChannels_ << " outputH=" << outputH_
        << " outputW=" << outputW_ << " inputSize=" << in_->value->getWidth();
  } else {
    CHECK_EQ(static_cast<size_t>(configChannels_ * imageH_ * imageW_),
             in_->value->getWidth())
        << "Wrong input size for convolution"
        << " channels=" << configChannels_ << " imageH=" << imageH_
        << " imageW=" << imageW_ << " inputSize=" << in_->value->getWidth();
  }

  // Skip (expensive) descriptor reshape + algorithm search when the batch
  // size is unchanged since the previous call.
  isSelectAlgo_ = (batchSize == batchNum_);
  batchNum_ = batchSize;

  if (!isSelectAlgo_) {
    reshapeTensorDesc(batchSize);
    // Queries cuDNN for forward / backward-data / backward-filter
    // algorithms and their workspace requirements.
    hl_conv_workspace(imageDesc_,
                      outputDesc_,
                      filterDesc_,
                      convDesc_,
                      &fwdAlgo_,
                      &fwdLimitBytes_,
                      &bwdDataAlgo_,
                      &bwdDataLimitBytes_,
                      &bwdFilterAlgo_,
                      &bwdFilterLimitBytes_);

    // One shared workspace must satisfy the largest of the three passes.
    size_t maxWorkSpace = 0;
    maxWorkSpace = std::max(fwdLimitBytes_, bwdDataLimitBytes_);
    maxWorkSpace = std::max(maxWorkSpace, bwdFilterLimitBytes_);
    workSpaceInBytes_ = maxWorkSpace;

    VLOG(3) << getName() << " Fwd / BwdData / BwdFilter algo: " << fwdAlgo_
            << " / " << bwdDataAlgo_ << " / " << bwdFilterAlgo_;
  }

  isSelectAlgo_ = true;
}
| 181 | + |
| 182 | +void *ConvBaseProjection::getSpaceBytes(size_t size) { |
| 183 | + std::vector<MemoryHandle *> &convMem = *convMem_; |
| 184 | + if (convMem.empty()) { |
| 185 | + int numDevices = hl_get_device_count(); |
| 186 | + convMem.resize(numDevices); |
| 187 | + } |
| 188 | + |
| 189 | + int devId = hl_get_device(); |
| 190 | + MemoryHandle **localMem = &(convMem[devId]); |
| 191 | + if (NULL == *localMem || size > (*localMem)->getAllocSize()) { |
| 192 | + *localMem = new GpuMemoryHandle(size); |
| 193 | + } |
| 194 | + return (*localMem)->getBuf(); |
| 195 | +} |
| 196 | + |
// Releases the cuDNN descriptors created in initCudnn().  Note: the
// workspace buffers cached in convMem_ are not freed here — they are
// shared across projections and owned by the thread-local cache.
ConvBaseProjection::~ConvBaseProjection() {
  hl_destroy_tensor_descriptor(imageDesc_);
  hl_destroy_tensor_descriptor(outputDesc_);
  hl_destroy_filter_descriptor(filterDesc_);
  hl_destroy_convolution_descriptor(convDesc_);
}

}  // namespace paddle